Statistics from a Large Sample

medium math statistics frequency array

Problem

You are given a large sample of integers in the range [0, 255], represented by an array count of length 256 where count[k] is the number of times k appears. Return [minimum, maximum, mean, median, mode] as floating-point numbers. The median is the middle value when sorted (the average of the two middle values when the total count is even), and the mode is the value with the highest frequency.

Inputcount = [0, 1, 3, 4] (rest are 0)
Output[1.0, 3.0, 2.375, 2.5, 3.0]
Sample is [1, 2, 2, 2, 3, 3, 3, 3] (8 values). min=1, max=3, mean=19/8=2.375, median averages the 4th and 5th values (2 and 3) = 2.5, mode=3.

def sample_stats(count):
    total = sum(count)
    minimum = maximum = mode = -1
    mode_freq = 0
    sum_vals = 0
    for v in range(256):
        if count[v] == 0:
            continue
        if minimum < 0:
            minimum = v
        maximum = v
        sum_vals += v * count[v]
        if count[v] > mode_freq:
            mode_freq = count[v]
            mode = v
    mean = sum_vals / total

    def kth(k):                       # 1-indexed value at sorted position k
        seen = 0
        for v in range(256):
            seen += count[v]
            if seen >= k:
                return v
        return -1

    if total % 2 == 1:
        median = float(kth(total // 2 + 1))
    else:
        median = (kth(total // 2) + kth(total // 2 + 1)) / 2
    return [float(minimum), float(maximum), mean, median, float(mode)]
function sampleStats(count) {
  let total = count.reduce((a, b) => a + b, 0);
  let minimum = -1, maximum = -1, mode = -1, modeFreq = 0, sumVals = 0;
  for (let v = 0; v < 256; v++) {
    if (count[v] === 0) continue;
    if (minimum < 0) minimum = v;
    maximum = v;
    sumVals += v * count[v];
    if (count[v] > modeFreq) { modeFreq = count[v]; mode = v; }
  }
  const mean = sumVals / total;
  const kth = (k) => {
    let seen = 0;
    for (let v = 0; v < 256; v++) {
      seen += count[v];
      if (seen >= k) return v;
    }
    return -1;
  };
  let median;
  if (total % 2 === 1) median = kth(Math.floor(total / 2) + 1);
  else median = (kth(total / 2) + kth(total / 2 + 1)) / 2;
  return [minimum, maximum, mean, median, mode];
}
class Solution {
    public double[] sampleStats(int[] count) {
        long total = 0, sumVals = 0;
        int minimum = -1, maximum = -1, mode = -1, modeFreq = 0;
        for (int v = 0; v < 256; v++) {
            if (count[v] == 0) continue;
            total += count[v];
            if (minimum < 0) minimum = v;
            maximum = v;
            sumVals += (long) v * count[v];
            if (count[v] > modeFreq) { modeFreq = count[v]; mode = v; }
        }
        double mean = (double) sumVals / total;
        long midLo = total / 2, midHi = total / 2 + 1;
        double median;
        if (total % 2 == 1) median = kth(count, midHi);
        else median = (kth(count, midLo) + kth(count, midHi)) / 2.0;
        return new double[]{ minimum, maximum, mean, median, mode };
    }
    private int kth(int[] count, long k) {
        long seen = 0;
        for (int v = 0; v < 256; v++) {
            seen += count[v];
            if (seen >= k) return v;
        }
        return -1;
    }
}
vector<double> sampleStats(vector<int>& count) {
    long long total = 0, sumVals = 0;
    int minimum = -1, maximum = -1, mode = -1, modeFreq = 0;
    for (int v = 0; v < 256; v++) {
        if (count[v] == 0) continue;
        total += count[v];
        if (minimum < 0) minimum = v;
        maximum = v;
        sumVals += (long long) v * count[v];
        if (count[v] > modeFreq) { modeFreq = count[v]; mode = v; }
    }
    double mean = (double) sumVals / total;
    auto kth = [&](long long k) {
        long long seen = 0;
        for (int v = 0; v < 256; v++) {
            seen += count[v];
            if (seen >= k) return v;
        }
        return -1;
    };
    double median = (total % 2 == 1)
        ? kth(total / 2 + 1)
        : (kth(total / 2) + kth(total / 2 + 1)) / 2.0;
    return { (double) minimum, (double) maximum, mean, median, (double) mode };
}
Time: O(256) Space: O(1)