Running Median From a Data Stream

hard heap median

Problem

Design a data structure that supports two operations: add(x) appends a number to the stream, and median() returns the median of all numbers seen so far. Both must be efficient as the stream grows.

Inputadd(1), add(2), median()→1.5, add(3), median()→2
Keep two heaps: a max-heap L for the smaller half and a min-heap R for the larger half. Maintain |L| − |R| ∈ {0, 1}. The median is L.top() (odd) or (L.top() + R.top())/2 (even).

import heapq

class MedianFinder:
    def __init__(self):
        self.lo = []  # max-heap (negated)
        self.hi = []  # min-heap

    def add(self, x):
        heapq.heappush(self.lo, -heapq.heappushpop(self.hi, x))
        if len(self.lo) > len(self.hi) + 1:
            heapq.heappush(self.hi, -heapq.heappop(self.lo))

    def median(self):
        if len(self.lo) > len(self.hi):
            return -self.lo[0]
        return (-self.lo[0] + self.hi[0]) / 2.0
// MaxHeap and MinHeap are simple binary heap classes (omitted).
class MedianFinder {
  constructor() { this.lo = new MaxHeap(); this.hi = new MinHeap(); }
  add(x) {
    if (this.lo.size() === 0 || x <= this.lo.top()) this.lo.push(x);
    else this.hi.push(x);
    if (this.lo.size() > this.hi.size() + 1) this.hi.push(this.lo.pop());
    else if (this.hi.size() > this.lo.size()) this.lo.push(this.hi.pop());
  }
  median() {
    if (this.lo.size() > this.hi.size()) return this.lo.top();
    return (this.lo.top() + this.hi.top()) / 2;
  }
}
class MedianFinder {
    PriorityQueue<Integer> lo = new PriorityQueue<>((a, b) -> b - a);
    PriorityQueue<Integer> hi = new PriorityQueue<>();
    public void add(int x) {
        if (lo.isEmpty() || x <= lo.peek()) lo.add(x); else hi.add(x);
        if (lo.size() > hi.size() + 1) hi.add(lo.poll());
        else if (hi.size() > lo.size()) lo.add(hi.poll());
    }
    public double median() {
        if (lo.size() > hi.size()) return lo.peek();
        return (lo.peek() + hi.peek()) / 2.0;
    }
}
class MedianFinder {
    priority_queue<int> lo;                              // max-heap
    priority_queue<int, vector<int>, greater<>> hi;     // min-heap
public:
    void add(int x) {
        if (lo.empty() || x <= lo.top()) lo.push(x); else hi.push(x);
        if ((int) lo.size() > (int) hi.size() + 1) { hi.push(lo.top()); lo.pop(); }
        else if ((int) hi.size() > (int) lo.size()) { lo.push(hi.top()); hi.pop(); }
    }
    double median() {
        if (lo.size() > hi.size()) return lo.top();
        return (lo.top() + hi.top()) / 2.0;
    }
};
Time: O(log n) per add, O(1) per median Space: O(n)