Find Median from Data Stream

hard heap median

Problem

The median is the middle value in an ordered integer list. If the size of the list is even, there is no middle value and the median is the mean of the two middle values. For example, for arr = [2,3,4], the median is 3.

Inputadd(1), add(2), median()→1.5, add(3), median()→2
Keep two heaps: a max-heap L for the smaller half and a min-heap R for the larger half. Maintain |L| − |R| ∈ {0, 1}. The median is L.top() (odd) or (L.top() + R.top())/2 (even).

import heapq

class MedianFinder:
    def __init__(self):
        self.lo = []  # max-heap (negated)
        self.hi = []  # min-heap

    def add(self, x):
        heapq.heappush(self.lo, -heapq.heappushpop(self.hi, x))
        if len(self.lo) > len(self.hi) + 1:
            heapq.heappush(self.hi, -heapq.heappop(self.lo))

    def median(self):
        if len(self.lo) > len(self.hi):
            return -self.lo[0]
        return (-self.lo[0] + self.hi[0]) / 2.0
// MaxHeap and MinHeap are simple binary heap classes (omitted).
class MedianFinder {
  constructor() { this.lo = new MaxHeap(); this.hi = new MinHeap(); }
  add(x) {
    if (this.lo.size() === 0 || x <= this.lo.top()) this.lo.push(x);
    else this.hi.push(x);
    if (this.lo.size() > this.hi.size() + 1) this.hi.push(this.lo.pop());
    else if (this.hi.size() > this.lo.size()) this.lo.push(this.hi.pop());
  }
  median() {
    if (this.lo.size() > this.hi.size()) return this.lo.top();
    return (this.lo.top() + this.hi.top()) / 2;
  }
}
class MedianFinder {
    PriorityQueue<Integer> lo = new PriorityQueue<>((a, b) -> b - a);
    PriorityQueue<Integer> hi = new PriorityQueue<>();
    public void add(int x) {
        if (lo.isEmpty() || x <= lo.peek()) lo.add(x); else hi.add(x);
        if (lo.size() > hi.size() + 1) hi.add(lo.poll());
        else if (hi.size() > lo.size()) lo.add(hi.poll());
    }
    public double median() {
        if (lo.size() > hi.size()) return lo.peek();
        return (lo.peek() + hi.peek()) / 2.0;
    }
}
class MedianFinder {
    priority_queue<int> lo;                              // max-heap
    priority_queue<int, vector<int>, greater<>> hi;     // min-heap
public:
    void add(int x) {
        if (lo.empty() || x <= lo.top()) lo.push(x); else hi.push(x);
        if ((int) lo.size() > (int) hi.size() + 1) { hi.push(lo.top()); lo.pop(); }
        else if ((int) hi.size() > (int) lo.size()) { lo.push(hi.top()); hi.pop(); }
    }
    double median() {
        if (lo.size() > hi.size()) return lo.top();
        return (lo.top() + hi.top()) / 2.0;
    }
};
Time: O(log n) per add, O(1) per median Space: O(n)