0
0
DSA Goprogramming

Median of Data Stream Using Two Heaps in DSA Go

Choose your learning style9 modes available
Mental Model
We keep two groups of numbers: smaller half and larger half. The middle value is easy to find by looking at the tops of these groups.
Analogy: Imagine two boxes: one holds all smaller toys, the other holds bigger toys. The median toy is the one right between these two boxes.
MaxHeap (smaller half)      MinHeap (larger half)
      [5, 3, 1]                  [6, 8, 9]
       ↑ top is 5               ↑ top is 6

Median is either top of MaxHeap or average of tops.
Dry Run Walkthrough
Input: Stream: 5, 3, 8, 9, 1, 6
Goal: Find median after each new number is added
Step 1: Add 5 to MaxHeap (smaller half)
MaxHeap: [5] ↑top=5  MinHeap: [] null
Why: Start by adding first number to smaller half
Step 2: Add 3 to MaxHeap, then move top to MinHeap to balance
MaxHeap: [3] ↑top=3  MinHeap: [5] ↑top=5
Why: Keep heaps balanced so sizes differ by at most 1
Step 3: Add 8 to MinHeap, then move top to MaxHeap to balance
MaxHeap: [5, 3] ↑top=5  MinHeap: [8] ↑top=8
Why: Balance heaps after adding to larger half
Step 4: Add 9 to MinHeap
MaxHeap: [5, 3] ↑top=5  MinHeap: [8, 9] ↑top=8
Why: Add to larger half since 9 > 5, now sizes equal (2-2)
Step 5: Add 1 to MaxHeap
MaxHeap: [5, 3, 1] ↑top=5  MinHeap: [8, 9] ↑top=8
Why: Add to smaller half since 1 <= 5, sizes now 3-2 (differ by 1)
Step 6: Add 6 to MinHeap
MaxHeap: [5, 3, 1] ↑top=5  MinHeap: [6, 8, 9] ↑top=6
Why: Add to larger half since 6 > 5, sizes now equal (3-3), no rebalance needed
Result:
MaxHeap: [5, 3, 1] ↑top=5  MinHeap: [6, 8, 9] ↑top=6
Median after each insertion: 5, 4, 5, 6.5, 5, 5.5
Annotated Code
DSA Go
package main

import (
	"container/heap"
	"fmt"
)

// IntHeap is a min-heap of ints.
type IntHeap []int

func (h IntHeap) Len() int           { return len(h) }
func (h IntHeap) Less(i, j int) bool { return h[i] < h[j] }
func (h IntHeap) Swap(i, j int)      { h[i], h[j] = h[j], h[i] }

func (h *IntHeap) Push(x interface{}) {
	*h = append(*h, x.(int))
}

func (h *IntHeap) Pop() interface{} {
	n := len(*h)
	x := (*h)[n-1]
	*h = (*h)[:n-1]
	return x
}

// MaxHeap is a max-heap of ints by inverting Less
type MaxHeap []int

func (h MaxHeap) Len() int           { return len(h) }
func (h MaxHeap) Less(i, j int) bool { return h[i] > h[j] }
func (h MaxHeap) Swap(i, j int)      { h[i], h[j] = h[j], h[i] }

func (h *MaxHeap) Push(x interface{}) {
	*h = append(*h, x.(int))
}

func (h *MaxHeap) Pop() interface{} {
	n := len(*h)
	x := (*h)[n-1]
	*h = (*h)[:n-1]
	return x
}

// MedianFinder holds two heaps
// maxHeap for smaller half, minHeap for larger half
// sizes differ by at most 1

type MedianFinder struct {
	maxHeap *MaxHeap
	minHeap *IntHeap
}

func Constructor() MedianFinder {
	maxH := &MaxHeap{}
	minH := &IntHeap{}
	heap.Init(maxH)
	heap.Init(minH)
	return MedianFinder{maxHeap: maxH, minHeap: minH}
}

func (mf *MedianFinder) AddNum(num int) {
	if mf.maxHeap.Len() == 0 || num <= (*mf.maxHeap)[0] {
		heap.Push(mf.maxHeap, num) // add to smaller half
	} else {
		heap.Push(mf.minHeap, num) // add to larger half
	}

	// Balance sizes
	if mf.maxHeap.Len() > mf.minHeap.Len()+1 {
		val := heap.Pop(mf.maxHeap).(int) // move top from maxHeap to minHeap
		heap.Push(mf.minHeap, val)
	} else if mf.minHeap.Len() > mf.maxHeap.Len() {
		val := heap.Pop(mf.minHeap).(int) // move top from minHeap to maxHeap
		heap.Push(mf.maxHeap, val)
	}
}

func (mf *MedianFinder) FindMedian() float64 {
	if mf.maxHeap.Len() == 0 {
		return 0.0
	}
	if mf.maxHeap.Len() > mf.minHeap.Len() {
		return float64((*mf.maxHeap)[0])
	} else {
		return (float64((*mf.maxHeap)[0]) + float64((*mf.minHeap)[0])) / 2.0
	}
}

func main() {
	mf := Constructor()
	nums := []int{5, 3, 8, 9, 1, 6}
	for _, num := range nums {
		mf.AddNum(num)
		median := mf.FindMedian()
		fmt.Printf("Added %d, current median: %.1f\n", num, median)
	}
}
if mf.maxHeap.Len() == 0 || num <= (*mf.maxHeap)[0] {
decide which heap to add new number to based on comparison with maxHeap top
heap.Push(mf.maxHeap, num)
add number to smaller half maxHeap
heap.Push(mf.minHeap, num)
add number to larger half minHeap
if mf.maxHeap.Len() > mf.minHeap.Len()+1 {
check if smaller half is too big and rebalance
val := heap.Pop(mf.maxHeap).(int)
remove top from smaller half to move to larger half
heap.Push(mf.minHeap, val)
add removed top to larger half
else if mf.minHeap.Len() > mf.maxHeap.Len() {
check if larger half is too big and rebalance
val := heap.Pop(mf.minHeap).(int)
remove top from larger half to move to smaller half
heap.Push(mf.maxHeap, val)
add removed top to smaller half
if mf.maxHeap.Len() > mf.minHeap.Len() {
if smaller half bigger, median is top of maxHeap
return float64((*mf.maxHeap)[0])
return median as top of smaller half
return (float64((*mf.maxHeap)[0]) + float64((*mf.minHeap)[0])) / 2.0
if equal size, median is average of tops
OutputSuccess
Added 5, current median: 5.0 Added 3, current median: 4.0 Added 8, current median: 5.0 Added 9, current median: 6.5 Added 1, current median: 5.0 Added 6, current median: 5.5
Complexity Analysis
Time: O(log n) per insertion because heap push/pop operations take logarithmic time
Space: O(n) because all numbers are stored in the two heaps
vs Alternative: Compared to sorting all numbers each time (O(n log n)), using two heaps is faster for streaming data
Edge Cases
empty stream
FindMedian returns 0 if called before any number is added
DSA Go
if mf.maxHeap.Len() == 0 { return 0.0 }
all numbers equal
Heaps balance normally, median is that number
DSA Go
if mf.maxHeap.Len() > mf.minHeap.Len()+1 {
stream with one number
Median is that single number
DSA Go
if mf.maxHeap.Len() > mf.minHeap.Len() {
When to Use This Pattern
When you need to find the median of a growing list efficiently, use two heaps to keep track of smaller and larger halves for quick median retrieval.
Common Mistakes
Mistake: Not balancing the heaps after each insertion, causing incorrect median calculation
Fix: Always rebalance heaps so their sizes differ by at most one after each insertion
Mistake: Adding all numbers to one heap without comparison
Fix: Compare new number with maxHeap top to decide which heap to add to
Summary
Keeps track of median in a data stream using two heaps for smaller and larger halves.
Use when you want to find median quickly after each new number in a stream.
The median is always at the top of one or both heaps because they split the data into two balanced halves.