0
0
DSA Cprogramming

Huffman Encoding in DSA C

Choose your learning style9 modes available
Mental Model
Huffman encoding builds a tree to assign shorter codes to frequent letters and longer codes to rare letters, saving space.
Analogy: Imagine packing a suitcase: you put small, frequent items in easy-to-reach spots and bigger, rare items in harder spots to use space efficiently.
Frequency table: a b c d e
[5] [9] [12] [13] [16]

Initial nodes:
5(a) 9(b) 12(c) 13(d) 16(e)

Tree:
         [55]
        /    \
   [25(cd)]   [30(abe)]
   /    \     /     \
 12(c) 13(d) 14(ab) 16(e)
             /   \
           5(a)  9(b)
Dry Run Walkthrough
Input: string: 'abcde' with frequencies {a:5, b:9, c:12, d:13, e:16}
Goal: Build a Huffman tree to assign binary codes minimizing total bits for encoding
Step 1: Pick two smallest frequencies 5(a) and 9(b), combine into node with freq 14
Nodes: [14(ab)] [12(c)] [13(d)] [16(e)]
Why: Combine smallest to build tree bottom-up for optimal prefix codes
Step 2: Pick two smallest frequencies 12(c) and 13(d), combine into node with freq 25
Nodes: [14(ab)] [16(e)] [25(cd)]
Why: Continue combining smallest nodes to build tree
Step 3: Pick two smallest frequencies 14(ab) and 16(e), combine into node with freq 30
Nodes: [25(cd)] [30(abe)]
Why: Build higher level nodes by combining smallest frequencies
Step 4: Combine remaining nodes 25(cd) and 30(abe) into root node with freq 55
Root node: [55(abcde)]
Why: Final tree root represents total frequency sum
Step 5: Assign binary codes by traversing tree: left edge = 0, right edge = 1
Codes: a=100, b=101, c=00, d=01, e=11
Why: Assign codes so no code is prefix of another, shortest for frequent chars
Result:
Huffman codes: a=100, b=101, c=00, d=01, e=11
Annotated Code
DSA C
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_TREE_HT 100

struct MinHeapNode {
    char data;
    unsigned freq;
    struct MinHeapNode *left, *right;
};

struct MinHeap {
    unsigned size;
    unsigned capacity;
    struct MinHeapNode** array;
};

struct MinHeapNode* newNode(char data, unsigned freq) {
    struct MinHeapNode* temp = (struct MinHeapNode*)malloc(sizeof(struct MinHeapNode));
    temp->left = temp->right = NULL;
    temp->data = data;
    temp->freq = freq;
    return temp;
}

struct MinHeap* createMinHeap(unsigned capacity) {
    struct MinHeap* minHeap = (struct MinHeap*)malloc(sizeof(struct MinHeap));
    minHeap->size = 0;
    minHeap->capacity = capacity;
    minHeap->array = (struct MinHeapNode**)malloc(minHeap->capacity * sizeof(struct MinHeapNode*));
    return minHeap;
}

void swapMinHeapNode(struct MinHeapNode** a, struct MinHeapNode** b) {
    struct MinHeapNode* t = *a;
    *a = *b;
    *b = t;
}

void minHeapify(struct MinHeap* minHeap, int idx) {
    int smallest = idx;
    int left = 2 * idx + 1;
    int right = 2 * idx + 2;

    if (left < (int)minHeap->size && minHeap->array[left]->freq < minHeap->array[smallest]->freq)
        smallest = left;
    if (right < (int)minHeap->size && minHeap->array[right]->freq < minHeap->array[smallest]->freq)
        smallest = right;
    if (smallest != idx) {
        swapMinHeapNode(&minHeap->array[smallest], &minHeap->array[idx]);
        minHeapify(minHeap, smallest);
    }
}

int isSizeOne(struct MinHeap* minHeap) {
    return (minHeap->size == 1);
}

struct MinHeapNode* extractMin(struct MinHeap* minHeap) {
    struct MinHeapNode* temp = minHeap->array[0];
    minHeap->array[0] = minHeap->array[minHeap->size - 1];
    minHeap->size--;
    minHeapify(minHeap, 0);
    return temp;
}

void insertMinHeap(struct MinHeap* minHeap, struct MinHeapNode* minHeapNode) {
    minHeap->size++;
    int i = minHeap->size - 1;
    while (i && minHeapNode->freq < minHeap->array[(i - 1) / 2]->freq) {
        minHeap->array[i] = minHeap->array[(i - 1) / 2];
        i = (i - 1) / 2;
    }
    minHeap->array[i] = minHeapNode;
}

void buildMinHeap(struct MinHeap* minHeap) {
    int n = minHeap->size - 1;
    for (int i = (n - 1) / 2; i >= 0; --i)
        minHeapify(minHeap, i);
}

int isLeaf(struct MinHeapNode* root) {
    return !(root->left) && !(root->right);
}

struct MinHeap* createAndBuildMinHeap(char data[], int freq[], int size) {
    struct MinHeap* minHeap = createMinHeap(size);
    for (int i = 0; i < size; ++i)
        minHeap->array[i] = newNode(data[i], freq[i]);
    minHeap->size = size;
    buildMinHeap(minHeap);
    return minHeap;
}

struct MinHeapNode* buildHuffmanTree(char data[], int freq[], int size) {
    struct MinHeapNode *left, *right, *top;
    struct MinHeap* minHeap = createAndBuildMinHeap(data, freq, size);
    while (!isSizeOne(minHeap)) {
        left = extractMin(minHeap);
        right = extractMin(minHeap);
        top = newNode('$', left->freq + right->freq);
        top->left = left;
        top->right = right;
        insertMinHeap(minHeap, top);
    }
    return extractMin(minHeap);
}

void printCodes(struct MinHeapNode* root, int arr[], int top) {
    if (root->left) {
        arr[top] = 0;
        printCodes(root->left, arr, top + 1);
    }
    if (root->right) {
        arr[top] = 1;
        printCodes(root->right, arr, top + 1);
    }
    if (isLeaf(root)) {
        printf("%c=", root->data);
        for (int i = 0; i < top; ++i)
            printf("%d", arr[i]);
        printf("\n");
    }
}

int main() {
    char arr[] = {'a', 'b', 'c', 'd', 'e'};
    int freq[] = {5, 9, 12, 13, 16};
    int size = sizeof(arr) / sizeof(arr[0]);
    struct MinHeapNode* root = buildHuffmanTree(arr, freq, size);
    int huffmanCode[MAX_TREE_HT], top = 0;
    printCodes(root, huffmanCode, top);
    return 0;
}
while (!isSizeOne(minHeap)) {
combine two smallest nodes repeatedly to build Huffman tree
left = extractMin(minHeap);
extract smallest frequency node
right = extractMin(minHeap);
extract second smallest frequency node
top = newNode('$', left->freq + right->freq);
create new internal node with combined frequency
top->left = left; top->right = right;
assign children to new node
insertMinHeap(minHeap, top);
insert combined node back into min heap
printCodes(root, huffmanCode, top);
traverse tree to print codes for each character
OutputSuccess
c=00 d=01 a=100 b=101 e=11
Complexity Analysis
Time: O(n log n) because we build a min heap and extract/insert nodes n-1 times
Space: O(n) for storing nodes and the heap
vs Alternative: Compared to fixed-length encoding, Huffman reduces average bits by assigning shorter codes to frequent chars, saving space
Edge Cases
Single character input
Code assigns 0 to the single character without tree building
DSA C
int size = sizeof(arr) / sizeof(arr[0]);
All characters have same frequency
Tree builds but codes may have same length; still valid prefix codes
DSA C
while (!isSizeOne(minHeap)) {
Empty input array
No tree built, program exits or prints nothing
DSA C
int size = sizeof(arr) / sizeof(arr[0]);
When to Use This Pattern
When you need to compress data by assigning variable-length codes based on frequency, reach for Huffman Encoding because it builds an optimal prefix code tree.
Common Mistakes
Mistake: Not combining the two smallest frequency nodes each time
Fix: Always extract and combine the two nodes with smallest frequencies from the min heap
Mistake: Assigning codes without ensuring prefix property
Fix: Use tree traversal assigning 0 to left edges and 1 to right edges to guarantee prefix-free codes
Summary
It builds a tree to assign shorter binary codes to frequent characters for efficient encoding.
Use it when you want to compress data by minimizing the total bits needed based on character frequencies.
The key insight is combining the two smallest frequency nodes repeatedly to build an optimal prefix code tree.