Huffman Compress

Huffman coding is a data compression algorithm. It is based on the idea that frequently-appearing letters should have shorter bit representations and less common letters should have longer representations.

For Huffman Decompress algorithm click here.



									private const int MAX_TREE_NODES = 511;

public class BitStream
{
	public byte[] BytePointer;
	public uint BitPosition;
	public uint Index;
}

public struct Symbol
{
	public int Sym;
	public uint Count;
	public uint Code;
	public uint Bits;
}

public class EncodeNode
{
	public EncodeNode ChildA;
	public EncodeNode ChildB;
	public int Count;
	public int Symbol;
}

private static void initBitstream(ref BitStream stream, byte[] buffer)
{
	stream.BytePointer = buffer;
	stream.BitPosition = 0;
}

private static void writeBits(ref BitStream stream, uint x, uint bits)
{
	byte[] buffer = stream.BytePointer;
	uint bit = stream.BitPosition;
	uint mask = (uint)(1 << (int)(bits - 1));

	for (uint count = 0; count < bits; ++count)
	{
		buffer[stream.Index] = (byte)((buffer[stream.Index] & (0xff ^ (1 << (int)(7 - bit)))) + ((Convert.ToBoolean(x & mask) ? 1 : 0) << (int)(7 - bit)));
		x <<= 1;
		bit = (bit + 1) & 7;

		if (!Convert.ToBoolean(bit))
		{
			++stream.Index;
		}
	}

	stream.BytePointer = buffer;
	stream.BitPosition = bit;
}

private static void histogram(byte[] input, Symbol[] sym, uint size)
{
	int i;
	int index = 0;

	for (i = 0; i < 256; ++i)
	{
		sym[i].Sym = i;
		sym[i].Count = 0;
		sym[i].Code = 0;
		sym[i].Bits = 0;
	}

	for (i = (int)size; Convert.ToBoolean(i); --i, ++index)
	{
		sym[input[index]].Count++;
	}
}

private static void storeTree(ref EncodeNode node, Symbol[] sym, ref BitStream stream, uint code, uint bits)
{
	uint symbolIndex;

	if (node.Symbol >= 0)
	{
		writeBits(ref stream, 1, 1);
		writeBits(ref stream, (uint)node.Symbol, 8);

		for (symbolIndex = 0; symbolIndex < 256; ++symbolIndex)
		{
			if (sym[symbolIndex].Sym == node.Symbol)
				break;
		}

		sym[symbolIndex].Code = code;
		sym[symbolIndex].Bits = bits;
		return;
	}
	else
	{
		writeBits(ref stream, 0, 1);
	}

	storeTree(ref node.ChildA, sym, ref stream, (code << 1) + 0, bits + 1);
	storeTree(ref node.ChildB, sym, ref stream, (code << 1) + 1, bits + 1);
}

private static void makeTree(Symbol[] sym, ref BitStream stream)
{
	EncodeNode[] nodes = new EncodeNode[MAX_TREE_NODES];

	for (int counter = 0; counter < nodes.Length; ++counter)
	{
		nodes[counter] = new EncodeNode();
	}

	EncodeNode node1, node2, root;
	uint i, numSymbols = 0, nodesLeft, nextIndex;

	for (i = 0; i < 256; ++i)
	{
		if (sym[i].Count > 0)
		{
			nodes[numSymbols].Symbol = sym[i].Sym;
			nodes[numSymbols].Count = (int)sym[i].Count;
			nodes[numSymbols].ChildA = null;
			nodes[numSymbols].ChildB = null;
			++numSymbols;
		}
	}

	root = null;
	nodesLeft = numSymbols;
	nextIndex = numSymbols;

	while (nodesLeft > 1)
	{
		node1 = null;
		node2 = null;

		for (i = 0; i < nextIndex; ++i)
		{
			if (nodes[i].Count > 0)
			{
				if (node1 == null  || (nodes[i].Count <= node1.Count))
				{
					node2 = node1;
					node1 = nodes[i];
				}
				else if (node2 == null || (nodes[i].Count <= node2.Count))
				{
					node2 = nodes[i];
				}
			}
		}

		root = nodes[nextIndex];
		root.ChildA = node1;
		root.ChildB = node2;
		root.Count = node1.Count + node2.Count;
		root.Symbol = -1;
		node1.Count = 0;
		node2.Count = 0;
		++nextIndex;
		--nodesLeft;
	}

	if (root != null)
	{
		storeTree(ref root, sym, ref stream, 0, 0);
	}
	else
	{
		root = nodes[0];
		storeTree(ref root, sym, ref stream, 0, 1);
	}
}

public static int Compress(byte[] input, byte[] output, uint inputSize)
{
	Symbol[] sym = new Symbol[256];
	Symbol temp;
	BitStream stream = new BitStream();
	uint i, totalBytes, swaps, symbol;

	if (inputSize < 1)
		return 0;

	initBitstream(ref stream, output);
	histogram(input, sym, inputSize);
	makeTree(sym, ref stream);

	do
	{
		swaps = 0;

		for (i = 0; i < 255; ++i)
		{
			if (sym[i].Sym > sym[i + 1].Sym)
			{
				temp = sym[i];
				sym[i] = sym[i + 1];
				sym[i + 1] = temp;
				swaps = 1;
			}
		}
	} while (Convert.ToBoolean(swaps));

	for (i = 0; i < inputSize; ++i)
	{
		symbol = input[i];
		writeBits(ref stream, sym[symbol].Code, sym[symbol].Bits);
	}

	totalBytes = stream.Index;

	if (stream.BitPosition > 0)
	{
		++totalBytes;
	}

	return (int)totalBytes;
}
								


Example

									string str = "This is an example for Huffman coding";
byte[] originalData = Encoding.Default.GetBytes(str);
uint originalDataSize = (uint)str.Length;
byte[] compressedData = new byte[originalDataSize * (101 / 100) + 320];

int compressedDataSize = Compress(originalData, compressedData, originalDataSize);