Shannon–Fano Compress

Shannon–Fano is data compression algorithm, which replaces each symbol with an alternate binary representation. Common symbols are represented by few bits and uncommon symbols are represented by many bits.

For Shannon–Fano Decompress algorithm click here.



									Public Class BitStream
	Public BytePointer As Byte()
	Public BitPosition As UInteger
	Public Index As UInteger
End Class

Public Structure Symbol
	Public Sym As UInteger
	Public Count As UInteger
	Public Code As UInteger
	Public Bits As UInteger
End Structure

Private Shared Sub initBitStream(ByRef stream As BitStream, buffer As Byte())
	stream.BytePointer = buffer
	stream.BitPosition = 0
End Sub

Private Shared Sub writeBits(ByRef stream As BitStream, x As UInteger, bits As UInteger)
	Dim buffer As Byte() = stream.BytePointer
	Dim bit As UInteger = stream.BitPosition
	Dim mask As UInteger = CUInt(1 << CInt(bits - 1))

	For count As UInteger = 0 To bits - 1
		buffer(stream.Index) = CByte((buffer(stream.Index) And (&HFF Xor (1 << CInt(7 - bit)))) + ((If(Convert.ToBoolean(x And mask), 1, 0)) << CInt(7 - bit)))
		x <<= 1
		bit = (bit + 1) And 7

		If Not Convert.ToBoolean(bit) Then
			stream.Index += 1
		End If
	Next

	stream.BytePointer = buffer
	stream.BitPosition = bit
End Sub

Private Shared Sub histogram(input As Byte(), sym As Symbol(), size As UInteger)
	Dim temp As Symbol
	Dim i As Integer, swaps As Integer
	Dim index As Integer = 0

	For i = 0 To 255
		sym(i).Sym = CUInt(i)
		sym(i).Count = 0
		sym(i).Code = 0
		sym(i).Bits = 0
	Next

	i = CInt(size)
	While Convert.ToBoolean(i)
		sym(input(index)).Count += 1
		i -= 1
		index += 1
	End While

	Do
		swaps = 0

		For i = 0 To 254
			If sym(i).Count < sym(i + 1).Count Then
				temp = sym(i)
				sym(i) = sym(i + 1)
				sym(i + 1) = temp
				swaps = 1
			End If
		Next
	Loop While Convert.ToBoolean(swaps)
End Sub

Private Shared Sub makeTree(sym As Symbol(), ByRef stream As BitStream, code As UInteger, bits As UInteger, first As UInteger, last As UInteger)
	Dim i As UInteger, size As UInteger, sizeA As UInteger, sizeB As UInteger, lastA As UInteger, firstB As UInteger

	If first = last Then
		writeBits(stream, 1, 1)
		writeBits(stream, sym(first).Sym, 8)
		sym(first).Code = code
		sym(first).Bits = bits
		Return
	Else
		writeBits(stream, 0, 1)
	End If

	size = 0

	For i = first To last
		size += sym(i).Count
	Next

	sizeA = 0

	i = first
	While sizeA < ((size + 1) >> 1) AndAlso i < last
		sizeA += sym(i).Count
		i += 1
	End While

	If sizeA > 0 Then
		writeBits(stream, 1, 1)

		lastA = i - 1

		makeTree(sym, stream, (code << 1) + 0, bits + 1, first, lastA)
	Else
		writeBits(stream, 0, 1)
	End If

	sizeB = size - sizeA

	If sizeB > 0 Then
		writeBits(stream, 1, 1)

		firstB = i

		makeTree(sym, stream, (code << 1) + 1, bits + 1, firstB, last)
	Else
		writeBits(stream, 0, 1)
	End If
End Sub

Public Shared Function Compress(input As Byte(), output As Byte(), inputSize As UInteger) As Integer
	Dim sym As Symbol() = New Symbol(255) {}
	Dim temp As Symbol
	Dim stream As New BitStream()
	Dim i As UInteger, totalBytes As UInteger, swaps As UInteger, symbol As UInteger, lastSymbol As UInteger

	If inputSize < 1 Then
		Return 0
	End If

	initBitStream(stream, output)
	histogram(input, sym, inputSize)

	lastSymbol = 255
	While sym(lastSymbol).Count = 0


		lastSymbol -= 1
	End While

	If lastSymbol = 0 Then
		lastSymbol += 1
	End If

	makeTree(sym, stream, 0, 0, 0, lastSymbol)

	Do
		swaps = 0

		For i = 0 To 254
			If sym(i).Sym > sym(i + 1).Sym Then
				temp = sym(i)
				sym(i) = sym(i + 1)
				sym(i + 1) = temp
				swaps = 1
			End If
		Next
	Loop While Convert.ToBoolean(swaps)

	For i = 0 To inputSize - 1
		symbol = input(i)
		writeBits(stream, sym(symbol).Code, sym(symbol).Bits)
	Next

	totalBytes = stream.Index

	If stream.BitPosition > 0 Then
		totalBytes += 1
	End If

	Return CInt(totalBytes)
End Function
								


Example

									Dim str As String = "This is an example for Shannon–Fano coding"
Dim originalData As Byte() = Encoding.[Default].GetBytes(str)
Dim originalDataSize As UInteger = CUInt(str.Length)
Dim compressedData As Byte() = New Byte(originalDataSize * (101 \ 100) + 383) {}

Dim compressedDataSize As Integer = Compress(originalData, compressedData, originalDataSize)