RLE Compress

RLE (Run-length encoding) is a very simple form of lossless data compression in which runs of data (that is, sequences in which the same data value occurs in many consecutive data elements) are stored as a single data value and count, rather than as the original run. This is most useful on data that contains many such runs.

For RLE Decompress algorithm click here.



									/*****Please include following header files*****/
// stdint.h
/***********************************************/

void encodeRepetition(uint8_t* output, uint32_t* outputPosition, uint8_t marker, uint8_t symbol, uint32_t count)
{
	uint32_t index = *outputPosition;

	if (count <= 3)
	{
		if (symbol == marker)
		{
			output[index++] = marker;
			output[index++] = count - 1;
		}
		else
		{
			for (uint32_t i = 0; i < count; ++i)
			{
				output[index++] = symbol;
			}
		}
	}
	else
	{
		output[index++] = marker;
		--count;

		if (count >= 128)
		{
			output[index++] = (count >> 8) | 0x80;
		}

		output[index++] = count & 0xff;
		output[index++] = symbol;
	}

	*outputPosition = index;
}

void encodeNonRepetition(uint8_t* output, uint32_t* outputPosition, uint8_t marker, uint8_t symbol)
{
	uint32_t index = *outputPosition;

	if (symbol == marker)
	{
		output[index++] = marker;
		output[index++] = 0;
	}
	else
	{
		output[index++] = symbol;
	}

	*outputPosition = index;
}

int Compress(uint8_t* input, uint8_t* output, uint32_t inputSize) {
	uint8_t byte1, byte2, marker;
	uint32_t i, inputPosition, outputPosition, count, histogram[256];

	if (inputSize < 1)
	{
		return 0;
	}

	for (i = 0; i < 256; ++i)
	{
		histogram[i] = 0;
	}

	for (i = 0; i < inputSize; ++i)
	{
		++histogram[input[i]];
	}

	marker = 0;

	for (i = 1; i < 256; ++i)
	{
		if (histogram[i] < histogram[marker])
		{
			marker = i;
		}
	}

	output[0] = marker;
	outputPosition = 1;

	byte1 = input[0];
	inputPosition = 1;
	count = 1;

	if (inputSize >= 2)
	{
		byte2 = input[inputPosition++];
		count = 2;

		do
		{
			if (byte1 == byte2)
			{
				while ((inputPosition < inputSize) && (byte1 == byte2) && (count < 32768))
				{
					byte2 = input[inputPosition++];
					++count;
				}

				if (byte1 == byte2)
				{
					encodeRepetition(output, &outputPosition, marker, byte1, count);

					if (inputPosition < inputSize)
					{
						byte1 = input[inputPosition++];
						count = 1;
					}
					else
					{
						count = 0;
					}
				}
				else
				{
					encodeRepetition(output, &outputPosition, marker, byte1, count - 1);
					byte1 = byte2;
					count = 1;
				}
			}
			else
			{
				encodeNonRepetition(output, &outputPosition, marker, byte1);
				byte1 = byte2;
				count = 1;
			}
			if (inputPosition < inputSize)
			{
				byte2 = input[inputPosition++];
				count = 2;
			}
		} while ((inputPosition < inputSize) || (count >= 2));
	}

	if (count == 1)
	{
		encodeNonRepetition(output, &outputPosition, marker, byte1);
	}

	return outputPosition;
}
								


Example

									/*****Please include following header files*****/
// string.h
// stdlib.h
/***********************************************/

char* str = "aaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbNNNNNNNNNNNNNNNNPPPPPPPPPPP12888888888888@@@@@@@@@@@@@";
uint8_t* originalData = (uint8_t*)str;
int originalDataSize = strlen(str);
uint8_t* compressedData = (uint8_t*)malloc(originalDataSize * (257 / 256) + 1);

int compressedDataSize = Compress(originalData, compressedData, originalDataSize);