include "bitdata.s7i";
include "huffman.s7i";
include "crc32.s7i";
const string: BZIP2_MAGIC is "BZh";
const integer: BZIP2_HEADER_SIZE is 4;
const integer: BZIP2_BLOCK_HEADER_MAGIC is 16#314159265359;
const integer: BZIP2_EOS_MAGIC is 16#177245385090;
const integer: BZIP2_MINIMUM_HUFFMAN_GROUPS is 2;
const integer: BZIP2_MAXIMUM_HUFFMAN_GROUPS is 6;
const integer: BZIP2_HUFFMAN_GROUP_SIZE is 50;
const integer: BZIP2_MAXIMUM_HUFFMAN_CODE_LENGTH is 20;
const integer: BZIP2_MTF_DATA_SIZE is 4096;
const integer: BZIP2_MTF_BLOCK_SIZE is 16;
const type: bzip2MtfBlockStartTable is array [0 .. 15] integer;
const type: bzip2MtfDataType is array [0 .. pred(BZIP2_MTF_DATA_SIZE)] integer;
const integer: BZIP2_RUN_A is 0;
const integer: BZIP2_RUN_B is 1;
const type: bzip2DecoderArray is array [0 ..] msbHuffmanDecoder;
const type: bzip2SelectorArray is array [0 ..] integer;
const type: bzip2MapToUsedByteCode is array [0 .. 255] integer;
const type: bzip2CodeFrequencies is array [0 .. 255] integer;
const type: bzip2NextByteIndex is array [0 ..] integer;
const type: bzip2Header is new struct
var string: magic is "";
var integer: hundredKBlockSize is 0;
end struct;
const proc: showHeader (in bzip2Header: header) is func
begin
writeln("magic: " <& literal(header.magic));
writeln("hundredKBlockSize: " <& header.hundredKBlockSize);
end func;
const func bzip2Header: readBzip2Header (in string: stri) is func
result
var bzip2Header: header is bzip2Header.value;
begin
if length(stri) >= BZIP2_HEADER_SIZE and
startsWith(stri, BZIP2_MAGIC) and stri[4] in {'0' .. '9'} then
header.magic := BZIP2_MAGIC;
header.hundredKBlockSize := ord(stri[4]) - ord('0');
end if;
end func;
const func bzip2Header: readBzip2Header (inout file: compressed) is func
result
var bzip2Header: header is bzip2Header.value;
local
var string: stri is "";
begin
stri := gets(compressed, BZIP2_HEADER_SIZE);
if length(stri) = BZIP2_HEADER_SIZE and
startsWith(stri, BZIP2_MAGIC) and stri[4] in {'0' .. '9'} then
header.magic := BZIP2_MAGIC;
header.hundredKBlockSize := ord(stri[4]) - ord('0');
end if;
end func;
const type: bzip2BlockHeader is new struct
var integer: storedCrc is 0;
var boolean: randomized is FALSE;
var integer: bwtStartIndex is 0;
var integer: huffmanGroups is 0;
var integer: endOfData is 0;
var bzip2MapToUsedByteCode: mapToUsedByteCode is bzip2MapToUsedByteCode.value;
end struct;
const proc: showHeader (in bzip2BlockHeader: blockHeader) is func
begin
writeln("storedCrc: " <& blockHeader.storedCrc);
writeln("randomized: " <& blockHeader.randomized);
writeln("bwtStartIndex: " <& blockHeader.bwtStartIndex);
writeln("huffmanGroups: " <& blockHeader.huffmanGroups);
writeln("endOfData: " <& blockHeader.endOfData);
end func;
const proc: readMappingTable (inout msbInBitStream: compressedStream,
inout bzip2BlockHeader: blockHeader) is func
local
var integer: mapIndex is 0;
var bitset: huffmanUsedMap is bitset.value;
var integer: bitIndex is 0;
var integer: codesInUse is 0;
begin
for mapIndex range 0 to 15 do
if odd(getBit(compressedStream)) then
incl(huffmanUsedMap, mapIndex);
end if;
end for;
for mapIndex range 0 to 15 do
if mapIndex in huffmanUsedMap then
for bitIndex range 0 to 15 do
if odd(getBit(compressedStream)) then
blockHeader.mapToUsedByteCode[codesInUse] := mapIndex * 16 + bitIndex;
incr(codesInUse);
end if;
end for;
end if;
end for;
blockHeader.endOfData := codesInUse + 1;
end func;
const proc: readHuffmanGroups (inout msbInBitStream: compressedStream,
inout bzip2BlockHeader: blockHeader) is func
begin
blockHeader.huffmanGroups := getBits(compressedStream, 3);
if blockHeader.huffmanGroups < BZIP2_MINIMUM_HUFFMAN_GROUPS or
blockHeader.huffmanGroups > BZIP2_MAXIMUM_HUFFMAN_GROUPS then
raise RANGE_ERROR;
end if;
end func;
const type: bzip2HuffmanDecoder is new struct
var bzip2DecoderArray: decoderTable is bzip2DecoderArray.value;
var bzip2SelectorArray: selector is bzip2SelectorArray.value;
end struct;
const type: bzip2DecoderState is new struct
var integer: selectorNumber is -1;
var integer: decoderTableIndex is 0;
var integer: symbolCountdown is 0;
end struct;
const func integer: getHuffmanSymbol (inout msbInBitStream: compressedStream,
in bzip2HuffmanDecoder: decoder, inout bzip2DecoderState: state) is func
result
var integer: nextSym is 0;
begin
if state.symbolCountdown = 0 then
incr(state.selectorNumber);
state.decoderTableIndex := decoder.selector[state.selectorNumber];
state.symbolCountdown := BZIP2_HUFFMAN_GROUP_SIZE;
end if;
decr(state.symbolCountdown);
nextSym := getHuffmanSymbol(compressedStream, decoder.decoderTable[state.decoderTableIndex]);
end func;
const proc: readSelectors (inout msbInBitStream: compressedStream,
in bzip2BlockHeader: blockHeader, inout bzip2HuffmanDecoder: decoder) is func
local
var integer: selectorsUsed is 0;
var bzip2SelectorArray: mtfSelectorList is bzip2SelectorArray.value;
var integer: index is 0;
var integer: selector is 0;
var array integer: mtfSelector is 0 times 0;
var integer: selectorIndex is 0;
begin
selectorsUsed := getBits(compressedStream, 15);
mtfSelectorList := bzip2SelectorArray[.. pred(selectorsUsed)] times 0;
for index range 0 to pred(selectorsUsed) do
selector := 0;
while odd(getBit(compressedStream)) do
incr(selector);
end while;
mtfSelectorList[index] := selector;
end for;
mtfSelector := [0 .. pred(blockHeader.huffmanGroups)] times 0;
for index range 1 to pred(blockHeader.huffmanGroups) do
mtfSelector[index] := index;
end for;
decoder.selector := bzip2SelectorArray[.. pred(selectorsUsed)] times 0;
for index range 0 to pred(selectorsUsed) do
selectorIndex := mtfSelectorList[index];
selector := mtfSelector[selectorIndex];
while selectorIndex <> 0 do
mtfSelector[selectorIndex] := mtfSelector[pred(selectorIndex)];
decr(selectorIndex);
end while;
mtfSelector[0] := selector;
decoder.selector[index] := selector;
end for;
end func;
const proc: readHuffmanDecoders (inout msbInBitStream: compressedStream,
in bzip2BlockHeader: blockHeader, inout bzip2HuffmanDecoder: decoder) is func
local
var integer: index is 0;
var integer: codeLength is 0;
var array integer: codeLengths is [0 .. -1] times 0;
var integer: symbol is 0;
begin
decoder.decoderTable :=
bzip2DecoderArray[.. pred(blockHeader.huffmanGroups)] times
msbHuffmanDecoder.value;
for index range 0 to pred(blockHeader.huffmanGroups) do
codeLength := getBits(compressedStream, 5);
if codeLength not in {1 .. BZIP2_MAXIMUM_HUFFMAN_CODE_LENGTH} then
raise RANGE_ERROR;
end if;
codeLengths := [0 .. blockHeader.endOfData] times 0;
for symbol range 0 to blockHeader.endOfData do
while odd(getBit(compressedStream)) do
if odd(getBit(compressedStream)) then
decr(codeLength);
else
incr(codeLength);
end if;
end while;
if codeLength not in {1 .. BZIP2_MAXIMUM_HUFFMAN_CODE_LENGTH} then
raise RANGE_ERROR;
end if;
codeLengths[symbol] := codeLength;
end for;
decoder.decoderTable[index] := createMsbHuffmanDecoder(codeLengths);
end for;
end func;
const proc: initMoveToFront (inout bzip2MtfBlockStartTable: mtfBlockStart,
inout bzip2MtfDataType: mtfData) is func
local
var integer: blockNumber is 0;
var integer: index is 0;
var integer: dataIndex is pred(BZIP2_MTF_DATA_SIZE);
begin
for blockNumber range maxIdx(bzip2MtfBlockStartTable) downto 0 do
for index range pred(BZIP2_MTF_BLOCK_SIZE) downto 0 do
mtfData[dataIndex] := blockNumber * BZIP2_MTF_BLOCK_SIZE + index;
decr(dataIndex);
end for;
mtfBlockStart[blockNumber] := succ(dataIndex);
end for;
end func;
const proc: moveMtfDataToTheEnd (inout bzip2MtfBlockStartTable: mtfBlockStart,
inout bzip2MtfDataType: mtfData) is func
local
var integer: blockNumber is 0;
var integer: srcBlockStart is 0;
var integer: destBlockStart is 0;
var integer: srcIdx is 0;
var integer: destIdx is 0;
begin
destBlockStart := BZIP2_MTF_DATA_SIZE - BZIP2_MTF_BLOCK_SIZE;
for blockNumber range maxIdx(bzip2MtfBlockStartTable) downto 0 do
srcBlockStart := mtfBlockStart[blockNumber];
if srcBlockStart < destBlockStart then
destIdx := pred(destBlockStart + BZIP2_MTF_BLOCK_SIZE);
for srcIdx range pred(srcBlockStart + BZIP2_MTF_BLOCK_SIZE)
downto srcBlockStart do
mtfData[destIdx] := mtfData[srcIdx];
decr(destIdx);
end for;
end if;
mtfBlockStart[blockNumber] := destBlockStart;
destBlockStart -:= BZIP2_MTF_BLOCK_SIZE;
end for;
end func;
const func integer: decodeMoveToFront (in integer: mtfIndex,
inout bzip2MtfBlockStartTable: mtfBlockStart,
inout bzip2MtfDataType: mtfData) is func
result
var integer: mtfCode is 0;
local
var integer: blockStart is 0;
var integer: dataIndex is 0;
var integer: blockNumber is 0;
var integer: blockOffset is 0;
begin
if mtfIndex < BZIP2_MTF_BLOCK_SIZE then
blockStart := mtfBlockStart[0];
dataIndex := blockStart + mtfIndex;
mtfCode := mtfData[dataIndex];
repeat
mtfData[dataIndex] := mtfData[pred(dataIndex)];
decr(dataIndex);
until dataIndex = blockStart;
mtfData[blockStart] := mtfCode;
else
blockNumber := mtfIndex div BZIP2_MTF_BLOCK_SIZE;
blockOffset := mtfIndex mod BZIP2_MTF_BLOCK_SIZE;
blockStart := mtfBlockStart[blockNumber];
dataIndex := blockStart + blockOffset;
mtfCode := mtfData[dataIndex];
while dataIndex <> blockStart do
mtfData[dataIndex] := mtfData[pred(dataIndex)];
decr(dataIndex);
end while;
repeat
mtfData[mtfBlockStart[blockNumber]] := mtfData[
mtfBlockStart[pred(blockNumber)] + BZIP2_MTF_BLOCK_SIZE - 1];
decr(blockNumber);
decr(mtfBlockStart[blockNumber]);
until blockNumber = 0;
mtfData[mtfBlockStart[0]] := mtfCode;
if mtfBlockStart[0] = 0 then
moveMtfDataToTheEnd(mtfBlockStart, mtfData);
end if;
end if;
end func;
const proc: processInputData (inout msbInBitStream: compressedStream,
in bzip2BlockHeader: blockHeader, in bzip2HuffmanDecoder: decoder,
inout string: byteStri, inout bzip2CodeFrequencies: codeFrequencies) is func
local
var bzip2DecoderState: state is bzip2DecoderState.value;
var bzip2MtfBlockStartTable: mtfBlockStart is bzip2MtfBlockStartTable.value;
var bzip2MtfDataType: mtfData is bzip2MtfDataType.value;
var integer: huffmanSymbol is 0;
var integer: repeatFactor is 0;
var integer: bitPos is 0;
var integer: mtfCode is 0;
var integer: code is 0;
begin
initMoveToFront(mtfBlockStart, mtfData);
huffmanSymbol := getHuffmanSymbol(compressedStream, decoder, state);
while huffmanSymbol <> blockHeader.endOfData do
if huffmanSymbol <= BZIP2_RUN_B then
repeatFactor := 0;
bitPos := 0;
repeat
repeatFactor +:= succ(huffmanSymbol) << bitPos;
incr(bitPos);
huffmanSymbol := getHuffmanSymbol(compressedStream, decoder, state);
until huffmanSymbol > BZIP2_RUN_B;
codeFrequencies[code] +:= repeatFactor;
byteStri &:= str(char(code)) mult repeatFactor;
else
mtfCode := decodeMoveToFront(pred(huffmanSymbol), mtfBlockStart, mtfData);
code := blockHeader.mapToUsedByteCode[mtfCode];
incr(codeFrequencies[code]);
byteStri &:= char(code);
huffmanSymbol := getHuffmanSymbol(compressedStream, decoder, state);
end if;
end while;
end func;
const func bzip2CodeFrequencies: computeFrequenciesSum (
in bzip2CodeFrequencies: codeFrequencies) is func
result
var bzip2CodeFrequencies: codeFrequenciesSum is bzip2CodeFrequencies.value;
local
var integer: index is 0;
var integer: codeFrequency is 0;
var integer: sumOfFrequencies is 0;
begin
for codeFrequency key index range codeFrequencies do
codeFrequenciesSum[index] := sumOfFrequencies;
sumOfFrequencies +:= codeFrequency;
end for;
end func;
const func bzip2NextByteIndex: inverseBurrowsWheelerTransform (in string: byteStri,
in bzip2CodeFrequencies: codeFrequencies) is func
result
var bzip2NextByteIndex: nextByteIndex is bzip2NextByteIndex.value;
local
var bzip2CodeFrequencies: codeFrequenciesSum is bzip2CodeFrequencies.value;
var char: aByte is ' ';
var integer: byteIndex is 0;
var integer: index is 0;
begin
codeFrequenciesSum := computeFrequenciesSum(codeFrequencies);
nextByteIndex := bzip2NextByteIndex[.. pred(length(byteStri))] times 0;
for aByte range byteStri do
index := codeFrequenciesSum[ord(aByte)];
incr(codeFrequenciesSum[ord(aByte)]);
nextByteIndex[index] := byteIndex;
incr(byteIndex);
end for;
end func;
const func char: getBwtByte (in string: byteStri,
in bzip2NextByteIndex: nextByteIndex, inout integer: currentByteIndex) is func
result
var char: aByte is ' ';
begin
aByte := byteStri[succ(currentByteIndex)];
currentByteIndex := nextByteIndex[currentByteIndex];
end func;
const func string: runLengthDecoding (in string: byteStri,
in bzip2NextByteIndex: nextByteIndex, in integer: bwtStartIndex) is func
result
var string: uncompressed is "";
local
var integer: currentByteIndex is 0;
var integer: endByteIndex is 0;
var char: aByte is ' ';
var char: previousByte is ' ';
var integer: runLength is 0;
begin
currentByteIndex := nextByteIndex[bwtStartIndex];
endByteIndex := currentByteIndex;
if byteStri <> "" then
previousByte := getBwtByte(byteStri, nextByteIndex, currentByteIndex);
if currentByteIndex <> endByteIndex then
repeat
aByte := getBwtByte(byteStri, nextByteIndex, currentByteIndex);
while aByte <> previousByte and currentByteIndex <> endByteIndex do
uncompressed &:= previousByte;
previousByte := aByte;
aByte := getBwtByte(byteStri, nextByteIndex, currentByteIndex);
end while;
if currentByteIndex = endByteIndex then
uncompressed &:= previousByte;
uncompressed &:= aByte;
else
runLength := 2;
repeat
aByte := getBwtByte(byteStri, nextByteIndex, currentByteIndex);
incr(runLength);
until aByte <> previousByte or runLength = 4 or currentByteIndex = endByteIndex;
if aByte <> previousByte then
uncompressed &:= str(previousByte) mult pred(runLength);
if currentByteIndex = endByteIndex then
uncompressed &:= aByte;
else
previousByte := aByte;
end if;
elsif currentByteIndex <> endByteIndex then
runLength +:= ord(getBwtByte(byteStri, nextByteIndex, currentByteIndex));
uncompressed &:= str(previousByte) mult runLength;
if currentByteIndex <> endByteIndex then
previousByte := getBwtByte(byteStri, nextByteIndex, currentByteIndex);
end if;
else
uncompressed &:= str(previousByte) mult runLength;
end if;
end if;
until currentByteIndex = endByteIndex;
else
uncompressed &:= previousByte;
end if;
end if;
end func;
const func string: decompressBzip2Block (inout msbInBitStream: compressedStream) is func
result
var string: uncompressed is "";
local
var bzip2BlockHeader: blockHeader is bzip2BlockHeader.value;
var bzip2HuffmanDecoder: decoder is bzip2HuffmanDecoder.value;
var string: byteStri is "";
var bzip2CodeFrequencies: codeFrequencies is bzip2CodeFrequencies.value;
var bzip2NextByteIndex: nextByteIndex is bzip2NextByteIndex.value;
begin
blockHeader.storedCrc := getBits(compressedStream, 32);
blockHeader.randomized := odd(getBit(compressedStream));
blockHeader.bwtStartIndex := getBits(compressedStream, 24);
readMappingTable(compressedStream, blockHeader);
readHuffmanGroups(compressedStream, blockHeader);
readSelectors(compressedStream, blockHeader, decoder);
readHuffmanDecoders(compressedStream, blockHeader, decoder);
processInputData(compressedStream, blockHeader, decoder, byteStri, codeFrequencies);
nextByteIndex := inverseBurrowsWheelerTransform(byteStri, codeFrequencies);
uncompressed := runLengthDecoding(byteStri, nextByteIndex, blockHeader.bwtStartIndex);
if blockHeader.storedCrc <> ord(bzip2Crc32(uncompressed)) then
raise RANGE_ERROR;
end if;
end func;
const func string: bzip2Decompress (in string: stri) is func
result
var string: uncompressed is "";
local
var bzip2Header: header is bzip2Header.value;
var msbInBitStream: compressedStream is msbInBitStream.value;
var integer: blockMagic is 0;
begin
header := readBzip2Header(stri);
if header.magic = BZIP2_MAGIC then
compressedStream := openMsbInBitStream(stri);
ignore(gets(compressedStream, 4));
blockMagic := getBits(compressedStream, 32) << 16 +
getBits(compressedStream, 16);
while blockMagic = BZIP2_BLOCK_HEADER_MAGIC do
uncompressed &:= decompressBzip2Block(compressedStream);
blockMagic := getBits(compressedStream, 32) << 16 +
getBits(compressedStream, 16);
end while;
if blockMagic <> BZIP2_EOS_MAGIC then
raise RANGE_ERROR;
end if;
end if;
end func;
const type: bzip2File is sub null_file struct
var msbInBitStream: compressedStream is msbInBitStream.value;
var integer: blockMagic is 0;
var string: uncompressed is "";
var integer: position is 1;
end struct;
type_implements_interface(bzip2File, file);
const func file: openBzip2File (inout file: compressed, READ) is func
result
var file: newFile is STD_NULL;
local
var bzip2Header: header is bzip2Header.value;
var bzip2File: new_bzip2File is bzip2File.value;
begin
header := readBzip2Header(compressed);
if header.magic = BZIP2_MAGIC then
new_bzip2File.compressedStream := openMsbInBitStream(compressed);
new_bzip2File.blockMagic :=
getBits(new_bzip2File.compressedStream, 32) << 16 +
getBits(new_bzip2File.compressedStream, 16);
newFile := toInterface(new_bzip2File);
end if;
end func;
const proc: close (in bzip2File: aFile) is noop;
const func char: getc (inout bzip2File: inFile) is func
result
var char: charRead is ' ';
begin
while inFile.position > length(inFile.uncompressed) and
inFile.blockMagic = BZIP2_BLOCK_HEADER_MAGIC do
inFile.uncompressed &:= decompressBzip2Block(inFile.compressedStream);
inFile.blockMagic := getBits(inFile.compressedStream, 32) << 16 +
getBits(inFile.compressedStream, 16);
end while;
if inFile.position <= length(inFile.uncompressed) then
charRead := inFile.uncompressed[inFile.position];
incr(inFile.position);
elsif inFile.blockMagic <> BZIP2_EOS_MAGIC then
raise RANGE_ERROR;
else
charRead := EOF;
end if;
end func;
const func string: gets (inout bzip2File: inFile, in integer: maxLength) is func
result
var string: striRead is "";
begin
if maxLength <= 0 then
if maxLength <> 0 then
raise RANGE_ERROR;
end if;
else
while maxLength > succ(length(inFile.uncompressed) - inFile.position) and
inFile.blockMagic = BZIP2_BLOCK_HEADER_MAGIC do
inFile.uncompressed &:= decompressBzip2Block(inFile.compressedStream);
inFile.blockMagic := getBits(inFile.compressedStream, 32) << 16 +
getBits(inFile.compressedStream, 16);
end while;
if maxLength <= succ(length(inFile.uncompressed) - inFile.position) then
striRead := inFile.uncompressed[inFile.position fixLen maxLength];
inFile.position +:= maxLength;
elsif inFile.blockMagic <> BZIP2_EOS_MAGIC then
raise RANGE_ERROR;
else
striRead := inFile.uncompressed[inFile.position ..];
inFile.position := succ(length(inFile.uncompressed));
end if;
end if;
end func;
const func boolean: eof (in bzip2File: inFile) is
return inFile.position > length(inFile.uncompressed) and
inFile.blockMagic <> BZIP2_BLOCK_HEADER_MAGIC;
const func boolean: hasNext (inout bzip2File: inFile) is func
result
var boolean: hasNext is FALSE;
begin
while inFile.position > length(inFile.uncompressed) and
inFile.blockMagic = BZIP2_BLOCK_HEADER_MAGIC do
inFile.uncompressed &:= decompressBzip2Block(inFile.compressedStream);
inFile.blockMagic := getBits(inFile.compressedStream, 32) << 16 +
getBits(inFile.compressedStream, 16);
end while;
if inFile.blockMagic <> BZIP2_EOS_MAGIC then
raise RANGE_ERROR;
else
hasNext := inFile.position <= length(inFile.uncompressed);
end if;
end func;
const func integer: length (inout bzip2File: aFile) is func
result
var integer: length is 0;
begin
while aFile.blockMagic = BZIP2_BLOCK_HEADER_MAGIC do
aFile.uncompressed &:= decompressBzip2Block(aFile.compressedStream);
aFile.blockMagic := getBits(aFile.compressedStream, 32) << 16 +
getBits(aFile.compressedStream, 16);
end while;
if aFile.blockMagic <> BZIP2_EOS_MAGIC then
raise RANGE_ERROR;
else
length := length(aFile.uncompressed);
end if;
end func;
const boolean: seekable (in bzip2File: aFile) is TRUE;
const proc: seek (inout bzip2File: aFile, in integer: position) is func
begin
if position <= 0 then
raise RANGE_ERROR;
else
aFile.position := position;
end if;
end func;
const func integer: tell (in bzip2File: aFile) is
return aFile.position;