Seed7 - The extensible programming language
Seed7 FAQ Manual Screenshots Examples Libraries Algorithms Download Links
Algorithms Sorting Searching Date & Time String Float Mathematics Message digest Graphics File Puzzles Others
Algorithms
String
 previous   up   next 

Replace tabs with the corresponding number of spaces

const proc: delTabs (inout string: stri, in integer: tabJump) is func
  local
    var integer: tabPos is 0;
    var integer: blankCount is 0;
  begin
    tabPos := pos(stri, "\t");
    while tabPos <> 0 do
      blankCount := tabJump - pred(tabPos) rem tabJump;
      stri := stri[ .. pred(tabPos)] &
          " " mult blankCount & stri[succ(tabPos) .. ];
      tabPos := pos(stri, "\t");
    end while;
  end func;

Replace leading spaces with the corresponding number of tabs

const proc: insertLeadingTabs (inout string: stri, in integer: tabJump) is func
  local
    var integer: blankCount is 1;
    var integer: tabCount is 0;
  begin
    while blankCount <= length(stri) and stri[blankCount] = ' ' do
      incr(blankCount);
    end while;
    decr(blankCount);
    if blankCount >= tabJump then
      tabCount := blankCount div tabJump;
      stri := "\t" mult tabCount & stri[succ(tabCount * tabJump) .. ];
    end if;
  end func;

Replace spaces with the corresponding number of tabs

const proc: insertTabs (inout string: stri, in integer: tabJump) is func
  local
    var integer: blankPos is 0;
    var integer: pos is 0;
    var integer: tabCount1 is 0;
    var integer: tabCount2 is 0;
  begin
    blankPos := rpos(stri, ' ');
    while blankPos <> 0 do
      pos := pred(blankPos);
      while pos >= 1 and stri[pos] = ' ' do
        decr(pos);
      end while;
      tabCount1 := pos div tabJump;
      tabCount2 := blankPos div tabJump;
      if blankPos - pos >= 2 and tabCount2 > tabCount1 then
        stri := stri[.. pos] & "\t" mult (tabCount2 - tabCount1) &
            stri[succ(tabCount2 * tabJump) ..];
      end if;
      blankPos := rpos(stri, ' ', pos);
    end while;
  end func;

Convert an Unicode UTF-32 string to UTF-8

The "string.s7i" library defines 'striToUtf8' which converts an UTF-32 string to UTF-8. The function 'striToUtf8_2' below shows how 'striToUtf8' works:

const func string: striToUtf8_2 (in string: stri) is func
  result
    var string: stri8 is "";
  local
    var char: ch is ' ';
    var integer: pos is 1;
  begin
    stri8 := " " mult (6 * length(stri));
    for ch range stri do
      if ch <= '\16#7f;' then
        stri8 @:= [pos] ch;
        incr(pos);
      elsif ch <= '\16#7ff;' then
        stri8 @:= [pos    ] chr(16#C0 + ( ord(ch) >>  6));
        stri8 @:= [pos + 1] chr(16#80 + ( ord(ch)        mod 16#40));
        pos +:= 2;
      elsif ch <= '\16#ffff;' then
        stri8 @:= [pos    ] chr(16#E0 + ( ord(ch) >> 12));
        stri8 @:= [pos + 1] chr(16#80 + ((ord(ch) >>  6) mod 16#40));
        stri8 @:= [pos + 2] chr(16#80 + ( ord(ch)        mod 16#40));
        pos +:= 3;
      elsif ch <= '\16#1fffff;' then
        stri8 @:= [pos    ] chr(16#F0 + ( ord(ch) >> 18));
        stri8 @:= [pos + 1] chr(16#80 + ((ord(ch) >> 12) mod 16#40));
        stri8 @:= [pos + 2] chr(16#80 + ((ord(ch) >>  6) mod 16#40));
        stri8 @:= [pos + 3] chr(16#80 + ( ord(ch)        mod 16#40));
        pos +:= 4;
      elsif ch <= '\16#3ffffff;' then
        stri8 @:= [pos    ] chr(16#F8 + ( ord(ch) >> 24));
        stri8 @:= [pos + 1] chr(16#80 + ((ord(ch) >> 18) mod 16#40));
        stri8 @:= [pos + 2] chr(16#80 + ((ord(ch) >> 12) mod 16#40));
        stri8 @:= [pos + 3] chr(16#80 + ((ord(ch) >>  6) mod 16#40));
        stri8 @:= [pos + 4] chr(16#80 + ( ord(ch)        mod 16#40));
        pos +:= 5;
      else
        stri8 @:= [pos    ] chr(16#FC + ( ord(ch) >> 30));
        stri8 @:= [pos + 1] chr(16#80 + ((ord(ch) >> 24) mod 16#40));
        stri8 @:= [pos + 2] chr(16#80 + ((ord(ch) >> 18) mod 16#40));
        stri8 @:= [pos + 3] chr(16#80 + ((ord(ch) >> 12) mod 16#40));
        stri8 @:= [pos + 4] chr(16#80 + ((ord(ch) >>  6) mod 16#40));
        stri8 @:= [pos + 5] chr(16#80 + ( ord(ch)        mod 16#40));
        pos +:= 6;
      end if;
    end for;
    stri8 := stri8[.. pred(pos)];
  end func;

Convert an Unicode UTF-8 string to UTF-32

The "string.s7i" library defines 'utf8ToStri' which converts an UTF-8 string to UTF-32. The function 'utf8ToStri_2' below shows how 'utf8ToStri' works:

const func string: utf8ToStri_2 (in string: stri8) is func
  result
    var string: stri is "";
  local
    var integer: length is 0;
    var integer: pos8 is 0;
    var integer: pos is 0;
    var boolean: okay is TRUE;
  begin
    length := length(stri8);
    stri := " " mult length;
    pos8 := 1;
    pos := 0;
    while length > 0 do
      incr(pos);
      if ord(stri8[pos8]) <= 16#7F then
        stri @:= [pos] stri8[pos8];
        incr(pos8);
        decr(length);
      elsif ord(stri8[pos8    ]) >> 5 = 16#06 and length >= 2 and
            ord(stri8[pos8 + 1]) >> 6 = 16#02 then
        stri @:= [pos] chr((ord(stri8[pos8    ]) mod 16#20 << 6) +
                           (ord(stri8[pos8 + 1]) mod 16#40));
        pos8 +:= 2;
        length -:= 2;
      elsif ord(stri8[pos8    ]) >> 4 = 16#0E and length >= 3 and
            ord(stri8[pos8 + 1]) >> 6 = 16#02 and
            ord(stri8[pos8 + 2]) >> 6 = 16#02 then
        stri @:= [pos] chr((ord(stri8[pos8    ]) mod 16#10 << 12) +
                           (ord(stri8[pos8 + 1]) mod 16#40 <<  6) +
                           (ord(stri8[pos8 + 2]) mod 16#40));
        pos8 +:= 3;
        length -:= 3;
      elsif ord(stri8[pos8    ]) >> 3 = 16#1E and length >= 4 and
            ord(stri8[pos8 + 1]) >> 6 = 16#02 and
            ord(stri8[pos8 + 2]) >> 6 = 16#02 and
            ord(stri8[pos8 + 3]) >> 6 = 16#02 then
        stri @:= [pos] chr((ord(stri8[pos8    ]) mod 16#08 << 18) +
                           (ord(stri8[pos8 + 1]) mod 16#40 << 12) +
                           (ord(stri8[pos8 + 2]) mod 16#40 <<  6) +
                           (ord(stri8[pos8 + 3]) mod 16#40));
        pos8 +:= 4;
        length -:= 4;
      elsif ord(stri8[pos8    ]) >> 2 = 16#3E and length >= 5 and
            ord(stri8[pos8 + 1]) >> 6 = 16#02 and
            ord(stri8[pos8 + 2]) >> 6 = 16#02 and
            ord(stri8[pos8 + 3]) >> 6 = 16#02 and
            ord(stri8[pos8 + 4]) >> 6 = 16#02 then
        stri @:= [pos] chr((ord(stri8[pos8    ]) mod 16#04 << 24) +
                           (ord(stri8[pos8 + 1]) mod 16#40 << 18) +
                           (ord(stri8[pos8 + 2]) mod 16#40 << 12) +
                           (ord(stri8[pos8 + 3]) mod 16#40 <<  6) +
                           (ord(stri8[pos8 + 4]) mod 16#40));
        pos8 +:= 5;
        length -:= 5;
      elsif ord(stri8[pos8    ]) >> 2 = 16#3F and length >= 6 and
            ord(stri8[pos8 + 1]) >> 6 = 16#02 and
            ord(stri8[pos8 + 2]) >> 6 = 16#02 and
            ord(stri8[pos8 + 3]) >> 6 = 16#02 and
            ord(stri8[pos8 + 4]) >> 6 = 16#02 and
            ord(stri8[pos8 + 5]) >> 6 = 16#02 then
        stri @:= [pos] chr((ord(stri8[pos8    ]) mod 16#04 << 30) +
                           (ord(stri8[pos8 + 1]) mod 16#40 << 24) +
                           (ord(stri8[pos8 + 2]) mod 16#40 << 18) +
                           (ord(stri8[pos8 + 3]) mod 16#40 << 12) +
                           (ord(stri8[pos8 + 4]) mod 16#40 <<  6) +
                           (ord(stri8[pos8 + 5]) mod 16#40));
        pos8 +:= 6;
        length -:= 6;
      else
        okay := FALSE;
        length := 0;
      end if;
    end while;
    if okay then
      stri := stri[.. pos];
    else
      raise RANGE_ERROR;
    end if;
  end func;

Encode a string with the Base64 encoding

The function toBase64 is part of the "encoding.s7i" library. Base64 encodes a byte string as ASCII string. This is done by taking packs of 6-bits and translating them into a radix-64 representation. The radix-64 digits are encoded with letters, digits and the characters '+' and '/'.

const func string: toBase64 (in string: byteStri) is func
  result
    var string: base64 is "";
  local
    const string: coding is "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    var integer: index is 1;
    var integer: subIndex is 1;
    var char: ch is ' ';
    var integer: threeBytes is 0;
    var string: fourBytes is "    ";
    var integer: posToAddNewline is 58;
  begin
    for index range 1 to length(byteStri) step 3 do
      threeBytes := 0;
      for subIndex range index to index + 2 do
        threeBytes <<:= 8;
        if subIndex <= length(byteStri) then
          ch := byteStri[subIndex];
          if ch >= '\256;' then
            raise RANGE_ERROR;
          end if;
          threeBytes +:= ord(ch);
        end if;
      end for;
      fourBytes @:= [1] coding[succ( threeBytes >> 18)];
      fourBytes @:= [2] coding[succ((threeBytes >> 12) mod 64)];
      fourBytes @:= [3] coding[succ((threeBytes >>  6) mod 64)];
      fourBytes @:= [4] coding[succ( threeBytes        mod 64)];
      if index = posToAddNewline then
        base64 &:= "\n";
        posToAddNewline +:= 57;
      end if;
      base64 &:= fourBytes;
    end for;
    index := length(base64);
    if length(byteStri) rem 3 = 2 then
      base64 @:= [index] '=';
    elsif length(byteStri) rem 3 = 1 then
      base64 @:= [pred(index)] "==";
    end if;
  end func;

Decode a Base64 encoded string

The function fromBase64 is part of the "encoding.s7i" library.

const func string: fromBase64 (in string: base64) is func
  result
    var string: decoded is "";
  local
    const array integer: decode is [] (                      # -1 is illegal
        62, -1, -1, -1, 63,                                  # + /
        52, 53, 54, 55, 56, 57, 58, 59, 60, 61,              # 0 - 9
        -1, -1, -1,  0, -1, -1, -1,                          # =
         0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,  # A - M
        13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,  # N - Z
        -1, -1, -1, -1, -1, -1,
        26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,  # a - m
        39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51); # n - z
    var integer: index is 1;
    var integer: subIndex is 1;
    var integer: number is 0;
    var integer: fourBytes is 0;
    var string: threeBytes is "   ";
  begin
    while index <= length(base64) - 3 do
      if base64[index] >= '+' then
        fourBytes := 0;
        for subIndex range index to index + 3 do
          number := decode[ord(base64[subIndex]) - ord(pred('+'))];
          if number = -1 then
            raise RANGE_ERROR;
          end if;
          fourBytes := (fourBytes << 6) + number;
        end for;
        threeBytes @:= [1] chr( fourBytes >> 16);
        threeBytes @:= [2] chr((fourBytes >>  8) mod 256);
        threeBytes @:= [3] chr( fourBytes        mod 256);
        decoded &:= threeBytes;
        index +:= 4;
      elsif base64[index] = '\n' then
        incr(index);
      elsif base64[index] = '\r' or base64[succ(index)] = '\n' then
        index +:= 2;
      else
        raise RANGE_ERROR;
      end if;
    end while;
    if index <> succ(length(base64)) then
      raise RANGE_ERROR;
    end if;
    if base64[length(base64) - 1 len 2] = "==" then
      decoded := decoded[.. length(decoded) - 2];
    elsif length(base64) >= 1 and base64[length(base64)] = '=' then
      decoded := decoded[.. pred(length(decoded))];
    end if;
  end func;

Encode a string with the Quoted-printable encoding

The function toQuotedPrintable is part of the "encoding.s7i" library. Quoted-printable encodes a byte string as ASCII string. This is done by encoding printable ASCII characters except '=' as themself. Other byte values are encoded with '=' followed by two hexadecimal digits representing the byte's numeric value.

const func string: toQuotedPrintable (in string: byteStri) is func
  result
    var string: quoted is "";
  local
    var integer: index is 0;
    var integer: startPos is 1;
    var integer: counter is 1;
    var char: ch is ' ';
  begin
    for index range 1 to length(byteStri) do
      ch := byteStri[index];
      if ch >= '\256;' then
        raise RANGE_ERROR;
      elsif ch = '\n' or (ch = '\r' and
          index < length(byteStri) and byteStri[succ(index)] = '\n') then
        if index > 1 then
          ch := byteStri[pred(index)];
          if ch = ' ' or ch = '\t' then
            quoted &:= byteStri[startPos .. index - 2];
            if counter >= 76 then
              quoted &:= "=\n";
              counter := 1;
            end if;
            quoted &:= "=" <& ord(byteStri[pred(index)]) RADIX 16 lpad0 2;
            counter +:= 3;
            startPos := index;
          end if;
        end if;
        counter := 1;
      elsif ch >= '\127;' or ch = '=' or (ch < ' ' and ch <> '\9;') then
        quoted &:= byteStri[startPos .. pred(index)];
        if counter >= 74 then
          quoted &:= "=\n";
          counter := 1;
        end if;
        quoted &:= "=" <& ord(ch) RADIX 16 lpad0 2;
        startPos := succ(index);
        counter +:= 3;
      elsif counter >= 76 then
        quoted &:= byteStri[startPos .. pred(index)] & "=\n";
        startPos := index;
        counter := 2;
      else
        incr(counter);
      end if;
    end for;
    quoted &:= byteStri[startPos ..];
  end func;

Decode a Quoted-printable encoded string

The function fromQuotedPrintable is part of the "encoding.s7i" library.

const func string: fromQuotedPrintable (in string: quoted) is func
  result
    var string: decoded is "";
  local
    var integer: startPos is 1;
    var integer: equalSignPos is 0;
    var string: twoChars is "";
  begin
    equalSignPos := pos(quoted, "=");
    while equalSignPos <> 0 do
      decoded &:= quoted[startPos .. pred(equalSignPos)];
      if equalSignPos < length(quoted) and
          quoted[succ(equalSignPos)] = '\n' then
        startPos := equalSignPos + 2;
      elsif equalSignPos <= length(quoted) - 2 then
        twoChars := quoted[succ(equalSignPos) len 2];
        if twoChars[1] in hexdigit_char then
          decoded &:= chr(integer(twoChars, 16));
        elsif twoChars <> "\r\n" then
          raise RANGE_ERROR;
        end if;
        startPos := equalSignPos + 3;
      else
        raise RANGE_ERROR;
      end if;
      equalSignPos := pos(quoted, "=", startPos);
    end while;
    decoded &:= quoted[startPos ..];
  end func;

Uuencode a string

The function toUuencoded is part of the "encoding.s7i" library. Uuencode encodes a byte string as ASCII string. This is done by taking packs of 6-bits and translating them into a radix-64 representation. The radix-64 digits are encoded with consecutive ASCII characters starting from ' ' (which represents 0). Every line starts with a radix-64 digit character indicating the number of data bytes encoded on that line.

const func string: toUuencoded (in string: byteStri) is func
  result
    var string: uuencoded is "";
  local
    var integer: index is 1;
    var integer: subIndex is 1;
    var char: ch is ' ';
    var integer: threeBytes is 0;
    var string: fourBytes is "    ";
    var integer: posToAddNewline is 43;
  begin
    if length(byteStri) <> 0 then
      if length(byteStri) < 45 then
        uuencoded &:= chr(32 + length(byteStri));
      else
        uuencoded &:= "M";
      end if;
      for index range 1 to length(byteStri) step 3 do
        threeBytes := 0;
        for subIndex range index to index + 2 do
          threeBytes <<:= 8;
          if subIndex <= length(byteStri) then
            ch := byteStri[subIndex];
            if ch >= '\256;' then
              raise RANGE_ERROR;
            end if;
            threeBytes +:= ord(ch);
          end if;
        end for;
        fourBytes @:= [1] chr(32 + (threeBytes >> 18));
        fourBytes @:= [2] chr(32 + (threeBytes >> 12) mod 64);
        fourBytes @:= [3] chr(32 + (threeBytes >>  6) mod 64);
        fourBytes @:= [4] chr(32 +  threeBytes        mod 64);
        uuencoded &:= fourBytes;
        if index = posToAddNewline and length(byteStri) > index + 2 then
          if length(byteStri) - index - 2 < 45 then
            uuencoded &:= "\n" <& chr(32 + length(byteStri) - index - 2);
          else
            uuencoded &:= "\nM";
          end if;
          posToAddNewline +:= 45;
        end if;
      end for;
      uuencoded &:= "\n";
    end if;
    uuencoded &:= "`\n";
  end func;

Decode an uuencoded string

The function fromUuencoded is part of the "encoding.s7i" library.

const func string: fromUuencoded (in string: uuencoded) is func
  result
    var string: decoded is "";
  local
    var integer: lineLength is 1;
    var integer: index is 1;
    var integer: subIndex is 1;
    var integer: number is 0;
    var integer: fourBytes is 0;
    var string: threeBytes is "   ";
  begin
    lineLength := ord(uuencoded[1]) - 32;
    while lineLength <> 0 and lineLength <> 64 do
      incr(index);
      while lineLength >= 1 do
        fourBytes := 0;
        for subIndex range index to index + 3 do
          number := ord(uuencoded[subIndex]) - 32;
          if number = 64 then
            number := 0;
          elsif number < 0 or number > 64 then
            raise RANGE_ERROR;
          end if;
          fourBytes := (fourBytes << 6) + number;
        end for;
        threeBytes @:= [1] chr( fourBytes >> 16);
        threeBytes @:= [2] chr((fourBytes >>  8) mod 256);
        threeBytes @:= [3] chr( fourBytes        mod 256);
        decoded &:= threeBytes[ .. lineLength];
        lineLength -:= 3;
        index +:= 4;
      end while;
      while index <= length(uuencoded) and uuencoded[index] <> '\n' do
        incr(index);
      end while;
      if index < length(uuencoded) then
        incr(index);
        lineLength := ord(uuencoded[index]) - 32;
      else
        lineLength := 0;
      end if;
    end while;
  end func;

Compress a string using the Lempel Ziv Welch (LZW) compression

This algorithm compresses a byte string to a string of tokens which contains characters >= 256. To write this to a byte file it is necessary to add an algorithm which writes the tokens with 9 or more bits.

const func string: lzwCompress (in string: uncompressed) is func
  result
    var string: compressed is "";
  local
    var char: ch is ' ';
    var hash [string] char: mydict is (hash [string] char).value;
    var string: buffer is "";
    var string: xstr is "";
  begin
    for ch range chr(0) to chr(255) do
      mydict @:= [str(ch)] ch;
    end for;
    for ch range uncompressed do
      xstr := buffer & str(ch);
      if xstr in mydict then
        buffer &:= str(ch)
      else
        compressed &:= str(mydict[buffer]);
        mydict @:= [xstr] chr(length(mydict));
        buffer := str(ch);
      end if;
    end for;
    if buffer <> "" then
      compressed &:= str(mydict[buffer]);
    end if;
  end func;

Decompress a Lempel Ziv Welch (LZW) compressed string

The compressed string consists of a sequence of tokens (which contain also characters >= 256). The decompress algorithm produces a byte string.

const func string: lzwDecompress (in string: compressed) is func
  result
    var string: uncompressed is "";
  local
    var char: ch is ' ';
    var hash [char] string: mydict is (hash [char] string).value;
    var string: buffer is "";
    var string: current is "";
    var string: chain is "";
  begin
    for ch range chr(0) to chr(255) do
      mydict @:= [ch] str(ch);
    end for;
    for ch range compressed do
      if buffer = "" then
        buffer := mydict[ch];
        uncompressed &:= buffer;
      elsif ch <= chr(255) then
        current := mydict[ch];
        uncompressed &:= current;
        chain := buffer & current;
        mydict @:= [chr(length(mydict))] chain;
        buffer := current;
      else
        if ch in mydict then
          chain := mydict[ch];
        else
          chain := buffer & str(buffer[1]);
        end if;
        uncompressed &:= chain;
        mydict @:= [chr(length(mydict))] buffer & str(chain[1]);
        buffer := chain;
      end if;
    end for;
  end func;

Compress a string using the run length encoding of bzip2

A sequence of 4 to 259 identical characters is replaced by four identical characters followed by a repeat length between 0 and 255. This run length encoding is used as first compression technique in the bzip2 compression.

const func string: bzip2RleCompress (in string: uncompressed) is func
  result
    var string: compressed is "";
  local
    var integer: index is 1;
    var integer: oldpos is 1;
    var char: ch is ' ';
  begin
    while index <= length(uncompressed) - 3 do
      ch := uncompressed[index];
      if  uncompressed[succ(index)] = ch and
          uncompressed[index + 2] = ch and
          uncompressed[index + 3] = ch then
        index +:= 4;
        compressed &:= uncompressed[oldpos .. pred(index)];
        oldpos := index;
        while index <= length(uncompressed) and
            uncompressed[index] = ch do
          incr(index);
        end while;
        compressed &:= chr(index - oldpos);
        oldpos := index;
      else
        incr(index);
      end if;
    end while;
    compressed &:= uncompressed[oldpos ..];
  end func;

Decompress a string using the run length encoding of bzip2

A sequence of 4 identical characters followed by a repeat length between 0 and 255 is replaced by 4 to 259 identical characters. This run length decoding is used as last decompression technique in the bzip2 decompression.

const func string: bzip2RleDecompress (in string: compressed) is func
  result
    var string: uncompressed is "";
  local
    var integer: index is 1;
    var integer: oldpos is 1;
    var char: ch is ' ';
  begin
    while index <= length(compressed) - 3 do
      ch := compressed[index];
      if  compressed[succ(index)] = ch and
          compressed[index + 2] = ch and
          compressed[index + 3] = ch then
        index +:= 4;
        uncompressed &:= compressed[oldpos .. pred(index)] & str(ch) mult ord(compressed[index]);
        incr(index);
        oldpos := index;
      else
        incr(index);
      end if;
    end while;
    uncompressed &:= compressed[oldpos ..];
  end func;

Compress a string using the run length encoding of PackBits

const func string: packBits (in string: uncompressed) is func
  result
    var string: compressed is "";
  local
    var integer: index is 1;
    var integer: oldpos is 1;
    var char: ch is ' ';
  begin
    while index <= length(uncompressed) - 2 do
      ch := uncompressed[index];
      if  uncompressed[succ(index)] = ch and
          uncompressed[index + 2] = ch then
        while index - oldpos >= 128 do
          compressed &:= "\127;" & uncompressed[oldpos len 128];
          oldpos +:= 128;
        end while;
        if index > oldpos then
          compressed &:= str(chr(pred(index - oldpos))) & uncompressed[oldpos .. pred(index)];
          oldpos := index;
        end if;
        index +:= 3;
        while index <= length(uncompressed) and uncompressed[index] = ch do
          incr(index);
        end while;
        while index - oldpos >= 128 do
          compressed &:= "\129;" & str(ch);
          oldpos +:= 128;
        end while;
        if pred(index) > oldpos then
          compressed &:= str(chr(257 - (index - oldpos))) & str(ch);
          oldpos := index;
        end if;
      else
        incr(index);
      end if;
    end while;
    index := succ(length(uncompressed));
    while index - oldpos >= 128 do
      compressed &:= "\127;" & uncompressed[oldpos len 128];
      oldpos +:= 128;
    end while;
    if index > oldpos then
      compressed &:= str(chr(pred(index - oldpos))) & uncompressed[oldpos ..];
    end if;
  end func;

Decompress a string using the run length encoding of PackBits

const func string: unpackBits (in string: compressed) is func
  result
    var string: uncompressed is "";
  local
    var integer: index is 1;
    var integer: oldpos is 1;
    var char: ch is ' ';
  begin
    while index <= length(compressed) do
      ch := compressed[index];
      if ch <= chr(127) then
        uncompressed &:= compressed[succ(index) len succ(ord(ch))];
        index +:= ord(ch) + 2;
      else
        uncompressed &:= str(compressed[succ(index)]) mult (257 - ord(ch));
        index +:= 2;
      end if;
    end while;
  end func;

Burrows-Wheeler transform (basic concept)

This algorithm demonstrates the basic concept of the Burrows-Wheeler transform. It is not intended to be used for big data blocks.

const func string: burrowsWheelerTransformConcept (in string: stri) is func
  result
    var string: transformed is "";
  local
    var integer: length is 0;
    var integer: index is 0;
    var array string: rotations is 0 times "";
  begin
    length := succ(length(stri));
    rotations := length times "";
    for index range 1 to length do
      rotations[index] := stri[index ..] & "\256;" & stri[.. pred(index)];
    end for;
    rotations := sort(rotations);
    for index range 1 to length do
      transformed &:= str(rotations[index][length]);
    end for;
  end func;

Inverse Burrows-Wheeler transform (basic concept)

This algorithm demonstrates the basic concept of the Burrows-Wheeler transform. It is not intended to be used for big data blocks.

const func string: inverseBurrowsWheelerTransformConcept (in string: transformed) is func
  result
    var string: stri is "";
  local
    var integer: length is 0;
    var integer: count is 0;
    var integer: index is 0;
    var array string: rotations is 0 times "";
  begin
    length := length(transformed);
    rotations := length times "";
    for count range 1 to length do
      for index range 1 to length do
        rotations[index] := str(transformed[index]) & rotations[index];
      end for;
      rotations := sort(rotations);
    end for;
    stri := rotations[1];
    index := pos(stri, "\256;");
    stri := stri[succ(index) ..] & stri[.. pred(index)];
  end func;

Convert a character string to morse code

const func string: charToMorse (in char: ch) is func
  result
    var string: morseCode is "";
  begin
    case ch of
      when {'a', 'A'}: morseCode := ".-";
      when {'b', 'B'}: morseCode := "-...";
      when {'c', 'C'}: morseCode := "-.-.";
      when {'d', 'D'}: morseCode := "-..";
      when {'e', 'E'}: morseCode := ".";
      when {'f', 'F'}: morseCode := "..-.";
      when {'g', 'G'}: morseCode := "--.";
      when {'h', 'H'}: morseCode := "....";
      when {'i', 'I'}: morseCode := "..";
      when {'j', 'J'}: morseCode := ".---";
      when {'k', 'K'}: morseCode := "-.-";
      when {'l', 'L'}: morseCode := ".-..";
      when {'m', 'M'}: morseCode := "--";
      when {'n', 'N'}: morseCode := "-.";
      when {'o', 'O'}: morseCode := "---";
      when {'p', 'P'}: morseCode := ".--.";
      when {'q', 'Q'}: morseCode := "--.-";
      when {'r', 'R'}: morseCode := ".-.";
      when {'s', 'S'}: morseCode := "...";
      when {'t', 'T'}: morseCode := "-";
      when {'u', 'U'}: morseCode := "..-";
      when {'v', 'V'}: morseCode := "...-";
      when {'w', 'W'}: morseCode := ".--";
      when {'x', 'X'}: morseCode := "-..-";
      when {'y', 'Y'}: morseCode := "-.--";
      when {'z', 'Z'}: morseCode := "--..";
      when {'Ä', 'Æ'}: morseCode := ".-.-";
      when {'À', 'Å'}: morseCode := ".--.-";
      when {'Ç', 'Ĉ'}: morseCode := "-.-..";
      when {'Ð'}:      morseCode := "..--.";
      when {'È'}:      morseCode := ".-..-";
      when {'É'}:      morseCode := "..-..";
      when {'Ĝ'}:      morseCode := "--.-.";
      when {'Ĵ'}:      morseCode := ".---.";
      when {'Ñ'}:      morseCode := "--.--";
      when {'Ö', 'Ø'}: morseCode := "---.";
      when {'Ŝ'}:      morseCode := "...-.";
      when {'Ü', 'Ŭ'}: morseCode := "..--";
      when {'Þ'}:      morseCode := ".--..";
      when {'0'}:      morseCode := "-----";
      when {'1'}:      morseCode := ".----";
      when {'2'}:      morseCode := "..---";
      when {'3'}:      morseCode := "...--";
      when {'4'}:      morseCode := "....-";
      when {'5'}:      morseCode := ".....";
      when {'6'}:      morseCode := "-....";
      when {'7'}:      morseCode := "--...";
      when {'8'}:      morseCode := "---..";
      when {'9'}:      morseCode := "----.";
      when {'!'}:      morseCode := "-.-.--";
      when {'"'}:      morseCode := ".-..-.";
      when {'$'}:      morseCode := "...-..-";
      when {'''}:      morseCode := ".----.";
      when {'('}:      morseCode := "-.--.";
      when {')'}:      morseCode := "-.--.-";
      when {'+'}:      morseCode := ".-.-.";
      when {','}:      morseCode := "--..--";
      when {'-'}:      morseCode := "-....-";
      when {'.'}:      morseCode := ".-.-.-";
      when {'/'}:      morseCode := "-..-.";
      when {':'}:      morseCode := "---...";
      when {';'}:      morseCode := "-.-.-.";
      when {'='}:      morseCode := "-...-";
      when {'?'}:      morseCode := "..--..";
      when {'@'}:      morseCode := ".--.-.";
      when {' '}:      morseCode := " ";
    end case;
  end func;

const func string: stringToMorse (in string: stri) is func
  result
    var string: morseCode is "";
  local
    var char: ch is ' ';
  begin
    for ch range stri do
      morseCode &:= charToMorse(ch) & " ";
    end for;
  end func;

Convert morse code to a character string

const func char: morseToChar (in string: morseLetter) is func
  result
    var char: ch is ' ';
  begin
    if    morseLetter = "" then        ch := ' ';
    elsif morseLetter = "." then       ch := 'E';
    elsif morseLetter = "-" then       ch := 'T';
    elsif morseLetter = ".." then      ch := 'I';
    elsif morseLetter = ".-" then      ch := 'A';
    elsif morseLetter = "-." then      ch := 'N';
    elsif morseLetter = "--" then      ch := 'M';
    elsif morseLetter = "..." then     ch := 'S';
    elsif morseLetter = "..-" then     ch := 'U';
    elsif morseLetter = ".-." then     ch := 'R';
    elsif morseLetter = ".--" then     ch := 'W';
    elsif morseLetter = "-.." then     ch := 'D';
    elsif morseLetter = "-.-" then     ch := 'K';
    elsif morseLetter = "--." then     ch := 'G';
    elsif morseLetter = "---" then     ch := 'O';
    elsif morseLetter = "...." then    ch := 'H';
    elsif morseLetter = "...-" then    ch := 'V';
    elsif morseLetter = "..-." then    ch := 'F';
    elsif morseLetter = "..--" then    ch := 'Ü'; # also 'Ŭ'
    elsif morseLetter = ".-.." then    ch := 'L';
    elsif morseLetter = ".-.-" then    ch := 'Ä'; # also 'Æ'
    elsif morseLetter = ".--." then    ch := 'P';
    elsif morseLetter = ".---" then    ch := 'J';
    elsif morseLetter = "-..." then    ch := 'B';
    elsif morseLetter = "-..-" then    ch := 'X';
    elsif morseLetter = "-.-." then    ch := 'C';
    elsif morseLetter = "-.--" then    ch := 'Y';
    elsif morseLetter = "--.." then    ch := 'Z';
    elsif morseLetter = "--.-" then    ch := 'Q';
    elsif morseLetter = "---." then    ch := 'Ö'; # also 'Ø'
    elsif morseLetter = "----" then    ch := ' '; # 'ch'
    elsif morseLetter = "....." then   ch := '5';
    elsif morseLetter = "....-" then   ch := '4';
    elsif morseLetter = "...-." then   ch := 'Ŝ';
    elsif morseLetter = "...--" then   ch := '3';
    elsif morseLetter = "..-.." then   ch := 'É';
    elsif morseLetter = "..-.-" then   ch := ' '; # unused
    elsif morseLetter = "..--." then   ch := 'Ð';
    elsif morseLetter = "..---" then   ch := '2';
    elsif morseLetter = ".-..." then   ch := ' '; # unused
    elsif morseLetter = ".-..-" then   ch := 'È';
    elsif morseLetter = ".-.-." then   ch := '+';
    elsif morseLetter = ".-.--" then   ch := ' '; # unused
    elsif morseLetter = ".--.." then   ch := 'Þ';
    elsif morseLetter = ".--.-" then   ch := 'À'; # also 'Å'
    elsif morseLetter = ".---." then   ch := 'Ĵ';
    elsif morseLetter = ".----" then   ch := '1';
    elsif morseLetter = "-...." then   ch := '6';
    elsif morseLetter = "-...-" then   ch := '=';
    elsif morseLetter = "-..-." then   ch := '/';
    elsif morseLetter = "-..--" then   ch := ' '; # unused
    elsif morseLetter = "-.-.." then   ch := 'Ç'; # also 'Ĉ'
    elsif morseLetter = "-.-.-" then   ch := ' '; # Start of message
    elsif morseLetter = "-.--." then   ch := '('; # also 'Ĥ'
    elsif morseLetter = "-.---" then   ch := ' '; # unused
    elsif morseLetter = "--..." then   ch := '7';
    elsif morseLetter = "--..-" then   ch := ' '; # unused
    elsif morseLetter = "--.-." then   ch := 'Ĝ';
    elsif morseLetter = "--.--" then   ch := 'Ñ';
    elsif morseLetter = "---.." then   ch := '8';
    elsif morseLetter = "---.-" then   ch := ' '; # unused
    elsif morseLetter = "----." then   ch := '9';
    elsif morseLetter = "-----" then   ch := '0';
    elsif morseLetter = "..--.." then  ch := '?';
    elsif morseLetter = ".-..-." then  ch := '"';
    elsif morseLetter = ".-.-.-" then  ch := '.';
    elsif morseLetter = ".--.-." then  ch := '@';
    elsif morseLetter = ".----." then  ch := ''';
    elsif morseLetter = "-....-" then  ch := '-';
    elsif morseLetter = "-.-.--" then  ch := '!';
    elsif morseLetter = "-.-.-." then  ch := ';';
    elsif morseLetter = "-.--.-" then  ch := ')';
    elsif morseLetter = "--..--" then  ch := ',';
    elsif morseLetter = "---..." then  ch := ':';
    elsif morseLetter = "...-..-" then ch := '$';
    else                               ch := ' ';
    end if;
  end func;

const func string: morseToString (in string: morseCode) is func
  result
    var string: stri is "";
  local
    var array string: letters is 0 times "";
    var string: letter is "";
  begin
    letters := split(replace(morseCode, "  ", " "), ' ');
    for letter range letters do
      stri &:= str(morseToChar(letter));
    end for;
  end func;

Wildcard match used in command shells

const func boolean: wildcard_match (in string: main_stri, in string: pattern) is func
  result
    var boolean: doesMatch is FALSE;
  local
    var integer: main_length is 0;
    var integer: main_index is 1;
    var string: pattern_tail is "";
  begin
    if pattern = "" then
      doesMatch := main_stri = "";
    else
      case pattern[1] of
        when {'*'}:
          if pattern = "*" then
            doesMatch := TRUE;
          else
            main_length := length(main_stri);
            pattern_tail := pattern[2 .. ];
            while main_index <= main_length and not doesMatch do
              doesMatch := wildcard_match(main_stri[main_index .. ],
                  pattern_tail);
              incr(main_index);
            end while;
          end if;
        when {'?'}:
          if main_stri <> "" then
            doesMatch := wildcard_match(main_stri[2 .. ], pattern[2 .. ]);
          end if;
        otherwise:
          if main_stri <> "" and main_stri[1] = pattern[1] then
            doesMatch := wildcard_match(main_stri[2 .. ], pattern[2 .. ]);
          end if;
      end case;
    end if;
  end func;

String compare function where digit sequences are compared numerically

include "scanstri.s7i";

const func integer: cmpNumeric (in var string: stri1, in var string: stri2) is func
  result
    var integer: signumValue is 0;
  local
    var string: part1 is "";
    var string: part2 is "";
  begin
    while signumValue = 0 and (stri1 <> "" or stri2 <> "") do
      part1 := getDigits(stri1);
      part2 := getDigits(stri2);
      if part1 <> "" and part2 <> "" then
        signumValue := compare(part1 lpad0 length(part2), part2 lpad0 length(part1));
        if signumValue = 0 then
          signumValue := compare(length(part1), length(part2));
        end if;
      elsif part1 <> "" then
        signumValue := compare(part1, stri2);
      elsif part2 <> "" then
        signumValue := compare(stri1, part2);
      end if;
      if signumValue = 0 then
        part1 := getNonDigits(stri1);
        part2 := getNonDigits(stri2);
        if part1 <> "" and part2 <> "" then
          signumValue := compare(part1, part2);
        elsif part1 <> "" then
          signumValue := compare(part1, stri2);
        elsif part2 <> "" then
          signumValue := compare(stri1, part2);
        end if;
      end if;
    end while;
  end func;

 previous   up   next