(********************************************************************)
(*                                                                  *)
(*  scanjson.s7i  String and file scanner functions for JSON        *)
(*  Copyright (C) 2024  Thomas Mertes                               *)
(*                                                                  *)
(*  This file is part of the Seed7 Runtime Library.                 *)
(*                                                                  *)
(*  The Seed7 Runtime Library is free software; you can             *)
(*  redistribute it and/or modify it under the terms of the GNU     *)
(*  Lesser General Public License as published by the Free Software *)
(*  Foundation; either version 2.1 of the License, or (at your      *)
(*  option) any later version.                                      *)
(*                                                                  *)
(*  The Seed7 Runtime Library is distributed in the hope that it    *)
(*  will be useful, but WITHOUT ANY WARRANTY; without even the      *)
(*  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR *)
(*  PURPOSE.  See the GNU Lesser General Public License for more    *)
(*  details.                                                        *)
(*                                                                  *)
(*  You should have received a copy of the GNU Lesser General       *)
(*  Public License along with this program; if not, write to the    *)
(*  Free Software Foundation, Inc., 51 Franklin Street,             *)
(*  Fifth Floor, Boston, MA  02110-1301, USA.                       *)
(*                                                                  *)
(********************************************************************)


include "chartype.s7i";


(**
 *  Reads a JSON string from a [[string]].
 *  When the function is called it is assumed that stri[1] contains
 *  the introducing " . When the function is left ''stri'' is empty or
 *  stri[1] contains the character after the closing " .
 *   stri := "\"\"";         getJsonString(stri)  returns  "\"\""      and stri = ""
 *   stri := "\"x\"&ab";     getJsonString(stri)  returns  "\"x\""     and stri = "&ab"
 *   stri := "\"\\\\\"+";    getJsonString(stri)  returns  "\"\\\""    and stri = "+"
 *   stri := "\"\\\"\"/";    getJsonString(stri)  returns  "\"\"\""    and stri = "/"
 *   stri := "\"\\/\"*";     getJsonString(stri)  returns  "\"/\""     and stri = "*"
 *   stri := "\"\\b\\f\"";   getJsonString(stri)  returns  "\"\b\f\""  and stri = ""
 *   stri := "\"\\n\\r\"";   getJsonString(stri)  returns  "\"\n\r\""  and stri = ""
 *   stri := "\"\\t$\"-";    getJsonString(stri)  returns  "\"\t$\""   and stri = "-"
 *   stri := "\"\\u20ac\"";  getJsonString(stri)  returns  "\"€\""     and stri = ""
 *  @return the JSON string including the introducing " and the
 *          closing " .
 *  @exception RANGE_ERROR If the closing " is missing, if the string
 *             contains an unescaped character from ''control_char'',
 *             if an unknown escape sequence is used, or if \u is not
 *             followed by exactly four hexadecimal digits.
 *)
const func string: getJsonString (inout string: stri) is func
  result
    var string: symbol is "\"";
  local
    const set of char: special_stri_char is {'"', '\\'} | control_char;
    const set of char: json_hex_digit is {'0' .. '9'} | {'A' .. 'F'} | {'a' .. 'f'};
    var integer: leng is 0;
    var integer: startPos is 2;
    var integer: pos is 2;
    var integer: hexPos is 0;
  begin
    leng := length(stri);
    repeat
      # Copy the run of ordinary characters up to the next special one.
      startPos := pos;
      while pos <= leng and stri[pos] not in special_stri_char do
        incr(pos);
      end while;
      symbol &:= stri[startPos .. pred(pos)];
      if pos > leng or stri[pos] in control_char then
        # Unterminated string or raw control character.
        raise RANGE_ERROR;
      elsif stri[pos] = '\\' then
        incr(pos);
        if pos <= leng then
          case stri[pos] of
            when {'"', '\\', '/'}:
              symbol &:= stri[pos];
              incr(pos);
            when {'b'}:
              symbol &:= "\b";
              incr(pos);
            when {'f'}:
              symbol &:= "\f";
              incr(pos);
            when {'n'}:
              symbol &:= "\n";
              incr(pos);
            when {'r'}:
              symbol &:= "\r";
              incr(pos);
            when {'t'}:
              symbol &:= "\t";
              incr(pos);
            when {'u'}:
              if pos + 4 <= leng then
                # integer(string, base) would also accept a leading sign,
                # so every character is checked to be a hex digit first.
                for hexPos range succ(pos) to pos + 4 do
                  if stri[hexPos] not in json_hex_digit then
                    raise RANGE_ERROR;
                  end if;
                end for;
                symbol &:= char(integer(stri[succ(pos) fixLen 4], 16));
                pos +:= 5;
              else
                raise RANGE_ERROR;
              end if;
            otherwise:
              raise RANGE_ERROR;
          end case;
        else
          raise RANGE_ERROR;
        end if;
      end if;
    until pos <= leng and stri[pos] = '\"';
    symbol &:= '"';
    stri := stri[succ(pos) ..];
  end func;


(**
 *  Reads a JSON string from a [[file]].
 *  When the function is called it is assumed that the introducing "
 *  is in inFile.bufferChar. When the function is left the character
 *  after the closing " is in inFile.bufferChar.
 *   f := initScan("\"\"");        getJsonString(f) returns "\"\""     and f.bufferChar = EOF
 *   f := initScan("\"x\"&ab");    getJsonString(f) returns "\"x\""    and f.bufferChar = '&'
 *   f := initScan("\"\\\\\"+");   getJsonString(f) returns "\"\\\""   and f.bufferChar = '+'
 *   f := initScan("\"\\\"\"/");   getJsonString(f) returns "\"\"\""   and f.bufferChar = '/'
 *   f := initScan("\"\\/\"*");    getJsonString(f) returns "\"/\""    and f.bufferChar = '*'
 *   f := initScan("\"\\b\\f\"");  getJsonString(f) returns "\"\b\f\"" and f.bufferChar = EOF
 *   f := initScan("\"\\n\\r\"");  getJsonString(f) returns "\"\n\r\"" and f.bufferChar = EOF
 *   f := initScan("\"\\t$\"-");   getJsonString(f) returns "\"\t$\""  and f.bufferChar = '-'
 *   f := initScan("\"\\u20ac\""); getJsonString(f) returns "\"€\""    and f.bufferChar = EOF
 *  @return the JSON string including the introducing " and the
 *          closing " .
 *  @exception RANGE_ERROR If EOF is reached before the closing " ,
 *             if the string contains an unescaped character from
 *             ''control_char'', if an unknown escape sequence is
 *             used, or if \u is not followed by exactly four
 *             hexadecimal digits.
 *)
const func string: getJsonString (inout file: inFile) is func
  result
    var string: symbol is "\"";
  local
    const set of char: special_stri_char is {'"', '\\'} | control_char | {EOF};
    const set of char: json_hex_digit is {'0' .. '9'} | {'A' .. 'F'} | {'a' .. 'f'};
    var char: character is ' ';
    var string: fourBytes is "";
    var integer: hexPos is 0;
  begin
    # The introducing " is in bufferChar, so reading starts behind it.
    character := getc(inFile);
    repeat
      while character not in special_stri_char do
        symbol &:= character;
        character := getc(inFile);
      end while;
      if character in control_char or character = EOF then
        # Unterminated string or raw control character.
        raise RANGE_ERROR;
      elsif character = '\\' then
        character := getc(inFile);
        case character of
          when {'"', '\\', '/'}:
            symbol &:= character;
            character := getc(inFile);
          when {'b'}:
            symbol &:= "\b";
            character := getc(inFile);
          when {'f'}:
            symbol &:= "\f";
            character := getc(inFile);
          when {'n'}:
            symbol &:= "\n";
            character := getc(inFile);
          when {'r'}:
            symbol &:= "\r";
            character := getc(inFile);
          when {'t'}:
            symbol &:= "\t";
            character := getc(inFile);
          when {'u'}:
            fourBytes := gets(inFile, 4);
            if length(fourBytes) = 4 then
              # integer(string, base) would also accept a leading sign,
              # so every character is checked to be a hex digit first.
              for hexPos range 1 to 4 do
                if fourBytes[hexPos] not in json_hex_digit then
                  raise RANGE_ERROR;
                end if;
              end for;
              symbol &:= char(integer(fourBytes, 16));
              character := getc(inFile);
            else
              raise RANGE_ERROR;
            end if;
          otherwise:
            raise RANGE_ERROR;
        end case;
      end if;
    until character = '\"';
    symbol &:= '"';
    # Leave the character after the closing " in bufferChar.
    inFile.bufferChar := getc(inFile);
  end func;


(**
 *  Reads a JSON number from a [[string]].
 *  When the function is called it is assumed that stri[1] contains
 *  the introducing digit or an introducing minus sign (-). When the
 *  function is left ''stri'' is empty or stri[1] contains the character
 *  after the number.
 *   stri := "12";      getJsonNumber(stri)  returns  "12"      and stri = ""
 *   stri := "123*2";   getJsonNumber(stri)  returns  "123"     and stri = "*2"
 *   stri := "-2+3";    getJsonNumber(stri)  returns  "-2"      and stri = "+3"
 *   stri := "-01";     getJsonNumber(stri)  returns  "-0"      and stri = "1"
 *   stri := "1x";      getJsonNumber(stri)  returns  "1"       and stri = "x"
 *   stri := "1.0+";    getJsonNumber(stri)  returns  "1.0"     and stri = "+"
 *   stri := "1.0E1-";  getJsonNumber(stri)  returns  "1.0E1"   and stri = "-"
 *   stri := "1.0e-1";  getJsonNumber(stri)  returns  "1.0e-1"  and stri = ""
 *   stri := "1e2y";    getJsonNumber(stri)  returns  "1e2"     and stri = "y"
 *   stri := "1E+3z";   getJsonNumber(stri)  returns  "1E+3"    and stri = "z"
 *   stri := "1234_/";  getJsonNumber(stri)  returns  "1234"    and stri = "_/"
 *  @return the JSON number.
 *)
const func string: getJsonNumber (inout string: stri) is func
  result
    var string: symbol is "";
  local
    var integer: lastPos is 0;
    var integer: current is 1;
    var boolean: leadingZero is FALSE;
  begin
    lastPos := length(stri);
    # An optional minus sign may precede the digits.
    if current <= lastPos and stri[current] = '-' then
      current +:= 1;
    end if;
    # The integer part needs at least one digit.
    if current > lastPos or stri[current] not in digit_char then
      raise RANGE_ERROR;
    end if;
    # A leading 0 forms the whole integer part ("-01" yields "-0").
    leadingZero := stri[current] = '0';
    current +:= 1;
    if not leadingZero then
      while current <= lastPos and stri[current] in digit_char do
        current +:= 1;
      end while;
    end if;
    # Optional fraction: a dot followed by at least one digit.
    if current <= lastPos and stri[current] = '.' then
      current +:= 1;
      if current > lastPos or stri[current] not in digit_char then
        raise RANGE_ERROR;
      end if;
      while current <= lastPos and stri[current] in digit_char do
        current +:= 1;
      end while;
    end if;
    # Optional exponent: E or e, an optional sign and at least one digit.
    if current <= lastPos and stri[current] in {'E', 'e'} then
      current +:= 1;
      if current <= lastPos and stri[current] in {'+', '-'} then
        current +:= 1;
      end if;
      if current > lastPos or stri[current] not in digit_char then
        raise RANGE_ERROR;
      end if;
      while current <= lastPos and stri[current] in digit_char do
        current +:= 1;
      end while;
    end if;
    # Split stri into the number and the unread rest.
    symbol := stri[.. pred(current)];
    stri := stri[current ..];
  end func;


(**
 *  Reads a JSON number from a [[file]].
 *  When the function is called it is assumed that inFile.bufferChar
 *  contains the introducing digit or an introducing minus sign (-).
 *  When the function is left the character after the number is in
 *  inFile.bufferChar.
 *   f := initScan("12");     getJsonNumber(f) returns "12"     and f.bufferChar = EOF
 *   f := initScan("123*2");  getJsonNumber(f) returns "123"    and f.bufferChar = '*'
 *   f := initScan("-2+3");   getJsonNumber(f) returns "-2"     and f.bufferChar = '+'
 *   f := initScan("-01");    getJsonNumber(f) returns "-0"     and f.bufferChar = '1'
 *   f := initScan("1x");     getJsonNumber(f) returns "1"      and f.bufferChar = 'x'
 *   f := initScan("1.0+");   getJsonNumber(f) returns "1.0"    and f.bufferChar = '+'
 *   f := initScan("1.0E1-"); getJsonNumber(f) returns "1.0E1"  and f.bufferChar = '-'
 *   f := initScan("1.0e-1"); getJsonNumber(f) returns "1.0e-1" and f.bufferChar = EOF
 *   f := initScan("1e2y");   getJsonNumber(f) returns "1e2"    and f.bufferChar = 'y'
 *   f := initScan("1E+3z");  getJsonNumber(f) returns "1E+3"   and f.bufferChar = 'z'
 *   f := initScan("1234_/"); getJsonNumber(f) returns "1234"   and f.bufferChar = '_'
 *  @return the JSON number.
 *)
const func string: getJsonNumber (inout file: inFile) is func
  result
    var string: symbol is "";
  local
    var char: character is ' ';
  begin
    # Scanning starts with the character already held in bufferChar.
    character := inFile.bufferChar;
    if character = '-' then
      symbol := "-";
      character := getc(inFile);
    end if;
    # The integer part needs at least one digit.
    if character not in digit_char then
      raise RANGE_ERROR;
    end if;
    if character = '0' then
      # A leading 0 forms the whole integer part ("-01" yields "-0").
      symbol &:= "0";
      character := getc(inFile);
    else
      repeat
        symbol &:= character;
        character := getc(inFile);
      until character not in digit_char;
    end if;
    # Optional fraction: a dot followed by at least one digit.
    if character = '.' then
      symbol &:= character;
      character := getc(inFile);
      if character not in digit_char then
        raise RANGE_ERROR;
      else
        repeat
          symbol &:= character;
          character := getc(inFile);
        until character not in digit_char;
      end if;
    end if;
    # Optional exponent: E or e, an optional sign and at least one digit.
    if character = 'E' or character = 'e' then
      symbol &:= character;
      character := getc(inFile);
      if character = '+' or character = '-' then
        symbol &:= character;
        character := getc(inFile);
      end if;
      if character not in digit_char then
        raise RANGE_ERROR;
      else
        repeat
          symbol &:= character;
          character := getc(inFile);
        until character not in digit_char;
      end if;
    end if;
    # Leave the first character after the number in bufferChar.
    inFile.bufferChar := character;
  end func;


(**
 *  Reads a JSON symbol from a [[string]].
 *  Before reading the symbol it skips whitespace characters. A symbol
 *  can be a literal (number or string), a name, a special character
 *  (comma (,) or colon (:)) or a parenthesis. When the function is
 *  called it is assumed that ''stri'' is empty or stri[1] contains a
 *  whitespace character or the first character of a symbol. When the
 *  function is left ''stri'' is empty or stri[1] contains the character
 *  after the symbol.
 *   stri := " null ";  getJsonSymbol(stri)  returns  "null"   and stri = " "
 *   stri := "\ntrue";  getJsonSymbol(stri)  returns  "true"   and stri = ""
 *   stri := ",-9";     getJsonSymbol(stri)  returns  ","      and stri = "-9"
 *   stri := ":true";   getJsonSymbol(stri)  returns  ":"      and stri = "true"
 *   stri := "\r{ } ";  getJsonSymbol(stri)  returns  "{"      and stri = " } "
 *   stri := " [123]";  getJsonSymbol(stri)  returns  "["      and stri = "123]"
 *   stri := "\t987 ";  getJsonSymbol(stri)  returns  "987"    and stri = " "
 *   stri := " -123,";  getJsonSymbol(stri)  returns  "-123"   and stri = ","
 *   stri := " \"x\"";  getJsonSymbol(stri)  returns  "\"x\""  and stri = ""
 *  @return the symbol, and
 *          "" if end of ''stri'' was reached.
 *)
const func string: getJsonSymbol (inout string: stri) is func
  result
    var string: symbol is "";
  local
    var integer: total is 0;
    var integer: first is 1;
    var integer: beyond is 0;
  begin
    total := length(stri);
    # Find the first character that is not whitespace.
    while first <= total and stri[first] in white_space_char do
      first +:= 1;
    end while;
    if first > total then
      # Nothing but whitespace: consume everything and return "".
      stri := "";
    else
      case stri[first] of
        when double_quotation_char:
          # String literal: the string scanner expects the " at stri[1].
          stri := stri[first ..];
          symbol := getJsonString(stri);
        when digit_char | {'-'}:
          # Number literal: the number scanner expects it at stri[1].
          stri := stri[first ..];
          symbol := getJsonNumber(stri);
        when {',', ':'} | paren_char:
          # Single character symbol.
          symbol := str(stri[first]);
          stri := stri[succ(first) ..];
        when letter_char:
          # Name: a letter followed by any number of name characters.
          beyond := succ(first);
          while beyond <= total and stri[beyond] in name_char do
            beyond +:= 1;
          end while;
          symbol := stri[first .. pred(beyond)];
          stri := stri[beyond ..];
        otherwise:
          raise RANGE_ERROR;
      end case;
    end if;
  end func;


(**
 *  Reads a JSON symbol from a [[file]].
 *  Before reading the symbol it skips whitespace characters. A symbol
 *  can be a literal (number or string), a name, a special character
 *  (comma (,) or colon (:)) or a parenthesis. When the function is
 *  called it is assumed that inFile.bufferChar contains a whitespace
 *  character or the first character of a symbol. When the function is
 *  left the character after the symbol is in inFile.bufferChar.
 *   f := initScan(" null "); getJsonSymbol(f) returns "null"  and f.bufferChar = ' '
 *   f := initScan("\ntrue"); getJsonSymbol(f) returns "true"  and f.bufferChar = EOF
 *   f := initScan(",-9");    getJsonSymbol(f) returns ","     and f.bufferChar = '-'
 *   f := initScan(":true");  getJsonSymbol(f) returns ":"     and f.bufferChar = 't'
 *   f := initScan("\r{ } "); getJsonSymbol(f) returns "{"     and f.bufferChar = ' '
 *   f := initScan(" [123]"); getJsonSymbol(f) returns "["     and f.bufferChar = '1'
 *   f := initScan("\t987 "); getJsonSymbol(f) returns "987"   and f.bufferChar = ' '
 *   f := initScan(" -123,"); getJsonSymbol(f) returns "-123"  and f.bufferChar = ','
 *   f := initScan(" \"x\""); getJsonSymbol(f) returns "\"x\"" and f.bufferChar = EOF
 *  @return the symbol, and
 *          "" if [[char#EOF|EOF]] was reached.
 *)
const func string: getJsonSymbol (inout file: inFile) is func
  result
    var string: symbol is "";
  begin
    # Advance bufferChar to the first character that is not whitespace.
    while inFile.bufferChar in white_space_char do
      inFile.bufferChar := getc(inFile);
    end while;
    case inFile.bufferChar of
      when {EOF}:
        # End of input: the result is the empty string.
        symbol := "";
      when double_quotation_char:
        symbol := getJsonString(inFile);
      when digit_char | {'-'}:
        symbol := getJsonNumber(inFile);
      when {',', ':'} | paren_char:
        # Single character symbol.
        symbol &:= inFile.bufferChar;
        inFile.bufferChar := getc(inFile);
      when letter_char:
        # Name: the leading letter is taken unconditionally, then
        # characters are added as long as they are name characters.
        repeat
          symbol &:= inFile.bufferChar;
          inFile.bufferChar := getc(inFile);
        until inFile.bufferChar not in name_char;
      otherwise:
        raise RANGE_ERROR;
    end case;
  end func;