
/* YACC.Y: Yacc grammar for Yacc main program. 2-17-91, 4-30-91 AG
   To bootstrap Yacc, use Yacc iself to compile this grammar, then
   run tpc on the generated program.

   Note:

   This is not entirely the `official' syntax introduced by Johnson, but it
   should be compatible with UNIX Yacc (except for the differences specified
   in the program header, below), as described in the UNIX manual, including
   the language elements entitled as "old features supported but not
   encouraged."

   Bugs:

   - Processes $$'s, $i's, %} and } inside of comments in Turbo Pascal code
     (instead of ignoring them).

   Shift/reduce conflicts:

   This grammar will produce a number of shift/reduce conflicts caused by
   the error productions, since it does not specify unambigiously whether
   errors are to be handled in global structures (definitions and rules)
   or by enclosed syntactic constructs (e.g. symbols). Yacc will resolve
   these conflicts in favour of shift, which is o.k. (it means that
   errors will be caught in the innermost constructs with error handling,
   thus reducing the amount of skipped symbols in resynchronization).

   Error handling is done using the general method of Schreiner/Friedman
   (see Schreiner/Friedman, "Introduction to compiler construction with
   UNIX," 1985).

*/

%{

{$X+}
program Yacc;

(* Copyright (c) 1990,91 by Albert Graef, Schillerstr. 18,
   6509 Schornsheim/Germany
   All rights reserved *)

uses YaccLib, YaccBase, YaccMsgs, YaccSem, YaccTables, YaccParseTable;

(* TP Yacc - Yet Another Compiler Compiler for MS-DOS/Turbo Pascal

   Version 3.0 as of April 91
   Version 3.0a as of May 92 (bug fixes in precedence and type information
     updates)

   Author

   Albert Graef
   Schillerstr. 18
   6509 Schornsheim/Germany

   Graef@DMZRZU71.bitnet

   Synopsis   YACC [options] yacc-file[.Y] [output-file[.PAS]]

   Options

   /v  "Verbose:" Yacc generates a readable description of the generated
       parser, written to yacc-file with new extension .LST.

   /d  "Debug:" Yacc generates parser with debugging output.

   Description

   This is a reimplementation of the popular UNIX compiler generator
   Yacc for MS-DOS and Turbo Pascal.

   Differences from UNIX Yacc:

   - Produces output code for Turbo Pascal, rather than for C.

   - Does not support %union definitions. Instead, a value type is declared
     by specifying the type identifier *itself* as the tag of a %token
     or %type definition. Yacc will automatically generate an appropriate
     yylval variable of a variant record type (YYSType) which is capable of
     holding values of any of the types used in %token and %type.

     Type checking is *very* strict. If you use type definitions, then
     any symbol referred to in an action *must* have a type introduced
     in a type definition. Either the symbol must have been assigned a
     type in the definitions section, or the $<type-identifier> notation
     must be used. The syntax of the %type definition has been changed
     slightly to allow definitions of the form
       %type <type-identifier>
     (omitting the nonterminals) which may be used to declare types which
     are not assigned to any grammar symbol, but are used with the
     $<...> construct.

   - The parse tables constructed by this Yacc version are slightly greater
     than those constructed by UNIX Yacc, since a reduce action will only be
     chosen as the default action if it is the *only* action in the state.
     In difference, UNIX Yacc chooses a reduce action as the default action
     whenever it is the only *reduce* action of the state (even if there are
     other shift actions).

     This solves a bug in UNIX Yacc that makes the generated parser start
     error recovery too late with certain types of error productions (see
     also Schreiner/Friedman, "Introduction to compiler construction with
     UNIX," 1985). Also, errors will be caught sooner in most cases where
     standard Yacc would carry out an additional (default) reduction before
     detecting the error.

*)

%}

/* Lexical part of the Yacc language: */

%token
  ID		/* identifiers: {letter}{letter_or_digit}* */
  C_ID		/* identifier which forms left side of rule, i.e. is
		   followed by a colon */
  LITERAL       /* single character literal */
  LITID         /* multiple character literal */
  NUMBER	/* nonnegative integers: {digit}+ */
  PTOKEN PLEFT PRIGHT PNONASSOC PTYPE PSTART PPREC
  		/* reserved words: PTOKEN=%token, etc. */
  PP		/* source sections separator %% */
  LCURL		/* curly braces: %{ and %} */
  RCURL
  ',' ':' ';' '|' '{' '}' '<' '>' '='
		/* literals */
  ILLEGAL	/* illegal input character */

%start grammar

%%

/* Lexical entities, those that may give rise to syntax errors are augmented
   with error productions, and important symbols call yyerrok. */

id		: ID
c_id		: C_ID
literal         : LITERAL
litid           : LITID
number		: NUMBER
ptoken		: PTOKEN        { yyerrok; }
pleft		: PLEFT	        { yyerrok; }
pright		: PRIGHT        { yyerrok; }
pnonassoc	: PNONASSOC	{ yyerrok; }
ptype		: PTYPE	        { yyerrok; }
pstart		: PSTART        { yyerrok; }
pprec		: PPREC
pp		: PP	        { yyerrok; }
lcurl		: LCURL
rcurl		: RCURL
		| error	        { error(rcurl_expected); }
comma		: ','
colon		: ':'	        { yyerrok; }
semicolon	: ';'	        { yyerrok; }
bar		: '|'	        { yyerrok; }
lbrace		: '{'
rbrace		: '}'
		| error	        { error(rbrace_expected); }
langle		: '<'
rangle		: '>'
		| error         { error(rangle_expected); }
eq		: '='

/* Syntax and semantic routines: */

grammar		: defs pp
		  		{ sort_types;
                                  definitions;
                                  next_section; }
		  rules
		  		{ next_section;
                                  generate_parser;
                                  next_section; }
		  aux_procs
		;

aux_procs	: /* empty: aux_procs is optional */

		| pp { copy_rest_of_file; }

		;


defs		: /* empty */
		| defs def	{ yyerrok; }
		| defs error	{ error(error_in_def); }
		;

def		: pstart id
			 	{ startnt := ntsym($2); }
		| pstart error
				{ error(ident_expected); }
		| lcurl { copy_code; } rcurl

		| ptoken
				{ act_prec := 0; }
		  tag token_list

		| pleft
				{ act_prec := new_prec_level(left); }
		  tag token_list

		| pright
				{ act_prec := new_prec_level(right); }
		  tag token_list

		| pnonassoc
				{ act_prec := new_prec_level(nonassoc); }
		  tag token_list

		| ptype tag nonterm_list

                | ptype tag

		;

tag		: /* empty: type tag is optional */
				{ act_type := 0; }
		| langle id rangle
				{ act_type := $2; add_type($2); }
		;

token_list	: token_num

		| token_list token_num
				{ yyerrok; }
		| token_list comma token_num
				{ yyerrok; }
		| error
				{ error(ident_expected); }
		| token_list error
				{ error(error_in_def); }
		| token_list comma error
				{ error(ident_expected); }
		;

token_num	: literal
				{ if act_type<>0 then
                                    sym_type^[$1] := act_type;
                                  if act_prec<>0 then
                                    sym_prec^[$1] := act_prec; }
               	| litid
				{ litsym($1, 0);
                                  if act_type<>0 then
                                    sym_type^[litsym($1, 0)] := act_type;
                                  if act_prec<>0 then
                                    sym_prec^[litsym($1, 0)] := act_prec; }
               	| id
				{ litsym($1, 0);
                                  if act_type<>0 then
                                    sym_type^[litsym($1, 0)] := act_type;
                                  if act_prec<>0 then
                                    sym_prec^[litsym($1, 0)] := act_prec; }
               	| litid number
				{ litsym($1, 0);
                                  if act_type<>0 then
                                    sym_type^[litsym($1, $2)] := act_type;
                                  if act_prec<>0 then
                                    sym_prec^[litsym($1, 0)]  := act_prec; }
               	| id number
				{ litsym($1, 0);
                                  if act_type<>0 then
                                    sym_type^[litsym($1, $2)] := act_type;
                                  if act_prec<>0 then
                                    sym_prec^[litsym($1, 0)]  := act_prec; }
		;

nonterm_list	: nonterm
		| nonterm_list nonterm
				{ yyerrok; }
		| nonterm_list comma nonterm
				{ yyerrok; }
		| error
				{ error(ident_expected); }
		| nonterm_list error
				{ error(error_in_def); }
		| nonterm_list comma error
				{ error(ident_expected); }
		;

nonterm		: id
				{ if act_type<>0 then
                                    sym_type^[ntsym($1)] := act_type; }
		;


rules		: 		{ next_section; }
		  rule1

		| lcurl { copy_code; } rcurl
				{ next_section; }
		  rule1
					/* rules section may be prefixed
					   with `local' Turbo Pascal
					   declarations */
		| rules rule
				{ yyerrok; }
		| error
				{ error(error_in_rule); }
		| rules error
				{ error(error_in_rule); }
		;

rule1		: c_id
				{ start_rule(ntsym($1)); }
		  colon
		  		{ start_body; }
		  body prec
				{ end_body; }
		;

rule		: rule1

		| bar
				{ start_body; }
		  body prec
				{ end_body; }
		;

body		: /* empty */

		| body literal
				{ add_symbol($2); yyerrok; }
		| body litid
				{ add_symbol(sym($2)); yyerrok; }
		| body id
				{ add_symbol(sym($2)); yyerrok; }
                | body action
				{ add_action; yyerrok; }
		| body error
				{ error(error_in_rule); }
		;

action		: lbrace { copy_action; } rbrace

		| eq { copy_single_action; }
                		/* old language feature; code must be
				   single statement ending with `;' */
		;

prec		: /* empty */

		| pprec literal
				{ add_rule_prec($2); }
		  opt_action

		| pprec litid
				{ add_rule_prec(litsym($2, 0)); }
		  opt_action

		| pprec id
				{ add_rule_prec(litsym($2, 0)); }
		  opt_action

		| prec semicolon

		;

opt_action	: /* empty */

		| action
				{ add_action; }
		;


%%

(* Lexical analyzer (implemented in Turbo Pascal for maximum efficiency): *)

function yylex : integer;
  function end_of_input : boolean;
    begin
      end_of_input := (cno>length(line)) and eof(yyin)
    end(*end_of_input*);
  procedure scan;
    (* scan for nonempty character, skip comments *)
    procedure scan_comment;
      var p : integer;
      begin
        p := pos('*/', copy(line, cno, length(line)));
        if p>0 then
          cno := cno+succ(p)
        else
          begin
            while (p=0) and not eof(yyin) do
              begin
                readln(yyin, line);
                inc(lno);
                p := pos('*/', line)
              end;
            if p=0 then
              begin
                cno := succ(length(line));
                error(open_comment_at_eof);
              end
            else
              cno := succ(succ(p))
          end
      end(*scan_comment*);
    begin
      while not end_of_input do
        if cno<=length(line) then
          case line[cno] of
            ' ', tab : inc(cno);
            '/' :
              if (cno<length(line)) and (line[succ(cno)]='*') then
                begin
                  inc(cno, 2);
                  scan_comment
                end
              else
                exit
            else
              exit
          end
        else
          begin
            readln(yyin, line);
            inc(lno); cno := 1;
          end
    end(*scan*);
  function scan_ident : integer;
    (* scan an identifier *)
    var
      idstr : String;
    begin
      idstr := line[cno];
      inc(cno);
      while (cno<=length(line)) and (
            ('A'<=upCase(line[cno])) and (upCase(line[cno])<='Z') or
            ('0'<=line[cno]) and (line[cno]<='9') or
            (line[cno]='_') or
            (line[cno]='.') ) do
	begin
	  idstr := idstr+line[cno];
	  inc(cno)
	end;
      yylval := get_key(idstr);
      scan;
      if not end_of_input and (line[cno]=':') then
        scan_ident := C_ID
      else
        scan_ident := ID
    end(*scan_ident*);
  function scan_literal: integer;
    (* scan a literal, i.e. string *)
    var
      idstr : String;
      oct_val : Byte;
    begin
      idstr := line[cno];
      inc(cno);
      while (cno<=length(line)) and (line[cno]<>idstr[1]) do
        if line[cno]='\' then
          if cno<length(line) then
            begin
              inc(cno);
              case line[cno] of
                'n' :
                  begin
                    idstr := idstr+nl;
                    inc(cno)
                  end;
                'r' :
                  begin
                    idstr := idstr+cr;
                    inc(cno)
                  end;
                't' :
                  begin
                    idstr := idstr+tab;
                    inc(cno)
                  end;
                'b' :
                  begin
                    idstr := idstr+bs;
                    inc(cno)
                  end;
                'f' :
                  begin
                    idstr := idstr+ff;
                    inc(cno)
                  end;
                '0'..'7' :
                  begin
                    oct_val := ord(line[cno])-ord('0');
                    inc(cno);
                    while (cno<=length(line)) and
                          ('0'<=line[cno]) and
                          (line[cno]<='7') do
                      begin
                        oct_val := oct_val*8+ord(line[cno])-ord('0');
                        inc(cno)
                      end;
                    idstr := idstr+chr(oct_val)
                  end
                else
                  begin
                    idstr := idstr+line[cno];
                    inc(cno)
                  end
              end
            end
          else
            inc(cno)
        else
          begin
            idstr := idstr+line[cno];
            inc(cno)
          end;
      if cno>length(line) then
        error(missing_string_terminator)
      else
        inc(cno);
      if length(idstr)=2 then
        begin
          yylval := ord(idstr[2]);
          scan_literal := LITERAL;
        end
      else if length(idstr)>1 then
        begin
          yylval := get_key(''''+copy(idstr, 2, pred(length(idstr)))+'''');
          scan_literal := LITID;
        end
      else
        scan_literal := ILLEGAL;
    end(*scan_literal*);
  function scan_num : integer;
    (* scan an unsigned integer *)
    var
      numstr : String;
      code : integer;
    begin
      numstr := line[cno];
      inc(cno);
      while (cno<=length(line)) and
            ('0'<=line[cno]) and (line[cno]<='9') do
        begin
          numstr := numstr+line[cno];
          inc(cno)
        end;
      val(numstr, yylval, code);
      if code=0 then
        scan_num := NUMBER
      else
        scan_num := ILLEGAL;
    end(*scan_num*);
  function scan_keyword : integer;
    (* scan %xy *)
    function lookup(key : String; var tok : integer) : boolean;
      (* table of Yacc keywords (unstropped): *)
      const
        no_of_entries = 11;
        max_entry_length = 8;
        keys : array [1..no_of_entries] of String[max_entry_length] = (
          '0', '2', 'binary', 'left', 'nonassoc', 'prec', 'right',
          'start', 'term', 'token', 'type');
        toks : array [1..no_of_entries] of integer = (
          PTOKEN, PNONASSOC, PNONASSOC, PLEFT, PNONASSOC, PPREC, PRIGHT,
          PSTART, PTOKEN, PTOKEN, PTYPE);
      var m, n, k : integer;
      begin
        (* binary search: *)
        m := 1; n := no_of_entries;
        lookup := true;
        while m<=n do
          begin
            k := m+(n-m) div 2;
            if key=keys[k] then
              begin
                tok := toks[k];
                exit
              end
            else if key>keys[k] then
              m := k+1
            else
              n := k-1
          end;
        lookup := false
      end(*lookup*);
    var
      keywstr : String;
      tok : integer;
    begin
      inc(cno);
      if cno<=length(line) then
        case line[cno] of
          '<' :
            begin
              scan_keyword := PLEFT;
              inc(cno)
            end;
          '>' :
            begin
              scan_keyword := PRIGHT;
              inc(cno)
            end;
          '=' :
            begin
              scan_keyword := PPREC;
              inc(cno)
            end;
          '%', '\' :
            begin
              scan_keyword := PP;
              inc(cno)
            end;
          '{' :
            begin
              scan_keyword := LCURL;
              inc(cno)
            end;
          '}' :
            begin
              scan_keyword := RCURL;
              inc(cno)
            end;
          'A'..'Z', 'a'..'z', '0'..'9' :
            begin
              keywstr := line[cno];
              inc(cno);
              while (cno<=length(line)) and (
                    ('A'<=upCase(line[cno])) and (upCase(line[cno])<='Z') or
                    ('0'<=line[cno]) and (line[cno]<='Z') ) do
                begin
                  keywstr := keywstr+line[cno];
                  inc(cno)
                end;
              if lookup(keywstr, tok) then
                scan_keyword := tok
              else
                scan_keyword := ILLEGAL
            end;
          else scan_keyword := ILLEGAL
        end
      else
        scan_keyword := ILLEGAL;
    end(*scan_keyword*);
  function scan_char : integer;
    (* scan any single character *)
    begin
      scan_char := ord(line[cno]);
      inc(cno)
    end(*scan_char*);
  var lno0, cno0 : integer;
  begin
    tokleng := 0;
    scan;
    lno0 := lno; cno0 := cno;
    if end_of_input then
      yylex := 0
    else
      case line[cno] of
        'A'..'Z', 'a'..'z', '_' : yylex := scan_ident;
	'''', '"' : yylex := scan_literal;
	'0'..'9' : yylex := scan_num;
	'%', '\' : yylex := scan_keyword;
        '=' :
          if (cno<length(line)) and (line[succ(cno)]='{') then
            begin
              inc(cno);
              yylex := scan_char
            end
          else
            yylex := scan_char;
	else yylex := scan_char;
      end;
    if lno=lno0 then
      tokleng := cno-cno0
  end(*yylex*);

(* Main program: *)

var i : Integer;

begin

  (* sign-on: *)

  writeln(sign_on);

  (* parse command line: *)

  if paramCount=0 then
    begin
      writeln(usage);
      writeln(options);
      halt(0);
    end;

  yfilename := '';
  pasfilename := '';

  for i := 1 to paramCount do
    if copy(paramStr(i), 1, 1)='/' then
      if upper(paramStr(i))='/V' then
        verbose := true
      else if upper(paramStr(i))='/D' then
        debug := true
      else
        begin
          writeln(invalid_option, paramStr(i));
          halt(1);
        end
    else if yfilename='' then
      yfilename := addExt(upper(paramStr(i)), 'Y')
    else if pasfilename='' then
      pasfilename := addExt(upper(paramStr(i)), 'PAS')
    else
      begin
        writeln(illegal_no_args);
        halt(1);
      end;

  if yfilename='' then
    begin
      writeln(illegal_no_args);
      halt(1);
    end;

  if pasfilename='' then pasfilename := root(yfilename)+'.PAS';
  lstfilename := root(yfilename)+'.LST';

  (* open files: *)

  assign(yyin, yfilename);
  assign(yyout, pasfilename);
  assign(yylst, lstfilename);

  reset(yyin);    if ioresult<>0 then fatal(cannot_open_file+yfilename);
  rewrite(yyout); if ioresult<>0 then fatal(cannot_open_file+pasfilename);
  rewrite(yylst); if ioresult<>0 then fatal(cannot_open_file+lstfilename);

  (* search code template in current directory, then on path where Yacc
     was executed from: *)
  codfilename := 'YYPARSE.COD';
  assign(yycod, codfilename);
  reset(yycod);
  if ioresult<>0 then
    begin
      codfilename := upper(path(paramStr(0)))+'YYPARSE.COD';
      assign(yycod, codfilename);
      reset(yycod);
      if ioresult<>0 then fatal(cannot_open_file+codfilename);
    end;

  (* parse source grammar: *)

  write('parse ... ');

  lno := 0; cno := 1; line := '';

  next_section;
  if debug then writeln(yyout, '{$define yydebug}');

  if yyparse=0 then
    { done }
  else if yychar=0 then
    error(unexpected_eof)
  else
    error(syntax_error);

  if errors=0 then writeln('DONE');

  (* close files: *)

  close(yyin); close(yyout); close(yylst); close(yycod);

  (* print statistics: *)

  if errors>0 then
    writeln( lno, ' lines, ',
             errors, ' errors found.' )
  else
    begin
      writeln( lno, ' lines, ',
               n_rules-1, '/', max_rules-1, ' rules, ',
               n_states, '/', max_states, ' s, ',
               n_items, '/', max_items, ' i, ',
               n_trans, '/', max_trans, ' t, ',
               n_redns, '/', max_redns, ' r.');
      if shift_reduce>0 then
        writeln(shift_reduce, ' shift/reduce conflicts.');
      if reduce_reduce>0 then
        writeln(reduce_reduce, ' reduce/reduce conflicts.');
      if never_reduced>0 then
        writeln(never_reduced, ' rules never reduced.');
    end;

  if warnings>0 then writeln(warnings, ' warnings.');

  writeln( n_bytes, '/', max_bytes, ' bytes of memory used.');

  (* terminate: *)

  if errors>0 then
    begin
      erase(yyout);
      if ioresult<>0 then ;
    end;

  if file_size(lstfilename)=0 then
    erase(yylst)
  else
    writeln('(see ', lstfilename, ' for more information)');

  halt(errors);

end(*Yacc*).
