/*
 * Copyright (c) 2003-2008 The University of Wroclaw.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *    1. Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *    2. Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *    3. The name of the University may not be used to endorse or promote
 *       products derived from this software without specific prior
 *       written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
 * NO EVENT SHALL THE UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.Text;

using Nemerle.Collections;
using Nemerle.Utility;

using NStack = Nemerle.Collections.Stack;

namespace Nemerle.Compiler
{
  /// A lexical token. Besides the plain tokens there are group tokens
  /// (RoundGroup, BracesGroup, SquareGroup, QuoteGroup, LooseGroup, Namespace)
  /// that hold the first token of a chain; the chain is linked through [Next]
  /// and can be walked with [GetEnumerator].
  public variant Token : IEnumerable [Token]
  {
    | Identifier { name : string; }
    | QuotedIdentifier { name : string; }
    | IdentifierToComplete { prefix : string; }
    | Keyword { name : string; }
    | Operator { name : string; }

    // [value] is the decoded text, [rawString] the text as it was consumed
    // from the input; the one-argument constructor leaves the raw text empty
    | StringLiteral { value : string; rawString : string; this(val : string) { this(val, "") } }
    | CharLiteral { value : char; }
    | IntegerLiteral { lit : Literal.Integer; cast_to : Parsetree.PExpr }
    | FloatLiteral { value : float; }
    | DoubleLiteral { value : Double; }
    | DecimalLiteral { value : Decimal; }

    | Comment { value : string; }

    | Semicolon { generated : bool; }
    | Comma
    | BeginBrace { generated : bool; }   // {
    | EndBrace { generated : bool; }     // }
    | BeginRound                         // (
    | EndRound                           // )
    | BeginSquare                        // [
    | EndSquare                          // ]
    | BeginQuote                         // <[
    | EndQuote                           // ]>

    | Indent { value : string; }
    | WhiteSpace { value : string; }
    | NewLine { value : string; }

    | RoundGroup { Child : Token; }      // ( ... )
    | BracesGroup { public this(child : Token) { Child = child; } Child : Token; generated : bool; } // { ... }
    | SquareGroup { mutable Child : Token; } // [ ... ]
    | QuoteGroup { Child : Token; }      // <[ ... ]>
    | LooseGroup { mutable Child : Token; } // ; ... ;

    | Namespace { Env : GlobalEnv; Body : Token; }
    | Using { Env : GlobalEnv; }

    | EndOfFile
    | EndOfGroup

    public mutable Location : Nemerle.Compiler.Location;
    /// The following token of the chain this token belongs to (null at the end).
    public mutable Next : Token;

    public this () { }
    public this (loc : Location) { this.Location = loc; }

    /// Renders the token roughly as source text; group tokens and namespaces
    /// are rendered through PreParser.Dump.
    public override ToString () : string {
      match (this) {
        | QuotedIdentifier (name) => "@" + name
        | Identifier (name) => name
        // The prefix used to be dropped here (the arm returned an empty
        // interpolated string), which made completion tokens invisible in
        // dumps; show an explicit marker carrying the prefix instead.
        | IdentifierToComplete (name) => $"<COMPLETE '$name'>";
        | Keyword (name) => name
        | Operator (name : string) => name
        | StringLiteral (_, raw) => raw;
        | CharLiteral (value) => "'" + value.ToString () + "'"
        | IntegerLiteral (value, _) => value.ToString ()
        | FloatLiteral (value) => value.ToString ()
        | DoubleLiteral (value) => value.ToString ()
        | DecimalLiteral (value) => value.ToString ()
        | Comment (value) => "/*" + value + "*/"
        | EndOfFile => " /*EOF*/ "
        | EndOfGroup => " /*EOG*/ "

        | Semicolon => ";"
        | Comma => ","
        | BeginBrace => "{"
        | EndBrace => "}"
        | BeginRound => "("
        | EndRound => ")"
        | BeginSquare => "["
        | EndSquare => "]"
        | BeginQuote => "<["
        | EndQuote => "]>"

        | Indent => "indentation"

        | RoundGroup | BracesGroup | SquareGroup | QuoteGroup | LooseGroup =>
          PreParser.Dump (this, "")

        | Namespace (env, body) =>
          "namespace " + env.CurrentNamespace.GetDisplayName () + " { " + PreParser.Dump (body, "") + " }"

        | Using => "using import;"
        | WhiteSpace(name) => name
        | NewLine => "\\r\\n";
      }
    }

    /// Enumerates the child chain of a group token (following [Next] until
    /// null). Throws System.ArgumentException for tokens that are not groups.
    public GetEnumerator () : IEnumerator [Token] {
      match (this) {
        | RoundGroup (child) | BracesGroup (child, _) | SquareGroup (child)
        | QuoteGroup (child) | LooseGroup (child) | Namespace (_, child) =>
          def loop (current) {
            when (current != null) {
              yield current;
              loop (current.Next)
            }
          }
          loop (child)

        | _ => throw System.ArgumentException ("this is not a group token")
      }
    }

    /// With [describe] set, returns a human readable description of the token
    /// kind (used for messages); otherwise the same text as ToString ().
    public ToString (describe : bool) : string {
      if (describe)
        match (this) {
          | Keyword (x) => "keyword `" + x + "'"
          | QuotedIdentifier (x) => "quoted identifier `" + x + "'"
          | Identifier (x) => "identifier `" + x + "'"
          | IdentifierToComplete (x) => "identifier to complete `" + x + "'"
          | Operator (x) => "operator `" + x + "'"
          | StringLiteral => "string literal"
          | IntegerLiteral => "integer number literal"
          | FloatLiteral => "float literal"
          | DoubleLiteral => "double literal"
          | DecimalLiteral => "decimal literal"
          | CharLiteral => "character literal"
          | EndOfFile => "end of file"
          | EndOfGroup => "separator or closing bracket"
          | Comment => "documentation comment"

          | Comma | BeginRound | EndRound | BeginSquare | EndSquare
          | BeginQuote | EndQuote =>
            "operator `" + ToString () + "'"

          // braces and semicolons marked as generated get a structural
          // description instead of the literal character
          | BeginBrace (generated) =>
            if (generated) "group begin" else "operator `" + ToString () + "'"
          | EndBrace (generated) =>
            if (generated) "group end" else "operator `" + ToString () + "'"
          | Semicolon (generated) =>
            if (generated) "end of statement" else "operator `" + ToString () + "'"

          | Indent (value) => $"indentation `$value'"
          | WhiteSpace => ""
          | NewLine(value) => $"newline char: '$value'";

          | BracesGroup => "`{...}' group"
          | RoundGroup => "`(...)' group"
          | SquareGroup => "`[...]' group"
          | QuoteGroup => "`<[...]>' group"
          | LooseGroup (body) =>
            if (body == null) "token group" else body.ToString (true)

          | Namespace => "namespace scoping"
          | Using => "using declaration"
        }
      else
        ToString ()
    }
  }

  /// A region with its text and location; it is created with the start
  /// location and completed once by [SetEndRegion].
  public class Region
  {
    /// A null [text] is stored as the empty string.
    public this(location : Location, text : string)
    {
      this.location = location;
      this.text = if (text == null) string.Empty else text;
    }

    [Accessor] mutable isComplete : bool;
    [Accessor] mutable location : Location;
    [Accessor] text : string;

    /// Marks the region as complete and extends its location up to the
    /// line/column of [loc]. Throws InvalidOperationException when the region
    /// is already complete or [loc] lies in a different file.
    internal SetEndRegion (loc : Location) : void
    {
      when (isComplete)
        throw System.InvalidOperationException ("region is complete.");
      when (location.FileIndex != loc.FileIndex)
        throw System.InvalidOperationException ("invalid endregion.");
      isComplete = true;
      location = Location (location.FileIndex, location.Line, location.Column, loc.Line, loc.Column)
    }
  }

  /// Base of the lexers: tracks line/column, produces tokens and keeps the
  /// preprocessor state. Character input itself is supplied by subclasses
  /// through the abstract read_from_input / peek / peek_or_white members.
  [ManagerAccess]
  public abstract class LexerBase : IDisposable
  {
    internal enum NumberMode { | Float | Decimal | Octal | Hex | Binary }

    protected mutable isPendingChar : bool;  // is there already some first char
    protected mutable pendingChar : char;
    protected mutable line : int;
    protected mutable col : int;
    protected mutable file_idx : int;
    // column reached at the end of every line read so far (see read ());
    // BeginParseFile puts a fake first entry in, hence the "- 1" in LinesCount
    protected mutable _linesLengths : List[int] = List(256);
    public LinesLengths : array[int] { get { _linesLengths.ToArray() } }
    public LinesCount : int { get { _linesLengths.Count - 1 } }
    public IsEof : bool { get; private set; }

    protected static opchars : array [bool];
    public static BaseKeywords : Set [string];
    public mutable Keywords : Set [string];

    // scratch buffer shared by the operator / number / identifier readers
    protected id_buffer : StringBuilder = StringBuilder ();

    #region Indentation syntax
    // Variables for indentation-based syntax

    // The location where a token should be automatically inserted
    protected mutable insert_location : Location;
    // Used by GetToken() to store that token that it postpones when insert_indent = true
    protected mutable tok_pending : Token;
    // Whether we are currently adding whitespaces to indentString
    protected mutable counting_indentation : bool;
    // Whether GetToken() should return a Token.Indent
    protected mutable insert_indent : bool;
    // String holding the current line's indent
    protected mutable indent_string : string;

    [Accessor]
    protected mutable in_indentation_mode : bool;

    /// Turns indentation mode on and starts collecting the indent of the
    /// current line from scratch.
    public SwitchToIndentationMode () : void {
      in_indentation_mode = true;
      counting_indentation = true;
      indent_string = "";
    }

    /// When not yet in indentation mode: switches to it, skips the rest of
    /// the current line and throws PragmaIndent. Does nothing otherwise.
    pragma_indent () : void {
      unless (in_indentation_mode) {
        SwitchToIndentationMode ();
        _ = read_to_the_end_of_line ();
        throw PragmaIndent ()
      }
    }
    #endregion

    #region PREPROCESSOR VARIABLES
    /** if there was only white chars from beginning of line */
    protected mutable white_beginning : bool;
    protected mutable eating_stack : NStack[int]; // stack of states (nested #if)

    /** states:
        now processing:
          -4 region
          -3 else
          -1 if/elif
           0 no preprocessor
        not processing:
           1 if/elif, but some other block possibly will be processed
           2 if/elif, and other blocks won't be processed
             (there already was processed block, or parent block isn't processed)
           3 else
           4 region
     */
protected mutable eating_now : int;
mutable line_stack : int;  // real line number at moment of `#line 3' occurence
mutable line_start : int;  // how to compute real line after `#line default' occurence
protected file_real : string; // real filename to revert after `#line 4 "bla"'
protected mutable defines : Map [string, bool];
// cached state of warning 10002 (tab / CR characters in the input), set in BeginParseFile
mutable is_check_wrong_chars : bool;
#endregion PREPROCESSOR VARIABLES

[Accessor] mutable incompleteRegions : list[Region] = [];
[Accessor] mutable regions : list[Region] = [];

/// Lexing failure; [name] carries the message text passed by the thrower.
public class Error : System.Exception
{
  public name : string;
  public this (name : string) { this.name = name; }
}

/// Thrown by pragma_indent () after the lexer has been switched to indentation mode.
public class PragmaIndent : System.Exception { }

/// Initializes position tracking (line 1, column 1), takes the initial
/// preprocessor symbols from the manager's command line defines and
/// remembers [fn] as the real file name.
protected this (manager : ManagerClass, fn : string)
{
  Manager = manager;
  line = 1;
  col = 1;
  isPendingChar = false;
  white_beginning = true;
  defines = Manager.Options.CommandDefines;
  eating_stack = Stack ();
  eating_now = 0;
  line_stack = -1;
  file_real = fn;
}

// Raw character sources supplied by the concrete lexer.
// NOTE(review): eat_whitespace and do_get_token guard peek () / read () with
// a catch of LexerBase.Error, so these presumably throw at end of input while
// peek_or_white presumably yields a whitespace character instead - confirm
// against the implementations.
abstract protected read_from_input () : char;
abstract protected peek_or_white () : char;
abstract protected peek () : char;
public abstract Dispose () : void;

/// Resets the end-of-file flag, refreshes the cached state of warning 10002
/// and restarts line length bookkeeping.
public BeginParseFile() : void
{
  IsEof = false;
  is_check_wrong_chars = Manager.Options.Warnings.IsEnabled (10002);
  when (_linesLengths != null)
  {
    _linesLengths.Clear();
    _linesLengths.Add(-1); // fake line!
  }
}

/// Like read (), but reports '\0' as a newline.
protected read_or_eol () : char
{
  def ch = read ();
  if (ch == '\0') '\n' else ch
}

/// Reads one character from the input and updates line / column tracking,
/// the recorded line lengths, the end-of-file flag and (in indentation mode)
/// the indentation bookkeeping. A '\r' that is not followed by '\n' is
/// returned as '\n'.
protected read () : char
{
  mutable ch = read_from_input ();
  // Message.Debug (Location, $"ch = '$ch'");
  match (ch) {
    | '\n' =>
      if (_linesLengths == null)
        ++line;
      else
      {
        _linesLengths.Add(col);
        ++line;
        when (_linesLengths.Count != line) // maybe it cause by "line" directive...
          assert2(_linesLengths.Count == line - line_start + line_stack);
      }
      col = 1;

    | '\t' =>
      ++col;
      when (is_check_wrong_chars)
        Message.Warning (10002, this.Location, "tab character found in input stream")

    | '\r' =>
      // NOTE(review): when a lone '\r' is turned into '\n' below, neither
      // [line] nor [_linesLengths] is updated the way the '\n' case above
      // does it - confirm whether that is intended.
      if (peek_or_white () != '\n')
        ch = '\n'; // \r is alone here, so we change it to newline
      else
        when (is_check_wrong_chars)
          Message.Warning (10002, this.Location, "CR character found in input stream");

    | '\0' =>
      // record the length of the last line only once
      unless (IsEof) {
        IsEof = true;
        when (_linesLengths != null)
          _linesLengths.Add(col);
      }

    | _ => ++col;
  };

  when (in_indentation_mode) {
    // Start counting indentation anew after a newline
    if (ch == '\n') {
      // Message.Debug (Location, $"start");
      indent_string = "";
      counting_indentation = true;
      insert_indent = true;
      //Message.Debug (Location (file_idx, line, col), "countingIndentation ON");
    }
    else when (counting_indentation) {
      if (ch == '\t' || ch == ' ')
        indent_string += ch.ToString();
      // We have a non-space character, so stop counting indentation
      // and indicate that we have a new line to process
      else {
        // Message.Debug (Location, $"stop");
        counting_indentation = false;
      }
    }
  }

  ch
}

/// True for letters and '_'.
public static IsIdBeginning (ch : char) : bool
{
  Char.IsLetter (ch) || ch == '_'
}

/// Looks [ch] up in the [opchars] table; characters above code 255 are never operator characters.
public static IsOperatorChar (ch : char) : bool
{
  def chint = ch :> int;
  if (chint > 255) false else opchars [chint];
}

/// Checks [str] against [Keywords]; throws InvalidOperationException when
/// [Keywords] has not been set.
public IsKeyword (str : string) : bool
{
  when(Keywords == null)
    throw InvalidOperationException("Keywords property of lexer is not initialized!");
  Keywords.Contains (str)
}

protected clear_id_buffer () : void
{
  id_buffer.Length = 0;
}

/// Reads an operator whose first character [first_ch] was already consumed.
protected get_op (first_ch : char) : Token
{
  clear_id_buffer ();
  _ = id_buffer.Append (first_ch);
  continue_get_op ()
}

/// Reads an operator whose first two characters were already consumed.
protected get_op (first_ch : char, second_ch : char) : Token
{
  clear_id_buffer ();
  _ = id_buffer.Append (first_ch);
  _ = id_buffer.Append (second_ch);
  continue_get_op ()
}

/// Appends following operator characters to [id_buffer] and returns the
/// collected text as Token.Operator. After a '/' has been read,
/// comment_beginning () is consulted: the '/' is kept only when it answers
/// '/', any other answer ends the operator.
continue_get_op () : Token
{
  mutable go = true;
  while (go) {
    if (IsOperatorChar (peek_or_white ())) {
      def c = read ();
      if (c == '/')
        if (comment_beginning () == '/')
          ignore (id_buffer.Append ('/'))
        else
          go = false
      else
        ignore (id_buffer.Append (c))
    }
    else
      go = false;
  };
  Token.Operator (id_buffer.ToString ())
}

/// Reads a numeric literal whose first character [first_ch] (a digit or '.')
/// was already consumed and returns an integer, float, double or decimal
/// literal token. An overflow while converting is reported as an error and a
/// zero integer literal is returned; a malformed exponent or suffix throws Error.
protected get_number (first_ch : char) : Token
{
  clear_id_buffer ();
  mutable already_seen_type = false;  // for the case 0b0 vs 0b

  // The radix prefix (0x, 0o, 0b) is consumed here. For "0b" not followed
  // by a digit the 'b' is remembered as an already read type suffix.
  mutable mode =
    match (first_ch) {
      | '.' => NumberMode.Float
      | '0' =>
        match (peek_or_white ()) {
          | 'x' | 'X' => ignore (read ()); NumberMode.Hex
          | 'o' | 'O' => ignore (read ()); NumberMode.Octal
          | 'b' | 'B' =>
            ignore (read ());
            unless (Char.IsDigit (peek_or_white ()))
              already_seen_type = true;
            NumberMode.Binary
          | '0' =>
            Message.Warning (this.Location, "leading zeros look like"
                             " octal modifiers, but they are not");
            NumberMode.Decimal
          | _ => NumberMode.Decimal
        }
      | _ => NumberMode.Decimal
    };
  mutable last_was_digit = true;

  _ = id_buffer.Append (first_ch);

  // read digits and . between them if it is present
  def loop () {
    match (peek_or_white ()) {
      | '.' =>
        when (mode == NumberMode.Decimal) {
          mode = NumberMode.Float;
          ignore (read ());
          if (Char.IsDigit (peek_or_white ())) {
            ignore (id_buffer.Append ('.'));
            loop ()
          }
          else {
            // the dot does not belong to the number: hand it back as the
            // pending character and keep the literal an integer
            isPendingChar = true;
            pendingChar = '.';
            last_was_digit = false;
            mode = NumberMode.Decimal;
          }
        }

      | 'a' | 'A' | 'b' | 'B' | 'c' | 'C' | 'd' | 'D' | 'e' | 'E' | 'f' | 'F' =>
        when (mode == NumberMode.Hex) {
          ignore (id_buffer.Append (read ()));
          loop ()
        }

      | '_' =>
        // '_' is skipped when more digits follow it; otherwise it is handed
        // back as the pending character
        _ = read ();
        def pw = char.ToLower (peek_or_white ());
        if (char.IsDigit (pw))
          loop ()
        else if (mode == NumberMode.Hex && pw >= 'a' && pw <= 'f')
          loop ()
        else {
          isPendingChar = true;
          pendingChar = '_';
        }

      | x when Char.IsDigit (x) =>
        ignore (id_buffer.Append (read ()));
        loop ()

      | _ => ()
    }
  };
  loop ();

  // the buffer always starts with [first_ch], so a length of 1 means that
  // no digit followed the 0x / 0o prefix
  when ((mode == NumberMode.Hex || mode == NumberMode.Octal) && id_buffer.Length <= 1)
    Message.Error (this.Location, "expected value letters in numeric literal");

  // Reads the optional exponent (skipped when [only_realsuf] is set) and the
  // optional f / d / m suffix, then parses the buffer as a floating literal.
  def exponent_part (only_realsuf) {
    when (!only_realsuf) {
      match (peek_or_white ()) {
        | 'E' | 'e' =>
          ignore (id_buffer.Append (read ()));
          match (peek_or_white ()) {
            | '+' | '-' =>
              ignore (id_buffer.Append (read ()));
            | _ => ()
          };
          if (Char.IsDigit (peek_or_white ()))
            do {
              ignore (id_buffer.Append (read ()));
            } while (Char.IsDigit (peek_or_white ()))
          else
            throw Error ("no digits after exponent sign in float literal")
        | _ => ()
      }
    };
    match (Char.ToLower (peek_or_white (), CultureInfo.InvariantCulture)) {
      | 'f' =>
        ignore (read ());
        Token.FloatLiteral (Single.Parse (id_buffer.ToString (), NumberFormatInfo.InvariantInfo))
      | 'd' =>
        ignore (read ());
        Token.DoubleLiteral (Double.Parse (id_buffer.ToString (), NumberFormatInfo.InvariantInfo))
      | 'm' =>
        ignore (read ());
        Token.DecimalLiteral (Decimal.Parse (id_buffer.ToString (), NumberStyles.Float, NumberFormatInfo.InvariantInfo))
      | _ =>
        Token.DoubleLiteral (Double.Parse (id_buffer.ToString (), NumberFormatInfo.InvariantInfo))
    }
  };

  // convert given object to appropriate integer value according to type suffix
  def check_type_suffix (val : ulong) {
    /// we should have integer number here
    def special (c) {
      | 'l' | 's' | 'b' | 'u' => true
      | _ => false
    };

    // check suffixes to make special types conversions
    mutable ch =
      if (already_seen_type) 'b'
      else Char.ToLower (peek_or_white (), CultureInfo.InvariantCulture);

    if (special (ch)) {
      unless (already_seen_type) ignore (read ());
      mutable unsigned = ch == 'u';

      // we can have two letter suffixes
      def ch' = Char.ToLower (peek_or_white (), CultureInfo.InvariantCulture);
      if (special (ch')) {
        ignore (read ());
        if (ch' == 'u') {
          when (unsigned)
            throw Error ("bad integer suffix (unsigned twice)");
          unsigned = true;
        }
        else {
          // both chars can be different than 'u' and then they must be 'bs'
          match ((ch, ch')) {
            | ('s', 'b') | ('b', 's') => ch = 'b'
            | _ =>
              unless (unsigned)
                throw Error ("bad integer suffix (type contraint twice)");
              ch = ch';
          }
        }
      }
      else
        // only 'b' suffix, so it is unsigned byte
        when (ch == 'b') unsigned = true;

      // [ch] now contains type meaning or 'u' (then it is uint for sure)
      def parms =
        match (ch) {
          | 'b' =>
            if (unsigned)
              (Literal.FromByte (val :> byte), <[ byte ]>)
            else // workaround mono #74925
              (Literal.FromSByte ((val :> int) :> sbyte), <[ sbyte ]>)
          | 's' =>
            if (unsigned)
              (Literal.FromUShort (val :> ushort), <[ ushort ]>)
            else // workaround mono #74925
              (Literal.FromShort ((val :> int) :> short), <[ short ]>)
          | 'l' =>
            if (unsigned)
              (Literal.FromULong (val), <[ ulong ]>)
            else
              (Literal.FromLong (val :> long), <[ long ]>)
          | _ => (Literal.FromUInt (val :> uint), <[ uint ]>)
        }
      Token.IntegerLiteral (parms);
    }
    else
      Token.IntegerLiteral (Literal.Integer (val, false, null), null)
  };

  try {
    match (mode) {
      | NumberMode.Float =>
        match (peek_or_white ()) {
          | 'E' | 'e' => exponent_part (false)
          | _ => exponent_part (true)
        }

      | NumberMode.Decimal =>
        // when a '.' was handed back above, no exponent or suffix is looked for
        if (last_was_digit)
          match (Char.ToLower (peek_or_white (), CultureInfo.InvariantCulture)) {
            | 'e' => exponent_part (false)
            | 'f' | 'd' | 'm' => exponent_part (true)
            | _ => check_type_suffix (ulong.Parse (id_buffer.ToString ()))
          }
        else
          Token.IntegerLiteral (Literal.Integer (ulong.Parse (id_buffer.ToString ()), false, null), null)

      | NumberMode.Hex =>
        def val = UInt64.Parse (id_buffer.ToString (), Globalization.NumberStyles.HexNumber, CultureInfo.InvariantCulture);
        check_type_suffix (val)

      | NumberMode.Binary =>
        mutable value = 0ul;
        for (mutable i = 0; i < id_buffer.Length; ++i)
          match (id_buffer [i]) {
            | '0' => value <<= 1
            | '1' => value <<= 1; value += 1ul
            | x =>
              Message.Error (this.Location, $"binary literal numer must be"
                             " composed of 1 and 0s, while there is `$(x)'")
          };
        check_type_suffix (value)

      | NumberMode.Octal =>
        mutable value = 0ul;
        for (mutable i = 0; i < id_buffer.Length; ++i)
          // 48 is the character code of '0'
          match (Convert.ToUInt64 (id_buffer [i]) - 48ul) {
            | x when 0ul <= x && x <= 7ul => value <<= 3; value += x;
            | x =>
              Message.Error (this.Location, $"octal literal number must be"
                             " composed of 0 to 7 digits while it has `$(x)'")
          };
        check_type_suffix (value)
    }
  }
  catch {
    _ is System.OverflowException =>
      Message.Error (this.Location, $ "number literal $id_buffer is too large for given type");
      Token.IntegerLiteral (Literal.Integer (0, false, null), null)
  }
}

/// Reads an identifier or keyword starting with the already consumed
/// [first_ch]. A leading apostrophe not followed by an identifier start is
/// lexed as a character literal through get_char (); an apostrophe-led word
/// of the shape 'x' also becomes a character literal.
protected get_id (first_ch : char) : Token
{
  if (first_ch == '\'' && !IsIdBeginning (peek_or_white ())) {
    get_char ()
  }
  else {
    clear_id_buffer ();
    _ = id_buffer.Append (first_ch);

    mutable next = peek_or_white ();
    while (IsIdBeginning (next) || Char.IsDigit (next) || next == '\'') {
      _ = id_buffer.Append (read ());
      next = peek_or_white ();
    };

    def str = System.String.Intern (id_buffer.ToString ());

    if (first_ch == '\'' && str.Length == 3 && str[2] == '\'')
      Token.CharLiteral (str[1])
    else if (IsKeyword (str))
      Token.Keyword (str)
    else
      Token.Identifier (str)
  }
}

// GetCharFromHex bound to this lexer's peek / read and error reporting
protected get_char_from_hex : int * StringBuilder -> char
  = GetCharFromHex(_, _, peek, read, Message.Error (this.Location, _));

/// Reads hexadecimal digits through [peek] / [read] and converts them to a
/// character. At most [len] digits are read (4 when [len] is negative). With
/// [len] == -1 a non-hex character after at least one digit ends the
/// sequence; in any other case a non-hex character is reported through
/// [error] and '?' is returned. The digits read are also appended to
/// [rawStringBuilder] when it is not null. A value that does not fit into a
/// char is reported through [error] and yields '?'.
public static GetCharFromHex(
  len              : int,
  rawStringBuilder : StringBuilder,
  peek             : void -> char,
  read             : void -> char,
  error            : string -> void
) : char
{
  def max = if (len < 0) 4 else len;
  def buffer = StringBuilder (max);

  def loop (i) : bool {
    if (i < max) {
      def ch = peek ();
      match (ch) {
        | 'a' | 'A' | 'b' | 'B' | 'c' | 'C' | 'd' | 'D' | 'e' | 'E' | 'f' | 'F'
        | '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' =>
          _ = buffer.Append(read ());
          loop(i + 1)
        | _ when (len == -1 && i != 0) => true
        | _ =>
          error ($"bad escape character '$ch'");
          false
      }
    }
    else
      true
  }

  def ok = loop (0);

  when (rawStringBuilder != null)
    _ = rawStringBuilder.Append(buffer);

  if (ok)
    try {
      Convert.ToChar (UInt64.Parse (buffer.ToString (), Globalization.NumberStyles.HexNumber, CultureInfo.InvariantCulture));
    }
    catch {
      | _ is System.OverflowException =>
        error ($"Character literal '$buffer' must be in the range U+0000 to U+FFFF");
        '?'
    }
  else
    '?'
}

// EscapeValue bound to this lexer's hex reader and error reporting
protected escape_value : char * StringBuilder -> char
  = EscapeValue(_, _, get_char_from_hex, Message.Error (Location, _));

/// Translates the character [ch] that followed a backslash into the
/// character it denotes. [ch] is appended to [rawStringBuilder] when that is
/// not null; \x, \u and \U delegate to [get_char_from_hex] with a digit count
/// of -1, 4 and 8. An unknown escape is reported through [error] and yields
/// a space.
public static EscapeValue (
  ch                : char,
  rawStringBuilder  : StringBuilder,
  get_char_from_hex : int * StringBuilder -> char,
  error             : string -> void
) : char
{
  when (rawStringBuilder != null)
    _ = rawStringBuilder.Append(ch);

  match (ch) {
    | 'n'  => '\n'      // new line
    | 't'  => '\t'      // horizontal tab
    | 'r'  => '\r'      // carriage return
    | 'e'  => '\e'
    | 'a'  => '\u0007'  // alert
    | 'b'  => '\b'      // backspace
    | 'f'  => '\u000C'  // form feed
    | 'v'  => '\u000B'  // vertical tab
    | '"'  => '"'
    | '\'' => '\''
    | '\\' => '\\'
    | '0'  => '\0'      // null (0)
    | 'x'  => get_char_from_hex(-1, rawStringBuilder)
    | 'u'  => get_char_from_hex(4, rawStringBuilder)
    | 'U'  => get_char_from_hex(8, rawStringBuilder)
    | '$'  => '$'
    | _ =>
      error ($"bad escape character `\\$ch'");
      ' '
  }
}

/// Reads a character literal whose opening apostrophe was already consumed.
/// Throws Error when the literal is empty or longer than one character.
protected get_char () : Token.CharLiteral
{
  def v = get_string ('\'', false).value;

  if (v.Length == 0)
    throw Error ("empty character literal")
  else {
    if (v.Length != 1)
      throw Error ("character literal too long")
    else
      Token.CharLiteral (v[0])
  }
}

virtual protected ignore_comments () : void { }

/// Reads a string (or character) literal up to the closing [end_ch]; the
/// opening delimiter was already consumed. Escape sequences are decoded into
/// the token value while the raw text keeps the input as it was read. A
/// newline or end of input inside the literal is reported as an error and the
/// text collected so far is returned. Unless [is_dollarized] is set, a `$'
/// in a double-quoted string that looks like the start of a splice triggers
/// warning 10007.
protected get_string (end_ch : char, is_dollarized : bool) : Token.StringLiteral
{
  mutable tok;
  def startLine = line;
  def startCol = col - 1;   // the opening delimiter was already read
  def buf = StringBuilder ();
  def rawString = StringBuilder ();
  _ = rawString.Append(end_ch);

  def loop () {
    def ch = read ();
    _ = rawString.Append(ch);

    match (ch) {
      | '\\' =>
        def esc = escape_value (read (), rawString);
        _ = buf.Append (esc);
        loop ();

      | '$' when !is_dollarized && end_ch == '"' =>
        def next = peek_or_white ();
        when (char.IsLetter (next) || next == '_' || next == '(')
          Message.Warning (10007, this.Location, "`$' occurs inside string literal, which is not prefixed itself with `$'");
        _ = buf.Append ('$');
        loop ()

      | '\0' =>
        // the terminator is not part of the raw text
        rawString.Length--;
        Message.Error (this.Location, "Unterminated string literal") // like MS csc

      | '\n' =>
        rawString.Length--;
        Message.Error (this.Location, "Newline in constant") // like MS csc

      | ch when ch != end_ch =>
        _ = buf.Append (ch);
        loop ();

      | _ =>
        tok = Token.StringLiteral (buf.ToString (), rawString.ToString ());
        tok.Location = Location (file_idx, startLine, startCol, line, col);
        /*when (eat_whitespace () && peek_or_white () == end_ch) {
          tok = null;
          ignore_comments ();
          _ = read ();
          loop ();
        }*/
    }
  }

  loop ();

  // [tok] is still null when the loop stopped on an error (end of input or
  // newline); build the token from what has been collected so far
  if (tok == null) {
    tok = Token.StringLiteral (buf.ToString (), rawString.ToString ());
    tok.Location = Location (file_idx, startLine, startCol, line, col);
    tok
  }
  else tok;
}

/// Reads a @"..." string whose opening @" was already consumed. No escape
/// sequences are decoded; a doubled quote stands for a single quote
/// character. End of input inside the literal is reported as an error and
/// the text collected so far is returned.
protected get_monkey_string () : Token
{
  def buf = StringBuilder (32);
  def startLine = line;
  def startCol = col - 2;   // the opening @" was already read
  def rawString = StringBuilder ();
  _ = rawString.Append(<#@"#>);

  def loop () {
    def ch = read ();
    _ = rawString.Append(ch);

    match (ch) {
      | '"' =>
        match (peek_or_white ()) {
          | '"' =>
            _ = buf.Append ('"');
            // Keep the second quote of the pair in the raw text as well;
            // it used to be consumed without being recorded, so the raw
            // form of @"a""b" lost one quote.
            _ = rawString.Append (read ());
            loop ()
          | _ =>
            def tok = Token.StringLiteral (buf.ToString (), rawString.ToString ());
            tok.Location = Location (file_idx, startLine, startCol, line, col);
            tok
        }

      | '\0' =>
        Message.Error (this.Location, "Unterminated string literal"); // like MS csc
        // the terminator is not part of the raw text
        rawString.Length--;
        def tok = Token.StringLiteral (buf.ToString (), rawString.ToString ());
        tok.Location = Location (file_idx, startLine, startCol, line, col - 1);
        tok

      | ch =>
        _ = buf.Append (ch);
        loop ()
    }
  };

  loop ()
}

/// Read <# .... <# ... #> ...
/// #> string
/// The opening <# was already consumed. Nested <# ... #> pairs are kept in
/// the value verbatim; only the #> matching the outermost <# ends the
/// literal. End of input inside the literal is reported as an error and the
/// text collected so far is returned.
protected get_recursive_string () : Token
{
  def buf = StringBuilder (1024);
  def rawString = StringBuilder ();
  _ = rawString.Append("<#");
  def startLine = line;
  def startCol = col - 2;   // the opening <# was already read

  def loop (nestingLevel = 1) {
    def ch = read ();
    _ = rawString.Append(ch);

    match (ch) {
      | '<' when peek () == '#' =>
        _ = rawString.Append(read ());
        _ = buf.Append ("<#");
        loop (nestingLevel + 1)

      | '#' =>
        // Only look at the next character here. It used to be consumed
        // unconditionally, so in `##>' the second '#' was swallowed as plain
        // text and the closing `#>' was never recognized; likewise a '<' or
        // end of input directly after '#' bypassed the cases of this match.
        // NOTE(review): peek_or_white is used (as in the `]>' check of
        // do_get_token) on the assumption that it does not throw at end of
        // input - confirm against the concrete lexer.
        if (peek_or_white () == '>') {
          _ = rawString.Append (read ());
          if (nestingLevel == 1) {
            def tok = Token.StringLiteral (buf.ToString (), rawString.ToString ());
            tok.Location = Location (file_idx, startLine, startCol, line, col);
            tok
          }
          else {
            _ = buf.Append ("#>");
            loop (nestingLevel - 1)
          }
        }
        else {
          _ = buf.Append ('#');
          loop (nestingLevel)
        }

      | '\0' =>
        Message.Error (this.Location, "Unterminated string literal"); // like MS csc
        // the terminator is not part of the raw text
        rawString.Length--;
        def tok = Token.StringLiteral (buf.ToString (), rawString.ToString ());
        tok.Location = Location (file_idx, startLine, startCol, line, col - 1);
        tok

      | _ =>
        _ = buf.Append (ch);
        loop (nestingLevel)
    }
  }

  def tok = loop ();
  tok
}

// Called right after a '/' has been read (see continue_get_op and
// eat_whitespace). Both callers treat an answer of '/' as "the slash was an
// ordinary character" and any other answer as "something was skipped".
// NOTE(review): presumably the implementation consumes the comment - confirm.
abstract protected comment_beginning () : char;

/// returns true if there is some character in the input pending
/// or false if we are at the end of file
///
/// Skips blanks and line ends, lets comment_beginning () deal with what
/// follows a '/', runs preprocessor directives and - while a preprocessor
/// block is being skipped (eating_now > 0) - every other character too.
protected virtual eat_whitespace () : bool
{
  mutable eof = false;

  // Location where possible automatic token insertion will take place
  when (in_indentation_mode)
    insert_location = Location;

  // [shift] tells whether the character peeked last has to be consumed first
  def loop (shift : bool) {
    when (shift) ignore (read ());

    def ch =
      try { peek () }
      catch { _ is LexerBase.Error => eof = true; '_' };
    when (ch == '\0') eof = true;

    if (eof) {
      if (eating_now > 0) {
        Message.Error (this.Location, "unexpected end of file"
                       " before finishing preprocessor directive");
        eating_now = 0;
      }
      else
        when (!eating_stack.IsEmpty)
          Message.Error (this.Location, "there are still open"
                         " preprocessing directives at the end of file");
      false // eof, so there are no chars pending
    }
    else
      match (ch) {
        | ' ' | '\t' => loop (true);

        | '\n' | '\r' => // Single \r interpret as new line!
          white_beginning = true;
          loop (true)

        | '/' =>
          white_beginning = false;
          _ = read ();
          match (comment_beginning ()) {
            | '/' =>
              if (eating_now > 0)
                loop (false)
              else {
                // an ordinary slash: hand it to do_get_token as the pending character
                pendingChar = '/';
                isPendingChar = true;
                true
              }
            | _ => loop (false)
          }

        | '#' =>
          if (white_beginning) {
            def loc = this.Location;
            _ = read ();
            parse_preprocessor(loc);
            loop (false)
          }
          else {
            Message.Error (this.Location, "preprocessor directives must occur only in"
                           " lines beginning with whitespaces");
            // skip the rest of the offending line
            while (read_or_eol () != '\n') ();
            loop (false);
          }

        | _ =>
          white_beginning = false;
          // eat futher or just return that there is nonwhite character next
          if (eating_now > 0)
            loop (true)
          else
            true
      }
  };

  loop (false)
}

/// Produces the next token starting from the pending character, if there is
/// one, or from the next character of the input. A LexerBase.Error raised by
/// the read is turned into a space, which is reported as end of file below.
/// Throws Error for a character that cannot start any token.
virtual protected do_get_token () : Token
{
  def ch =
    if (isPendingChar) {
      isPendingChar = false;
      pendingChar
    }
    else {
      try { read () }
      catch { _ is LexerBase.Error => ' ' }
    }

  match (ch) {
    | '"' => get_string ('"', false)

    | '\'' => get_id ('\'')

    | '.' =>
      def next = peek_or_white ();
      if (Char.IsDigit (next))
        get_number (ch)
      else if (next == '.') {
        _ = read ();
        Token.Operator ("..")
      }
      else
        Token.Operator (".")

    | '{' => Token.BeginBrace (generated = false)
    | '}' => Token.EndBrace (generated = false)
    | '[' => Token.BeginSquare ()
    | ']' =>
      if (peek_or_white () == '>') {
        ignore (read ());
        Token.EndQuote ()
      }
      else
        Token.EndSquare ()
    | '(' => Token.BeginRound ()
    | ')' => Token.EndRound ()
    | ',' => Token.Comma ()
    | ';' => Token.Semicolon (generated = false)

    | '<' when peek () == '[' => ignore (read ()); Token.BeginQuote ()
    | '<' when peek () == '#' => ignore (read ()); get_recursive_string ()

    | '@' =>
      def next = read ();
      if (next == '#') // Completion token.
        Token.IdentifierToComplete ("")
      else if (IsOperatorChar (next))
        // @op makes an ordinary identifier out of an operator
        match (get_op (next)) {
          | Token.Operator (s) => Token.Identifier (s)
          | _ => Util.ice ()
        }
      else if (IsIdBeginning (next))
        // @name makes a quoted identifier, also out of a keyword
        match (get_id (next)) {
          | Token.Identifier (x)
          | Token.Keyword (x) => Token.QuotedIdentifier (x)
          | _ => Util.ice ()
        }
      else if (next == '"')
        get_monkey_string ()
      else
        throw Error ("expecting operator, identifier or string literal after '@'")

    // (the former test for a following '*' had identical branches)
    | '*' => get_op (ch)

    | '/' => get_op (ch)

    | '%' | '\\' => get_op (ch)

    // a sign directly followed by '.' is returned alone
    | '-' =>
      if (peek_or_white () != '.') get_op (ch)
      else Token.Operator ("-")

    | '+' =>
      if (peek_or_white () != '.') get_op (ch)
      else Token.Operator ("+")

    | '$' =>
      Fake();
      def startLine = this.Location.Line;
      def startCol = this.Location.Column;
      // NOTE(review): eat_whitespace () is documented to return whether a
      // character is pending, not whether whitespace was skipped; the name
      // of this local suggests otherwise - confirm the intended meaning.
      def isWhitespaceExists = eat_whitespace ();
      def next = peek ();

      match (next) {
        | '"' | '@' | '<' =>
          // we will not warn about $ in string literal in this mode
          def strStartLoc = this.Location;
          def c = read ();
          def next = peek ();

          if (c == '<' && next != '#') { // emulate parse operator
            if (isWhitespaceExists) { // emulate parse 2 operators ($ and <...
              def op = get_op ('<');
              def dolLoc = Location(strStartLoc.FileIndex, startLine, startCol - 1, startLine, startCol);
              def groupLok = dolLoc + op.Location;
              def dolTok = Token.Operator (dolLoc, "$");
              dolTok.Next = op;
              Token.RoundGroup (groupLok, Token.LooseGroup (groupLok, dolTok))
            }
            else
              get_op ('$', '<')
          }
          else {
            def str =
              if (c == '"')
                get_string (c, true)
              else if (c == '<' && next == '#') {
                ignore (read ());
                get_recursive_string ()
              }
              else {
                unless (read () == '"')
                  Message.Error (this.Location, "expected string literal after @");
                get_monkey_string ()
              }

            // the `$' operator and the string are returned as one group:
            // RoundGroup (LooseGroup ($ -> string))
            def dolLoc = Location(strStartLoc.FileIndex, startLine, startCol - 1, startLine, startCol);
            str.Location = strStartLoc + str.Location;
            def groupLok = dolLoc + str.Location;
            def dolTok = Token.Operator (dolLoc, "$");
            dolTok.Next = str;
            Token.RoundGroup (groupLok, Token.LooseGroup (groupLok, dolTok))
          }

        | _ =>
          if (isWhitespaceExists) Token.Operator ("$")
          else get_op ('$')
      }

    | '^' | '~' | '?' | '#' => get_op (ch)

    | '=' | '<' | '>' | '!' => get_op (ch)

    | '&' => get_op (ch)
    | '|' => get_op (ch)
    | ':' => get_op (ch)

    // after executing eat_whitespace it is the only possibility for space
    // (try..catch above)
    | ' ' => Token.EndOfFile ()
    | '\0' => Token.EndOfFile ()

    | _ =>
      if (Char.IsDigit (ch))
        get_number (ch)
      else if (IsIdBeginning (ch))
        get_id (ch)
      else
        throw Error ($"invalid character: `$ch'")
  }
}

// No-op called at the start of the `$' case of do_get_token.
static Fake() : void { assert(true); }

/// Returns the next token: a postponed token if there is one, a
/// Token.Indent when an indentation token is due, otherwise the result of
/// do_get_token () with its location filled in.
public virtual GetToken () : Token
{
  if (tok_pending != null) {
    def tok = tok_pending;
    tok_pending = null;
    tok
  }
  else {
    unless (isPendingChar)
      _ = eat_whitespace ();

    if (insert_indent) {
      insert_indent = false;
      Token.Indent (insert_location, indent_string)
    }
    else {
      def last_line = line;
      def last_col = col;
      def tok = do_get_token ();
      // IT: making last_line & last_col as class fields could be a better solution.
if (tok.Location.FileIndex == 0) // token carries no location yet: span from start to current position
            tok.Location = Location (file_idx, last_line, last_col, line, col);
          else
            // keep the end position the token already carries, replace file and start
            tok.Location = Location (file_idx, last_line, last_col, tok.Location.EndLine, tok.Location.EndColumn);
          tok
        }
      }
    }

    // Current position of the lexer (file index, line, column).
    public Location : Location
    {
      get { Location (file_idx, line, col) }
    }

    // Consumes input up to and including the next '\n' (as reported by
    // read_or_eol) and returns the characters collected before it.
    //
    // A '/' ends the collected text: the rest of the line is discarded, and
    // unless the '/' is followed by another '/', warning 1696 is reported
    // (only when no error has been reported so far).
    read_to_the_end_of_line () : string
    {
      mutable c = read_or_eol ();
      def line = StringBuilder (80);
      while (c != '\n') {
        if (c == '/') {
          c = read_or_eol ();
          unless (c == '/')
            when (Message.ErrorCount <= 0)
              Message.Warning (1696, "Single-line comment or end-of-line expected");
          // drop everything up to the end of the line
          while (c != '\n')
            c = read_or_eol ();
        }
        else {
          ignore (line.Append (c));
          c = read_or_eol ();
        }
      };
      line.ToString ()
    }

    // Handles one preprocessor directive; called after the '#' was consumed.
    //
    // startLocation is the position of the '#', recorded for #region.
    //
    // Conditional state lives in eating_now (saved on eating_stack by #if and
    // #region, restored by #endif and #endregion).  As used in this file:
    //   > 0  : input is being skipped (eat_whitespace keeps consuming)
    //   -1/1 : inside #if/#elif, branch taken / no branch taken yet
    //   2    : inside #if/#elif, skipping because an earlier branch was taken
    //          or the enclosing block is skipped
    //   -3/3 : after #else, active / skipped
    //   -4/4 : inside #region, active / skipped
    //
    // Throws LexerBase.Error on malformed or unbalanced directives.
    parse_preprocessor (startLocation : Location) : void
    {
      // skips whitespace but stops in front of a '\n'
      def eat_spaces () : void {
        mutable c = peek ();
        while (Char.IsWhiteSpace (c) && c != '\n') {
          ignore (read ());
          c = peek ();
        }
      }

      // reads a run of identifier characters and digits; "" if there is none
      def read_word () : string {
        eat_spaces();
        def word = StringBuilder ();
        try {
          while (IsIdBeginning (peek_or_white ()) || char.IsDigit (peek_or_white ()))
            _ = word.Append (read ())
        }
        catch { _ is LexerBase.Error => () };
        word.ToString ()
      }

      def directive = read_word ();
      when (directive == "")
        throw LexerBase.Error ("expected preprocessing directive after `#'");

      match (directive) {
        | "if" =>
          eating_stack.Push (eating_now);
          // the condition is always read and evaluated, even while skipping
          def now = evaluate_preprocessing_expr (read_to_the_end_of_line ().Trim ());
          if (eating_now > 0)
            eating_now = 2
          else
            eating_now = if (now) -1 else 1

        | "elif" =>
          def now = evaluate_preprocessing_expr (read_to_the_end_of_line ().Trim ());
          match (eating_now) {
            | -4 | 4 => throw LexerBase.Error ("unexpected #elif inside region")
            | -3 | 3 => throw LexerBase.Error ("unexpected #elif after #else")
            | 1 => eating_now = if (now) -1 else 1
            | -1 | 2 => eating_now = 2
            | _ => throw LexerBase.Error ("unbalanced preprocessing directives")
          }

        | "else" =>
          eat_spaces ();
          when (read_to_the_end_of_line () != "")
            throw LexerBase.Error ("extra tokens after #else");
          match (eating_now) {
            | -4 | 4 => throw LexerBase.Error ("unexpected #else inside region")
            | -3 | 3 => throw LexerBase.Error ("unexpected #else after #else")
            | 1 => eating_now = -3
            | -1 | 2 => eating_now = 3
            | _ => throw LexerBase.Error ("unbalanced preprocessing directives")
          }

        | "endif" =>
          eat_spaces ();
          when (read_to_the_end_of_line () != "")
            throw LexerBase.Error ("extra tokens after #endif");
          match (eating_now) {
            | -4 | 4 => throw LexerBase.Error ("unexpected #endif inside region")
            | 0 => throw LexerBase.Error ("unbalanced preprocessing directives")
            | _ => ()
          }
          eating_now = eating_stack.Pop ();

        | "line" =>
          eat_spaces ();
          mutable c = peek_or_white ();
          // (-1, null) stands for "#line default" (and for the error case)
          def (new_line, new_file) =
            if (c == 'd') {
              if (read_word () == "default") {
                eat_spaces ();
                when (read_to_the_end_of_line () != "")
                  throw LexerBase.Error ("extra tokens after directive");
                (-1, null)
              }
              else
                throw LexerBase.Error ("expecting line number or `default' indicator")
            }
            else {
              def num = StringBuilder ();
              def loop () {
                when (char.IsDigit (c)) {
                  ignore (num.Append (read ()));
                  c = peek_or_white ();
                  loop ();
                }
              }
              loop ();
              if (num.Length > 0)
                (Int32.Parse (num.ToString ()), read_to_the_end_of_line ().Trim ())
              else {
                Message.Error (this.Location, "expecting line number or `default' indicator");
                _ = read_to_the_end_of_line ();
                (-1, null)
              }
            };

          if (new_line == -1) {
            // back to real numbering: undo a previous #line and restore the real file
            when (line_stack != -1)
              line = line - line_start + line_stack;
            file_idx = Location.GetFileIndex (file_real);
            line_stack = -1;
          }
          else {
            // if there is already something on stack, bring real line first
            when (line_stack != -1)
              line = line - line_start + line_stack;
            line_start = new_line;
            line_stack = line;
            line = new_line;
            when (new_file != "")
              file_idx = Location.GetFileIndex (new_file);
          }

        | "error" =>
          Message.Error (this.Location, read_to_the_end_of_line ().Trim ());

        | "warning" =>
          Message.Warning (this.Location, read_to_the_end_of_line ().Trim ());

        | "region" =>
          eating_stack.Push (eating_now);
          eating_now = if (eating_now > 0) 4 else -4;
          incompleteRegions ::= Region (startLocation, read_to_the_end_of_line());

        | "endregion" =>
          when (eating_now != 4 && eating_now != -4)
            throw LexerBase.Error ("Unexpected #endregion");
          eating_now = eating_stack.Pop ();
          // close the innermost open region and move it to the finished list
          match (incompleteRegions) {
            | h :: t =>
              h.SetEndRegion(this.Location);
              regions ::= h;
              incompleteRegions = t;
            | [] => throw LexerBase.Error ("Unexpected preprocessor directive")
          }
          _ = read_to_the_end_of_line ();

        | "define" =>
          def w = read_word ();
          when (w == "")
            throw LexerBase.Error ("#define should be followed by name to define");
          when (w == "true" || w == "false")
            throw LexerBase.Error ($ "Attempt to define ``$w''");
          defines = defines.Replace (w, true);
          _ = read_to_the_end_of_line ()

        | "undef" =>
          def w = read_word ();
          when (w == "")
            throw LexerBase.Error ("#undef should be followed by name to undefine");
          when (w == "true" || w == "false")
            throw LexerBase.Error ($ "Attempt to undefine ``$w''");
          defines = defines.Replace (w, false);
          _ = read_to_the_end_of_line ()

        | "pragma" =>
          match (read_word ()) {
            | "warning" =>
              // the sign of the registered number encodes disable (+) / restore (-)
              def negate =
                match (read_word ()) {
                  | "disable" => 1
                  | "restore" => -1
                  | s => throw LexerBase.Error ($ "#pragma should be followed by "
                                                  "``disable'' or ``restore'', not ``$s''")
                }
              def warnings = read_to_the_end_of_line ().Replace (',', ' ').Split (null);
              mutable did_something = false;
              foreach (warn in warnings)
                unless (warn == "") {
                  did_something = true;
                  def no =
                    try { int.Parse (warn) }
                    catch {
                      | e => throw LexerBase.Error ($ "invalid warning number ``$warn'': $e")
                    }
                  Manager.Options.Warnings.AddPragmaWarning (file_idx, line, negate * no)
                }
              // no number given: register the value 1 with the chosen sign
              unless (did_something)
                Manager.Options.Warnings.AddPragmaWarning (file_idx, line, 1 * negate)

            | "indent" =>
              pragma_indent ();

            | _ =>
              Message.Warning (1633, this.Location, "unrecognized #pragma directive");
              _ = read_to_the_end_of_line ();
          }

        | x => throw LexerBase.Error ($"unsupported preprocessing directive `$x'")
      };
    }

    // Evaluates the condition of an #if / #elif directive.
    //
    // str is the text after the directive name.  Symbols are looked up in
    // `defines`; a symbol that is not present counts as false.  Supported
    // syntax: true, false, symbols, !, &&, ||, ==, != and parentheses.
    // As implemented, ! and parentheses bind tightest, then &&, then ||,
    // and ==/!= bind loosest.
    // NOTE(review): C# gives ==/!= higher precedence than && -- confirm that
    // the difference is intended.
    //
    // Throws LexerBase.Error when the text cannot be tokenized or parsed.
    evaluate_preprocessing_expr (str : string) : bool
    {
      // Splits the text into a list of tokens.  Symbols are replaced by
      // "true"/"false" right here, so the parser below only sees literals.
      def traverse (line : string, acc) {
        mutable i = 0;
        while (i < line.Length && Char.IsWhiteSpace (line [i]))
          ++i;
        def line = line.Substring (i);

        match (line) {
          | x when x.StartsWith ("||") => traverse (x.Substring (2), "||" :: acc)
          | x when x.StartsWith ("&&") => traverse (x.Substring (2), "&&" :: acc)
          | x when x.StartsWith ("==") => traverse (x.Substring (2), "==" :: acc)
          // "!=" has to be tried before "!"
          | x when x.StartsWith ("!=") => traverse (x.Substring (2), "!=" :: acc)
          | x when x.StartsWith ("(") => traverse (x.Substring (1), "(" :: acc)
          | x when x.StartsWith (")") => traverse (x.Substring (1), ")" :: acc)
          | x when x.StartsWith ("!") => traverse (x.Substring (1), "!" :: acc)
          | "" => NList.Rev (acc)
          | x =>
            mutable j = 0;
            while (j < x.Length && (IsIdBeginning (x[j]) || Char.IsDigit (x[j])))
              ++j;
            when (j == 0)
              throw LexerBase.Error ("bad preprocessing condition format");

            // The whole word is examined before deciding whether it is a
            // literal; matching "true"/"false" as a mere prefix would split
            // symbols such as `trueColor' into a literal and a second symbol.
            def val =
              match (x.Substring (0, j)) {
                | "true" => true
                | "false" => false
                | name =>
                  match (defines.Find (name)) {
                    | Some (v) => v
                    | None => false
                  }
              };
            if (val)
              traverse (x.Substring (j), "true" :: acc)
            else
              traverse (x.Substring (j), "false" :: acc)
        }
      };

      // remaining input of the recursive-descent parser below
      mutable tokens = traverse (str, []);

      // literal, negation or parenthesized expression
      def lowest () {
        match (tokens) {
          | "(" :: xs =>
            tokens = xs;
            def res = highest ();
            match (tokens) {
              | ")" :: xs => tokens = xs
              | _ => throw LexerBase.Error ("unbalanced parenthesis in preprocessing expression")
            };
            res
          | "!" :: xs => tokens = xs; !lowest ();
          | "false" :: xs => tokens = xs; false
          | "true" :: xs => tokens = xs; true
          | _ => throw LexerBase.Error ("bad expression")
        }
      }
      // chain of &&
      and low () {
        mutable res = lowest ();
        def loop () {
          match (tokens) {
            // the call comes first so the operand is always consumed,
            // even when res is already false
            | "&&" :: xs => tokens = xs; res = lowest () && res; loop ()
            | _ => ()
          }
        };
        loop ();
        res
      }
      // chain of ||
      and high () {
        mutable res = low ();
        def loop () {
          match (tokens) {
            // same ordering trick as in low ()
            | "||" :: xs => tokens = xs; res = low () || res; loop ()
            | _ => ()
          }
        };
        loop ();
        res
      }
      // chain of == / !=
      and highest () {
        mutable res = high ();
        def loop () {
          match (tokens) {
            | "==" :: xs => tokens = xs; res = high () == res; loop ()
            | "!=" :: xs => tokens = xs; res = high () != res; loop ()
            | _ => ()
          }
        }
        loop ();
        res
      };

      def res = highest ();
      when (tokens != [])
        throw LexerBase.Error ("Unexpected token after expression");
      res
    }

    // Static initializer: fills BaseKeywords with the keyword set and marks
    // the operator characters in the opchars lookup table.
    public static this ()
    {
      def tab = array [
        "_", "abstract", "and", "array", "as", "base", "catch",
        "class", "def", "delegate", "enum", "event",
        "false", "finally", "fun", "implements",
        "interface", "internal", "is", "macro", "match", "matches",
        "module", "mutable", "namespace", "new", "null", "out",
        "override", "params", "private", "protected", "public",
        "ref", "sealed", "static", "struct", "syntax", "this",
        "throw", "true", "try", "type", "typeof", "using",
        "variant", "virtual", "void", "volatile", "when", "where",
        "partial", "extern", "with"
      ];
      mutable kes = Set ();
      foreach (el in tab)
        kes = kes.Add (el);
      BaseKeywords = kes;

      def tab = array ['=', '<', '>', '@', '^', '&', '-', '+', '|', '*',
                       '/', '$', '%', '!', '?', '~', '.', ':', '#'];
      // table indexed by character code
      opchars = array (256);
      foreach (x in tab)
        opchars [x :> int] = true;
    }

    // True when every character of str is an operator character or one of
    // ( ) ; [ ] -- the empty string therefore yields true.
    public static IsOperator (str : string) : bool
    {
      def len = str.Length - 1;
      foreach (i in [0 ..
len]) {
        def c = str[i];
        if (IsOperatorChar (c))
          ()
        else
          match (c) {
            | '(' | ')' | ';' | '[' | ']' => ()
            | _ => Nemerle.Imperative.Return (false);
          }
      };
      true
    }

    // True when str consists only of letters, digits and '_'
    // (the empty string yields true).
    public static HasKeywordChars (str : string) : bool
    {
      return : {
        foreach (c in str) {
          unless (Char.IsLetterOrDigit(c) || c == '_')
            return(false);
        }
        true
      }
    }
  } // end class LexerBase

  // Lexer reading its input from a file.  The whole file is read into memory
  // by the constructor; documentation comments found while skipping
  // whitespace are collected in comment_store and handed out by GetToken as
  // Token.Comment.
  public class LexerFile : LexerBase
  {
    reader : System.IO.TextReader;
    // text of documentation comments not yet returned as a token
    comment_store : StringBuilder;
    // location of the comment text held in comment_store
    mutable comment_loc : Location;

    // Warns when the last byte of the file is not '\n'.
    // The stream position is reset to the beginning afterwards.
    private check_last_line_for_lf (file : IO.FileStream) : void
    {
      // An empty file has no last byte: seeking to -1 from the end would
      // throw and be reported by the constructor as "cannot open file".
      when (file.Length > (0 :> Int64)) {
        _ = file.Seek (-1 :> Int64, IO.SeekOrigin.End);
        def inp = file.ReadByte ();
        if (inp >= 0) {
          def ch = Convert.ToChar (inp);
          when (ch != '\n')
            Message.Warning (this.Location, "no new line at the end of the file");
        }
        else
          throw LexerBase.Error ("unexpected end of file");
        _ = file.Seek (0 :> Int64, IO.SeekOrigin.Begin);
      }
    }

    public override Dispose () : void
    {
      (reader : IDisposable).Dispose ();
    }

    // Opens filePath, decodes it as UTF-8 (invalid bytes raise an error) and
    // caches the complete text.  When warning 10002 is enabled the file is
    // also checked for a trailing new line.  Any failure is reported through
    // Message.FatalError.
    public this (manager : ManagerClass, filePath : string)
    {
      base (manager, filePath);
      comment_store = StringBuilder (300);
      file_idx = Location.AddFile (filePath);
      try {
        def file = IO.FileStream (filePath, IO.FileMode.Open, IO.FileAccess.Read);
        try {
          def stream = IO.StreamReader (file, Text.UTF8Encoding (true, true));
          reader = System.IO.StringReader(stream.ReadToEnd()); // cache text
          when (manager.Options.Warnings.IsEnabled (10002))
            check_last_line_for_lf (file);
        }
        finally {
          // the text is cached, so release the file handle right away instead
          // of leaving it to the finalizer
          file.Close ();
        }
      }
      catch {
        | e => Message.FatalError (Location(filePath, 1, 1, 1, 1), $"cannot open file `$filePath': $(e.Message)")
      }
    }

    // next character without consuming it; '\0' at the end of input
    override protected peek () : char
    {
      def inp = reader.Peek ();
      if (inp >= 0) inp :> char else '\0'
    }

    // like peek, but yields ' ' at the end of input
    override protected peek_or_white () : char
    {
      def inp = reader.Peek ();
      if (inp >= 0) inp :> char else ' '
    }

    // consumes one character; '\0' at the end of input
    override protected read_from_input () : char
    {
      def inp = reader.Read ();
      if (inp >= 0) (inp :> char) else '\0'
    }

    // Looks at the character following a '/'.
    //
    // If it starts a comment ("//" or "/*"), the comment is consumed and ' '
    // is returned; otherwise nothing is consumed and '/' is returned.  With
    // Manager.Options.LexerStoreComments set, the text of "///" and "/**"
    // comments is appended to comment_store and comment_loc is updated.
    override protected comment_beginning () : char
    {
      def startLocation = this.Location;
      match (peek_or_white ()) {
        | '/' =>
          // we are for sure in one line comment
          _ = read ();
          try {
            if (Manager.Options.LexerStoreComments && peek () == '/') {
              // "///": keep the text, including the terminating '\n'
              comment_loc = Location (file_idx, line, col - 2);
              _ = read ();
              mutable cc = ' ';
              do {
                cc = read_or_eol ();
                _ = comment_store.Append (cc)
              } while (cc != '\n');
              comment_loc = comment_loc + Location (file_idx, line, col);
            }
            else
              while (read_or_eol () != '\n') {};
          }
          catch { _ is LexerBase.Error => () };
          white_beginning = true;
          // pass whitespace, so next read would be eof checked
          ' '

        | '*' =>
          // multiline comment
          _ = read ();
          // consumes up to and including the closing "*/"; seen_star tells
          // whether the previous character was '*'
          def loop1 (seen_star, store) {
            def cc = read ();
            when (store)
              ignore (comment_store.Append (cc));
            match (cc) {
              | '*' => loop1 (true, store)
              | '/' when seen_star => ()
              | '\0' => Message.Error (startLocation, "End-of-file found, '*/' expected!") // like MS csc
              | _ => loop1 (false, store)
            }
          };
          if (Manager.Options.LexerStoreComments && peek_or_white () == '*') {
            // "/**": keep the text, then cut the stored terminator off again
            comment_loc = Location (file_idx, line, col - 2);
            _ = read ();
            loop1 (true, true);
            if (comment_store.Length == 1)
              ignore (comment_store.Remove (0, 1))
            else
              ignore (comment_store.Remove (comment_store.Length - 2, 2));
            comment_loc = comment_loc + Location (file_idx, line, col);
          }
          else
            loop1 (false, false);
          // pass whitespace, so next read would be eof checked
          ' '

        | _ => '/'
      }
    }

    // Drops collected documentation comments, with a warning if there were any.
    override protected ignore_comments () : void
    {
      when (comment_store.Length > 0) {
        Message.Warning (this.Location, "documentation comments between literals are ignored");
        comment_store.Length = 0;
      }
    }

    // Like LexerBase.GetToken, but a documentation comment collected while
    // skipping whitespace is returned first as a Token.Comment.
    public override GetToken () : Token
    {
      unless (isPendingChar)
        _ = eat_whitespace ();

      if (comment_store.Length > 0) {
        def res = Token.Comment (comment_store.ToString ());
        res.Location = comment_loc;
        comment_store.Length = 0;
        res
      }
      else
        base.GetToken ()
    }
  } // end class LexerFile

  // Lexer reading its input from an in-memory string.
  public class LexerString : LexerBase
  {
    // the text being lexed
    protected mutable reader : string;
    // index of the next character in reader
    protected mutable pos : int;

    public this (manager : ManagerClass, code : string, loc : Location)
    {
      base (manager, loc.File);
      reader = code;
      pos = 0;
      file_idx = loc.FileIndex;
      line = loc.Line;
      col = loc.Column;
      if
(line > 0 && col > 0)
        // one -1 entry is added to _linesLengths per line up to the start line
        repeat (line)
          _linesLengths.Add(-1);
      else {
        // this can happen only if it is a ScanLexer!
        assert2(line == 0 && col == 0 && GetType().Name == "ScanLexer");
        _linesLengths = null;
      }
    }

    // nothing to release: the input is a plain string
    public override Dispose () : void
    {
    }

    // next character without consuming it; '\0' at the end of input
    override protected peek () : char
    {
      if (pos < reader.Length) reader[pos] else '\0'
    }

    // like peek, but yields ' ' at the end of input
    override protected peek_or_white () : char
    {
      if (pos < reader.Length) reader[pos] else ' '
    }

    // consumes one character; '\0' at the end of input
    override protected read_from_input () : char
    {
      if (pos < reader.Length) {
        def ch = reader[pos];
        ++pos;
        ch
      }
      else
        '\0'
    }

    // Looks at the character following a '/'.
    //
    // If it starts a comment ("//" or "/*"), the comment is consumed and ' '
    // is returned; otherwise nothing is consumed and '/' is returned.
    // Comment text is never stored here: the storing statements are
    // commented out, so the `store' argument of loop1 is unused.
    override protected comment_beginning () : char
    {
      def startLocation = this.Location;
      match (peek_or_white ()) {
        | '/' =>
          // we are for sure in one line comment
          _ = read ();
          try {
            if (Manager.Options.LexerStoreComments && peek () == '/') {
              //comment_loc = Location (file_idx, line, col - 2);
              _ = read ();
              mutable cc = ' ';
              do {
                cc = read_or_eol ();
                //_ = comment_store.Append (cc)
              } while (cc != '\n');
              //comment_loc = comment_loc + Location (file_idx, line, col);
            }
            else
              while (read_or_eol () != '\n') {};
          }
          catch { _ is LexerBase.Error => () };
          white_beginning = true;
          // pass whitespace, so next read would be eof checked
          ' '

        | '*' =>
          // multiline comment
          _ = read ();
          // consumes up to and including the closing "*/"; seen_star tells
          // whether the previous character was '*'
          def loop1 (seen_star, store) {
            def cc = read ();
            //when (store)
            //  ignore (comment_store.Append (cc));
            match (cc) {
              | '*' => loop1 (true, store)
              | '/' when seen_star => ()
              | '\0' => Message.Error (startLocation, "End-of-file found, '*/' expected!") // like MS csc
              | _ => loop1 (false, store)
            }
          };
          if (Manager.Options.LexerStoreComments && peek_or_white () == '*') {
            //comment_loc = Location (file_idx, line, col - 2);
            _ = read ();
            loop1 (true, true);
            //if (comment_store.Length == 1)
            //  ignore (comment_store.Remove (0, 1))
            //else
            //  ignore (comment_store.Remove (comment_store.Length - 2, 2));
            //comment_loc = comment_loc + Location (file_idx, line, col);
          }
          else
            loop1 (false, false);
          // pass whitespace, so next read would be eof checked
          ' '

        | _ => '/'
      }
    }
  } // LexerString

  // String lexer that reports a completion point: around the position given
  // by CompletionMarkLine / CompletionMarkChar it produces a
  // Token.IdentifierToComplete.
  public class LexerCompletion : LexerString
  {
    // position of the completion mark; both are set to -1 once it was handled
    protected mutable CompletionMarkLine : int;
    protected mutable CompletionMarkChar : int;

    public this (
      manager            : ManagerClass,
      code               : string,
      loc                : Location,
      completionMarkLine : int,
      completionMarkChar : int
    )
    {
      base (manager, code, loc);
      CompletionMarkLine = completionMarkLine;
      CompletionMarkChar = completionMarkChar;
    }

    // Returns the next token, substituting or inserting an
    // IdentifierToComplete token at the completion mark.
    public override GetToken () : Token
    {
      def tok = base.GetToken ();
      // marks the completion point as handled so it triggers only once
      def resetCompletionMark() {
        CompletionMarkLine = -1;
        CompletionMarkChar = -1;
      }
      def loc = tok.Location;

      if (loc.Contains(CompletionMarkLine, CompletionMarkChar))
        //TODO: Extract substring from token.
        match (tok) {
          // a string literal is returned unchanged and the mark is kept
          | Token.StringLiteral => tok
          // the identifier itself becomes the prefix to complete
          | Token.Identifier (name) =>
            resetCompletionMark ();
            Token.IdentifierToComplete (name)
          // "." and ":" are returned as-is, followed by an empty completion token
          | Token.Operator (".")
          | Token.Operator (":") =>
            resetCompletionMark ();
            tok_pending = Token.IdentifierToComplete ("");
            tok
          // other operators and keywords: their text becomes the prefix
          | Token.Keyword
          | Token.Operator =>
            resetCompletionMark();
            Token.IdentifierToComplete(tok.ToString())
          // anything else: empty completion token first, the token itself next
          | _ =>
            resetCompletionMark();
            tok_pending = tok;
            Token.IdentifierToComplete("")
        }
      else if (CompletionMarkLine >= 0 &&
               (loc.Line > CompletionMarkLine ||
                loc.Line == CompletionMarkLine && loc.Column > CompletionMarkChar)) {
        // the token starts after the mark: emit the completion token before it
        resetCompletionMark ();
        tok_pending = tok;
        Token.IdentifierToComplete ("");
      }
      else
        tok
    }
  }
} // end ns