[svn] r6350: nemerle/trunk/ncc/parsing: Lexer.n LexerColorizer.n
NoiseEHC
svnadmin at nemerle.org
Tue May 30 15:09:07 CEST 2006
Log:
LexerColorizer refactored to a new file.
LexerHack seems to initialize the new engine properly :)
Author: NoiseEHC
Date: Tue May 30 15:09:05 2006
New Revision: 6350
Added:
nemerle/trunk/ncc/parsing/LexerColorizer.n
Modified:
nemerle/trunk/ncc/parsing/Lexer.n
Modified: nemerle/trunk/ncc/parsing/Lexer.n
==============================================================================
--- nemerle/trunk/ncc/parsing/Lexer.n (original)
+++ nemerle/trunk/ncc/parsing/Lexer.n Tue May 30 15:09:05 2006
@@ -1638,406 +1638,4 @@
| EndOfFile
}
-[ManagerAccess]
-public class LexerColorizer
-{
- [Record]
- public class SyntaxToken {
- public StartPos : int;
- public EndPos : int;
- public Token : SyntaxType;
- }
-
- mutable reader : string;
- mutable pos : int;
-
- public this (fn : string)
- {
- reader = fn;
- pos = 0;
- }
-
- public SetString (text : string, offset : int) : void {
- reader = text;
- pos = offset;
- }
-
- read () : char
- {
- if (pos < reader.Length) {
- def ch = reader[pos];
- ++pos;
- ch
- } else throw LexerBase.Error ("unexpected end of code text")
- }
-
- peek () : char
- {
- if (pos < reader.Length)
- reader[pos]
- else
- (0 :> char)
- }
-
- get_op () : SyntaxType
- {
- def loop () {
- if (LexerBase.IsOperatorChar (peek ())) {
- def c = read ();
- if (c == '/' && (peek () == '/' || peek () == '*')) {
- --pos;
- SyntaxType.Operator
- }
- else
- loop ();
- }
- else
- SyntaxType.Operator
- };
- loop ();
- }
-
- get_number () : SyntaxType
- {
- mutable already_seen_type = false; // for the case 0b0 vs 0b
-
- mutable mode =
- match (read ()) {
- | '.' => LexerBase.NumberMode.Float
- | '0' =>
- match (peek ()) {
- | 'x' | 'X' => ++pos; LexerBase.NumberMode.Hex
- | 'o' | 'O' => ++pos; LexerBase.NumberMode.Octal
- | 'b' | 'B' =>
- ++pos;
- unless (Char.IsDigit (peek ())) already_seen_type = true;
- LexerBase.NumberMode.Binary
-
- | x when Char.IsDigit (x) =>
- Message.Warning ("trailing zeros look like"
- " octal modifiers, but they are not");
- LexerBase.NumberMode.Decimal
-
- | _ => LexerBase.NumberMode.Decimal
- }
- | _ => LexerBase.NumberMode.Decimal
- };
- mutable last_was_digit = true;
-
- // read digits and . between them if it is present
- def loop () {
- match (peek ()) {
- | '.' =>
- when (mode == LexerBase.NumberMode.Decimal) {
- mode = LexerBase.NumberMode.Float;
- ++pos;
- if (Char.IsDigit (peek ()))
- loop ()
- else {
- --pos;
- mode = LexerBase.NumberMode.Decimal;
- }
- }
-
- | 'a' | 'A' | 'b' | 'B' | 'c' | 'C' | 'd' | 'D'
- | 'e' | 'E' | 'f' | 'F' =>
- when (mode == LexerBase.NumberMode.Hex) {
- ++pos;
- loop ();
- }
-
- | x when Char.IsDigit (x) =>
- ++pos;
- loop ()
- | _ => ()
- }
- };
- loop ();
-
- def exponent_part (only_realsuf) {
- when (!only_realsuf) {
- match (peek ()) {
- | 'E' | 'e' =>
- ++pos;
- match (peek ()) {
- | '+' | '-' => ++pos;
- | _ => ()
- };
- if (Char.IsDigit (peek ()))
- do {
- ++pos;
- } while (Char.IsDigit (peek ()))
- else
- throw LexerBase.Error ("no digits after exponent sign in float literal")
- | _ => ()
- }
- };
- SyntaxType.FloatLiteral
- };
-
- def check_type_suffix () {
- /// we should have integer number here
-
- def special (c) {
- | 'l' | 's' | 'b' | 'u' => true
- | _ => false
- };
-
- // check suffixes to make special types conversions
- mutable ch =
- if (already_seen_type)
- 'b'
- else
- Char.ToLower (peek (), CultureInfo.InvariantCulture);
- if (special (ch)) {
- unless (already_seen_type) ++pos;
-
- // we can have two letter suffixes
- def ch' = Char.ToLower (peek (), CultureInfo.InvariantCulture);
- when (special (ch')) ++pos;
- SyntaxType.IntLiteral
- }
- else
- SyntaxType.IntLiteral
- };
-
- match (mode) {
- | LexerBase.NumberMode.Float =>
- match (peek ()) {
- | 'E' | 'e' =>
- exponent_part (false)
- | _ =>
- exponent_part (true)
- }
- | LexerBase.NumberMode.Decimal =>
- if (last_was_digit)
- match (Char.ToLower (peek (), CultureInfo.InvariantCulture)) {
- | 'e' =>
- exponent_part (false)
- | 'f' | 'd' | 'm' =>
- exponent_part (true)
- | _ =>
- check_type_suffix ()
- }
- else
- SyntaxType.IntLiteral
-
- | LexerBase.NumberMode.Hex
- | LexerBase.NumberMode.Binary
- | LexerBase.NumberMode.Octal => check_type_suffix ()
- }
- }
-
- get_id () : SyntaxType
- {
- def first_ch = read ();
- if (first_ch == '\'' && !LexerBase.IsIdBeginning (peek ()))
- get_char ()
- else {
- def id_buffer = StringBuilder ();
- _ = id_buffer.Append (first_ch);
-
- while (LexerBase.IsIdBeginning (peek ()) ||
- Char.IsDigit (peek ()) ||
- peek () == '\'')
- _ = id_buffer.Append (read ());
-
- def str = id_buffer.ToString ();
-
- if (first_ch == '\'' && str.Length == 3 && str[2] == '\'')
- SyntaxType.CharLiteral
- else if (Manager.CoreEnv.IsKeyword (str))
- SyntaxType.Keyword
- else
- SyntaxType.Identifier
- }
- }
-
-
- get_char () : SyntaxType
- {
- pos += 2;
- SyntaxType.CharLiteral
- }
-
- get_string (end_ch : char) : SyntaxType
- {
- def loop () {
- if (pos >= reader.Length)
- SyntaxType.EndOfFile
- else
- match (read ()) {
- | '\\' => ++pos; loop ()
- | ch when ch != end_ch => loop ()
- | _ => SyntaxType.StringLiteral
- }
- };
- loop ()
- }
-
- get_monkey_string () : SyntaxType
- {
- def loop () {
- match (read ()) {
- | '"' =>
- match (peek ()) {
- | '"' =>
- ++pos;
- loop ()
- | _ => SyntaxType.StringLiteral
- }
- | _ => loop ()
- }
- };
-
- loop ()
- }
-
- comment_beginning (state : ref int) : char
- {
- match (peek ()) {
- | '/' =>
- // we are for sure in one line comment
- while (pos < reader.Length && read () != '\n') {};
- // pass whitespace, so next read would be eof checked
- ' '
-
- | '*' =>
- // multiline comment
- ++pos;
- unless (eat_comment ()) state = 1;
- // pass whitespace, so next read would be eof checked
- ' '
-
- | _ => '/'
- }
- }
-
- eat_comment () : bool
- {
- def loop1 (seen_star) {
- if (pos < reader.Length)
- match (read ()) {
- | '*' => loop1 (true)
- | '/' when seen_star => true
- | _ => loop1 (false)
- }
- else
- false
- };
- loop1 (false);
- }
-
- get_token () : SyntaxType
- {
- if (pos >= reader.Length) SyntaxType.EndOfFile
- else
- match (peek ()) {
- | '"' => ++pos; get_string ('"')
-
- | '\'' => get_id ()
-
- | '.' =>
- ++pos;
- if (Char.IsDigit (peek ()))
- get_number ()
- else
- SyntaxType.Operator
-
- | '@' =>
- ++pos;
- def next = peek ();
- if (LexerBase.IsOperatorChar (next))
- get_op ();
- else if (LexerBase.IsIdBeginning (next)) {
- _ = get_id ();
- SyntaxType.Identifier
- }
- else if (next == '"') {
- ++pos;
- get_monkey_string ()
- }
- else
- throw LexerBase.Error ("expecting operator, identifier or string literal after '@'")
-
- | '{' | '}' | '[' | ']'
- | '(' | ')'
- =>
- ++pos; SyntaxType.OperatorBrace
-
- | ',' | ';' | '\\'
- =>
- ++pos;
- get_op ();
-
- | ' ' => SyntaxType.EndOfFile
-
- | ch =>
- if (Char.IsDigit (ch))
- get_number ()
- else
- if (LexerBase.IsIdBeginning (ch))
- get_id ()
- else
- if (LexerBase.IsOperatorChar (ch))
- get_op ()
- else
- throw LexerBase.Error ("invalid character")
- }
- }
-
- public GetSyntaxToken (state : ref int) : SyntaxToken {
- mutable begin_pos = pos;
- def loop (c) {
- | ' ' | '\t' | '\r' | '\n' =>
- //System.Diagnostics.Debug.WriteLine("loop(whitespace) " + c.ToString());
- if (pos < reader.Length) {
- ++begin_pos;
- loop (read ())
- } else
- SyntaxType.EndOfFile
-
- | '/' =>
- //System.Diagnostics.Debug.WriteLine("loop(/) " + c.ToString());
- match (comment_beginning (ref state)) {
- | '/' => loop (' ')
- | _ =>
- //++begin_pos; // loop (' ') increments in the FUCK case so we have too (but not here, dunno why...)
- SyntaxType.Comment
- }
-
- | '#' =>
- //System.Diagnostics.Debug.WriteLine("loop(#) " + c.ToString());
- throw LexerBase.Error ("preprocessor directives may not occur in"
- " string programs")
-
- | _ =>
- //System.Diagnostics.Debug.WriteLine("loop(else) " + c.ToString());
- --pos; get_token ()
- };
- def tok =
- if (pos >= reader.Length) {
- //System.Diagnostics.Debug.WriteLine("if (pos >= reader.Length)");
- SyntaxType.EndOfFile
- } else if (state == 1) {
- //System.Diagnostics.Debug.WriteLine("} else if (state == 1)");
- ++begin_pos; // loop (' ') increments in the FUCK case so we have too...
- when (eat_comment ()) state = 0;
- SyntaxType.Comment
- }
- else {
- //System.Diagnostics.Debug.WriteLine("else {");
- try {
- //FUCK: because it works as putting an extra char before then everything else has
- // to increment begin_pos, and later we have to substract 1
- loop (' ');
- }
- catch {
- | _ is LexerBase.Error => SyntaxType.EndOfFile
- }
- }
- SyntaxToken (begin_pos-1, pos - 1, tok);
- }
-} // end class LexerColorizer
-
} // end ns
Added: nemerle/trunk/ncc/parsing/LexerColorizer.n
==============================================================================
--- (empty file)
+++ nemerle/trunk/ncc/parsing/LexerColorizer.n Tue May 30 15:09:05 2006
@@ -0,0 +1,435 @@
+using System.Text;
+using System.Globalization;
+using System;
+
+using Nemerle.Collections;
+using Nemerle.Utility;
+
+namespace Nemerle.Compiler {
+
+[ManagerAccess]
+public class LexerColorizer
+{
+ [Record] // presumably generates the three-argument constructor used in GetSyntaxToken: SyntaxToken (begin_pos-1, pos - 1, tok)
+ public class SyntaxToken { // one classified span of the text being colorized
+ public StartPos : int; // GetSyntaxToken passes begin_pos-1 here
+ public EndPos : int; // GetSyntaxToken passes pos - 1 here
+ public Token : SyntaxType; // classification of the span (Keyword, Comment, StringLiteral, EndOfFile, ...)
+ }
+
+ mutable reader : string; // text currently being scanned; replaced by SetString
+ mutable pos : int; // index into reader of the next character to be read
+
+ public this (man : ManagerClass) // creates a colorizer with empty text; call SetString before scanning
+ {
+ Manager = man; // Manager is later used for Manager.CoreEnv.IsKeyword in get_id (presumably declared by [ManagerAccess] -- confirm)
+ reader = "";
+ pos = 0;
+ }
+
+ public SetString (text : string, offset : int) : void { // replaces the scanned text and restarts scanning at index offset
+ reader = text;
+ pos = offset; // no range check is done on offset here
+ }
+
+ read () : char // returns the character at pos and advances pos by one
+ {
+ if (pos < reader.Length) {
+ def ch = reader[pos];
+ ++pos;
+ ch
+ } else throw LexerBase.Error ("unexpected end of code text") // end of text: callers see LexerBase.Error (GetSyntaxToken catches it)
+ }
+
+ peek () : char // returns the character at pos without advancing
+ {
+ if (pos < reader.Length)
+ reader[pos]
+ else
+ (0 :> char) // at or past the end of the text: character code 0 instead of throwing
+ }
+
+ get_op () : SyntaxType // consumes a run of operator characters; every path yields SyntaxType.Operator
+ {
+ def loop () {
+ if (LexerBase.IsOperatorChar (peek ())) {
+ def c = read ();
+ if (c == '/' && (peek () == '/' || peek () == '*')) { // this '/' starts a "//" or "/*" comment
+ --pos; // un-read the '/' so the comment start is left for the next token
+ SyntaxType.Operator
+ }
+ else
+ loop ();
+ }
+ else
+ SyntaxType.Operator // next character is not an operator character: stop
+ };
+ loop ();
+ }
+
+ get_number () : SyntaxType // skips over a numeric literal starting at pos; returns IntLiteral or FloatLiteral
+ {
+ mutable already_seen_type = false; // for the case 0b0 vs 0b: set when "0b" is not followed by a digit, so that 'b' counts as a type suffix
+
+ mutable mode = // number mode chosen from the first character (and, after '0', the one following it)
+ match (read ()) {
+ | '.' => LexerBase.NumberMode.Float
+ | '0' =>
+ match (peek ()) {
+ | 'x' | 'X' => ++pos; LexerBase.NumberMode.Hex
+ | 'o' | 'O' => ++pos; LexerBase.NumberMode.Octal
+ | 'b' | 'B' =>
+ ++pos;
+ unless (Char.IsDigit (peek ())) already_seen_type = true;
+ LexerBase.NumberMode.Binary
+
+ | x when Char.IsDigit (x) => // NOTE(review): this is a leading '0' followed by a digit, although the warning text below says "trailing"
+ Message.Warning ("trailing zeros look like"
+ " octal modifiers, but they are not");
+ LexerBase.NumberMode.Decimal
+
+ | _ => LexerBase.NumberMode.Decimal
+ }
+ | _ => LexerBase.NumberMode.Decimal
+ };
+ mutable last_was_digit = true; // never reassigned in this method, so the else branch that tests it below is not taken
+
+ // read digits and . between them if it is present; a '.' is consumed only in Decimal mode and only when a digit follows it
+ def loop () {
+ match (peek ()) {
+ | '.' =>
+ when (mode == LexerBase.NumberMode.Decimal) {
+ mode = LexerBase.NumberMode.Float;
+ ++pos;
+ if (Char.IsDigit (peek ()))
+ loop ()
+ else {
+ --pos; // no digit after the '.': give it back and stay Decimal
+ mode = LexerBase.NumberMode.Decimal;
+ }
+ }
+
+ | 'a' | 'A' | 'b' | 'B' | 'c' | 'C' | 'd' | 'D'
+ | 'e' | 'E' | 'f' | 'F' =>
+ when (mode == LexerBase.NumberMode.Hex) { // letters are digits only in Hex mode; otherwise scanning stops here
+ ++pos;
+ loop ();
+ }
+
+ | x when Char.IsDigit (x) =>
+ ++pos;
+ loop ()
+ | _ => ()
+ }
+ };
+ loop ();
+
+ def exponent_part (only_realsuf) { // always yields FloatLiteral; consumes an exponent only when only_realsuf is false
+ when (!only_realsuf) {
+ match (peek ()) {
+ | 'E' | 'e' =>
+ ++pos;
+ match (peek ()) {
+ | '+' | '-' => ++pos; // optional exponent sign
+ | _ => ()
+ };
+ if (Char.IsDigit (peek ()))
+ do {
+ ++pos;
+ } while (Char.IsDigit (peek ()))
+ else
+ throw LexerBase.Error ("no digits after exponent sign in float literal")
+ | _ => ()
+ }
+ }; // NOTE(review): when only_realsuf is true nothing is consumed here, so an 'f'/'d'/'m' suffix character stays unread -- confirm intended
+ SyntaxType.FloatLiteral
+ };
+
+ def check_type_suffix () { // consumes up to two suffix letters out of l, s, b, u; always yields IntLiteral
+ /// we should have integer number here
+
+ def special (c) {
+ | 'l' | 's' | 'b' | 'u' => true
+ | _ => false
+ };
+
+ // check suffixes to make special types conversions
+ mutable ch = // assigned once; the 'b' of a bare "0b" was already consumed and stands in as the first suffix letter
+ if (already_seen_type)
+ 'b'
+ else
+ Char.ToLower (peek (), CultureInfo.InvariantCulture);
+ if (special (ch)) {
+ unless (already_seen_type) ++pos;
+
+ // we can have two letter suffixes
+ def ch' = Char.ToLower (peek (), CultureInfo.InvariantCulture);
+ when (special (ch')) ++pos;
+ SyntaxType.IntLiteral
+ }
+ else
+ SyntaxType.IntLiteral
+ };
+
+ match (mode) { // pick the suffix/exponent handling for the mode reached after the digit loop
+ | LexerBase.NumberMode.Float =>
+ match (peek ()) {
+ | 'E' | 'e' =>
+ exponent_part (false)
+ | _ =>
+ exponent_part (true)
+ }
+ | LexerBase.NumberMode.Decimal =>
+ if (last_was_digit)
+ match (Char.ToLower (peek (), CultureInfo.InvariantCulture)) {
+ | 'e' =>
+ exponent_part (false)
+ | 'f' | 'd' | 'm' => // a real-number suffix directly after an integer part is reported as FloatLiteral
+ exponent_part (true)
+ | _ =>
+ check_type_suffix ()
+ }
+ else
+ SyntaxType.IntLiteral
+
+ | LexerBase.NumberMode.Hex
+ | LexerBase.NumberMode.Binary
+ | LexerBase.NumberMode.Octal => check_type_suffix ()
+ }
+ }
+
+ get_id () : SyntaxType // scans an identifier, keyword or apostrophe-led token starting at pos
+ {
+ def first_ch = read ();
+ if (first_ch == '\'' && !LexerBase.IsIdBeginning (peek ())) // apostrophe not followed by an identifier start: hand over to get_char
+ get_char ()
+ else {
+ def id_buffer = StringBuilder ();
+ _ = id_buffer.Append (first_ch);
+
+ while (LexerBase.IsIdBeginning (peek ()) || // keep consuming identifier-start characters, digits and apostrophes
+ Char.IsDigit (peek ()) ||
+ peek () == '\'')
+ _ = id_buffer.Append (read ());
+
+ def str = id_buffer.ToString ();
+
+ if (first_ch == '\'' && str.Length == 3 && str[2] == '\'') // exactly three characters of the form 'x'
+ SyntaxType.CharLiteral
+ else if (Manager.CoreEnv.IsKeyword (str))
+ SyntaxType.Keyword
+ else
+ SyntaxType.Identifier
+ }
+ }
+
+
+ get_char () : SyntaxType // called from get_id after the opening apostrophe has been read
+ {
+ pos += 2; // skips two characters with no bounds check -- NOTE(review): presumably the character and the closing quote; longer escapes such as '\n' and end of text are not handled here
+ SyntaxType.CharLiteral
+ }
+
+ get_string (end_ch : char) : SyntaxType // scans up to and including the closing end_ch; get_token has already skipped the opening quote
+ {
+ def loop () {
+ if (pos >= reader.Length)
+ SyntaxType.EndOfFile // text ended before the terminator was found
+ else
+ match (read ()) {
+ | '\\' => ++pos; loop () // backslash: skip the following character too, so an escaped end_ch does not terminate
+ | ch when ch != end_ch => loop ()
+ | _ => SyntaxType.StringLiteral // read end_ch
+ }
+ };
+ loop ()
+ }
+
+ get_monkey_string () : SyntaxType // scans the body of an @"..." string; get_token has already skipped the '@' and the opening quote
+ {
+ def loop () {
+ match (read ()) { // read () throws LexerBase.Error if the text ends before the closing quote
+ | '"' =>
+ match (peek ()) {
+ | '"' => // a doubled quote does not terminate the string: skip it and continue
+ ++pos;
+ loop ()
+ | _ => SyntaxType.StringLiteral // a single quote terminates the string
+ }
+ | _ => loop ()
+ }
+ };
+
+ loop ()
+ }
+
+ comment_beginning (state : ref int) : char // called by GetSyntaxToken after a '/' was read; returns ' ' if a comment was consumed, '/' otherwise
+ {
+ match (peek ()) {
+ | '/' =>
+ // we are for sure in one line comment: consume through the newline or to the end of the text
+ while (pos < reader.Length && read () != '\n') {};
+ // pass whitespace, so next read would be eof checked
+ ' '
+
+ | '*' =>
+ // multiline comment
+ ++pos;
+ unless (eat_comment ()) state = 1; // text ended inside the comment: state 1 makes the next GetSyntaxToken call resume it
+ // pass whitespace, so next read would be eof checked
+ ' '
+
+ | _ => '/' // the '/' did not start a comment; nothing further was consumed
+ }
+ }
+
+ eat_comment () : bool // consumes text up to and including "*/"; true if the terminator was found, false if the text ended first
+ {
+ def loop1 (seen_star) { // seen_star: the previously read character was '*'
+ if (pos < reader.Length)
+ match (read ()) {
+ | '*' => loop1 (true)
+ | '/' when seen_star => true
+ | _ => loop1 (false)
+ }
+ else
+ false
+ };
+ loop1 (false);
+ }
+
+ get_token () : SyntaxType // classifies the token starting at pos by its first character and advances pos past it
+ {
+ if (pos >= reader.Length) SyntaxType.EndOfFile
+ else
+ match (peek ()) {
+ | '"' => ++pos; get_string ('"') // skip the opening quote, then scan to the closing one
+
+ | '\'' => get_id () // get_id decides between a char literal and an apostrophe-led identifier
+
+ | '.' =>
+ ++pos;
+ if (Char.IsDigit (peek ()))
+ get_number () // NOTE(review): pos is already past the '.', so get_number's first read () sees the digit, not '.' -- confirm ".5" is classified as intended
+ else
+ SyntaxType.Operator
+
+ | '@' =>
+ ++pos;
+ def next = peek ();
+ if (LexerBase.IsOperatorChar (next))
+ get_op ();
+ else if (LexerBase.IsIdBeginning (next)) {
+ _ = get_id (); // result discarded: anything after '@' is reported as Identifier, even a keyword
+ SyntaxType.Identifier
+ }
+ else if (next == '"') {
+ ++pos;
+ get_monkey_string ()
+ }
+ else
+ throw LexerBase.Error ("expecting operator, identifier or string literal after '@'")
+
+ | '{' | '}' | '[' | ']'
+ | '(' | ')'
+ =>
+ ++pos; SyntaxType.OperatorBrace
+
+ | ',' | ';' | '\\'
+ =>
+ ++pos;
+ get_op (); // also consumes any operator characters that directly follow
+
+ | ' ' => SyntaxType.EndOfFile // a space at this point is reported as EndOfFile without consuming it
+
+ | ch =>
+ if (Char.IsDigit (ch))
+ get_number ()
+ else
+ if (LexerBase.IsIdBeginning (ch))
+ get_id ()
+ else
+ if (LexerBase.IsOperatorChar (ch))
+ get_op ()
+ else
+ throw LexerBase.Error ("invalid character")
+ }
+ }
+
+ public GetSyntaxToken (state : ref int) : SyntaxToken { // scans the next token; state 1 means the text so far ended inside an unterminated /* comment
+ mutable begin_pos = pos;
+ def loop (c) { // c is a character already consumed (or the artificial ' ' used to start scanning)
+ | ' ' | '\t' | '\r' | '\n' =>
+ //System.Diagnostics.Debug.WriteLine("loop(whitespace) " + c.ToString());
+ if (pos < reader.Length) {
+ ++begin_pos; // whitespace is not part of the token: move the token start forward
+ loop (read ())
+ } else
+ SyntaxType.EndOfFile
+
+ | '/' =>
+ //System.Diagnostics.Debug.WriteLine("loop(/) " + c.ToString());
+ match (comment_beginning (ref state)) {
+ | '/' => loop (' ') // not a comment: scanning continues as if whitespace had been seen -- NOTE(review): the lone '/' itself is not reported as an operator on this path; confirm intended
+ | _ =>
+ //++begin_pos; // loop (' ') increments in the artificial-leading-space case, so it was tried here too (but it is not needed here; reason unknown)
+ SyntaxType.Comment
+ }
+
+ | '#' =>
+ //System.Diagnostics.Debug.WriteLine("loop(#) " + c.ToString());
+ throw LexerBase.Error ("preprocessor directives may not occur in"
+ " string programs")
+
+ | _ =>
+ //System.Diagnostics.Debug.WriteLine("loop(else) " + c.ToString());
+ --pos; get_token () // un-read c so get_token can classify the token from its first character
+ };
+ def tok =
+ if (pos >= reader.Length) {
+ //System.Diagnostics.Debug.WriteLine("if (pos >= reader.Length)");
+ SyntaxType.EndOfFile
+ } else if (state == 1) {
+ //System.Diagnostics.Debug.WriteLine("} else if (state == 1)");
+ ++begin_pos; // loop (' ') increments begin_pos in the artificial-leading-space case, so we have to as well
+ when (eat_comment ()) state = 0; // comment terminator found: leave the in-comment state
+ SyntaxType.Comment
+ }
+ else {
+ //System.Diagnostics.Debug.WriteLine("else {");
+ try {
+ //NOTE: starting with ' ' works like putting an extra char before the text, so everything else has
+ // to increment begin_pos, and later we have to subtract 1
+ loop (' ');
+ }
+ catch {
+ | _ is LexerBase.Error => SyntaxType.EndOfFile // any scanner error is reported as EndOfFile instead of propagating
+ }
+ }
+ SyntaxToken (begin_pos-1, pos - 1, tok); // both positions are shifted back by one to undo the artificial leading character
+ }
+} // end class LexerColorizer
+
+public class LexerHack // sets up a ManagerClass with default options and exposes a LexerColorizer bound to it
+{
+ manager : ManagerClass; // compiler manager created in the constructor and shared by the lexers built here
+ public lexer : LexerColorizer; // colorizer created with the manager above
+
+ public this() {
+ manager = ManagerClass(CompilationOptions());
+ ManagerClass.Instance = manager; // also installs this manager as ManagerClass.Instance
+ manager.InitCompiler();
+ manager.CoreEnv = GlobalEnv.CreateCore(manager.NameTree); // CoreEnv is what LexerColorizer.get_id queries via IsKeyword
+ // done in ManagerClass
+ //LibraryReferenceManager.LoadMacrosFrom("Nemerle.Macros");
+ // what happened with GlobalEnv.Init()???
+ lexer = LexerColorizer(manager);
+ }
+
+ public CreateNewPreparser(text : string) : PreParser { // wraps text in a LexerString tied to the same manager and returns a PreParser over it
+ def lex = LexerString(manager, text, Location(0, 1, 1));
+ PreParser(lex);
+ }
+}
+
+} // end ns
More information about the svn
mailing list