[svn] r6350: nemerle/trunk/ncc/parsing: Lexer.n LexerColorizer.n

NoiseEHC svnadmin at nemerle.org
Tue May 30 15:09:07 CEST 2006


Log:
LexerColorizer moved out of Lexer.n into a new file, LexerColorizer.n.
Added LexerHack, which seems to initialize the new engine properly :)


Author: NoiseEHC
Date: Tue May 30 15:09:05 2006
New Revision: 6350

Added:
   nemerle/trunk/ncc/parsing/LexerColorizer.n
Modified:
   nemerle/trunk/ncc/parsing/Lexer.n

Modified: nemerle/trunk/ncc/parsing/Lexer.n
==============================================================================
--- nemerle/trunk/ncc/parsing/Lexer.n	(original)
+++ nemerle/trunk/ncc/parsing/Lexer.n	Tue May 30 15:09:05 2006
@@ -1638,406 +1638,4 @@
   | EndOfFile
 }
 
-[ManagerAccess]
-public class LexerColorizer
-{
-  [Record]
-  public class SyntaxToken {
-    public StartPos : int;
-    public EndPos : int;
-    public Token : SyntaxType;
-  }
-
-  mutable reader : string;
-  mutable pos : int;
-  
-  public this (fn : string)
-  {
-    reader = fn;
-    pos = 0;
-  }
-
-  public SetString (text : string, offset : int) : void {
-    reader = text;
-    pos = offset;
-  }
-
-  read () : char
-  {
-    if (pos < reader.Length) {
-      def ch = reader[pos];
-      ++pos;
-      ch
-    } else throw LexerBase.Error ("unexpected end of code text")
-  }
-
-  peek () : char
-  {
-    if (pos < reader.Length)
-      reader[pos]
-    else 
-      (0 :> char) 
-  }
-
-  get_op () : SyntaxType
-  {
-    def loop () {
-      if (LexerBase.IsOperatorChar (peek ())) {
-        def c = read ();
-        if (c == '/' && (peek () == '/' || peek () == '*')) {
-          --pos;
-          SyntaxType.Operator
-        }
-        else
-          loop ();
-      }
-      else
-        SyntaxType.Operator
-    };
-    loop ();
-  }
-
-  get_number () : SyntaxType
-  {
-    mutable already_seen_type = false;  // for the case 0b0 vs 0b
-   
-    mutable mode =
-      match (read ()) {
-        | '.' => LexerBase.NumberMode.Float
-        | '0' =>
-          match (peek ()) {
-            | 'x' | 'X' => ++pos; LexerBase.NumberMode.Hex
-            | 'o' | 'O' => ++pos; LexerBase.NumberMode.Octal
-            | 'b' | 'B' =>
-              ++pos;
-              unless (Char.IsDigit (peek ())) already_seen_type = true;
-              LexerBase.NumberMode.Binary
-              
-            | x when Char.IsDigit (x) =>
-              Message.Warning ("trailing zeros look like"
-                               " octal modifiers, but they are not");
-              LexerBase.NumberMode.Decimal
-              
-            | _ => LexerBase.NumberMode.Decimal
-          }
-        | _ => LexerBase.NumberMode.Decimal
-      };
-    mutable last_was_digit = true;
-
-    // read digits and . between them if it is present
-    def loop () {
-      match (peek ()) {
-        | '.' => 
-          when (mode == LexerBase.NumberMode.Decimal) {
-            mode = LexerBase.NumberMode.Float;
-            ++pos;
-            if (Char.IsDigit (peek ()))
-              loop ()
-            else {
-              --pos;
-              mode = LexerBase.NumberMode.Decimal;
-            }
-          }
-
-        | 'a' | 'A' | 'b' | 'B' | 'c' | 'C' | 'd' | 'D'
-        | 'e' | 'E' | 'f' | 'F' =>
-          when (mode == LexerBase.NumberMode.Hex) {
-            ++pos;
-            loop ();
-          }
-
-        | x when Char.IsDigit (x) =>
-          ++pos;
-          loop ()
-        | _ => ()
-      }
-    };
-    loop ();
-    
-    def exponent_part (only_realsuf) {
-      when (!only_realsuf) {
-        match (peek ()) {
-          | 'E' | 'e' =>
-            ++pos;
-            match (peek ()) {
-              | '+' | '-' => ++pos;
-              | _ => ()
-            };
-            if (Char.IsDigit (peek ()))
-              do {
-                ++pos;
-              } while (Char.IsDigit (peek ()))
-            else
-              throw LexerBase.Error ("no digits after exponent sign in float literal")
-          | _ => ()
-        }
-      };
-      SyntaxType.FloatLiteral
-    };
-
-    def check_type_suffix () {
-      /// we should have integer number here
-
-      def special (c) {
-        | 'l' | 's' | 'b' | 'u' => true
-        | _ => false
-      };
-
-      // check suffixes to make special types conversions
-      mutable ch =
-        if (already_seen_type)
-          'b'
-        else
-          Char.ToLower (peek (), CultureInfo.InvariantCulture);
-      if (special (ch)) {
-        unless (already_seen_type) ++pos;
-
-        // we can have two letter suffixes
-        def ch' = Char.ToLower (peek (), CultureInfo.InvariantCulture);
-        when (special (ch')) ++pos;
-        SyntaxType.IntLiteral
-      }
-      else
-        SyntaxType.IntLiteral
-    };
-    
-    match (mode) {
-      | LexerBase.NumberMode.Float =>
-        match (peek ()) {
-          | 'E' | 'e' => 
-            exponent_part (false)
-          | _ =>
-            exponent_part (true)
-        }
-      | LexerBase.NumberMode.Decimal =>
-        if (last_was_digit)
-          match (Char.ToLower (peek (), CultureInfo.InvariantCulture)) {
-            | 'e' =>
-              exponent_part (false)
-            | 'f' | 'd' | 'm' =>
-              exponent_part (true)
-            | _ =>
-              check_type_suffix ()
-          }
-        else
-          SyntaxType.IntLiteral
-
-      | LexerBase.NumberMode.Hex 
-      | LexerBase.NumberMode.Binary 
-      | LexerBase.NumberMode.Octal => check_type_suffix ()
-    }
-  }
-
-  get_id () : SyntaxType
-  {
-    def first_ch = read ();
-    if (first_ch == '\'' && !LexerBase.IsIdBeginning (peek ()))
-      get_char ()
-    else {
-      def id_buffer = StringBuilder ();
-      _ = id_buffer.Append (first_ch);
-
-      while (LexerBase.IsIdBeginning (peek ()) || 
-             Char.IsDigit (peek ()) || 
-             peek () == '\'')
-        _ = id_buffer.Append (read ());
-          
-      def str = id_buffer.ToString ();
-      
-      if (first_ch == '\'' && str.Length == 3 && str[2] == '\'')
-        SyntaxType.CharLiteral
-      else if (Manager.CoreEnv.IsKeyword (str))
-        SyntaxType.Keyword
-      else
-        SyntaxType.Identifier
-    }
-  }
-
-   
-  get_char () : SyntaxType
-  {
-    pos += 2;
-    SyntaxType.CharLiteral
-  }
-
-  get_string (end_ch : char) : SyntaxType
-  {
-    def loop () {
-      if (pos >= reader.Length)
-        SyntaxType.EndOfFile
-      else             
-        match (read ()) {
-          | '\\' => ++pos; loop ()
-          | ch when ch != end_ch => loop () 
-          | _ => SyntaxType.StringLiteral 
-        }
-    };
-    loop () 
-  }
-
-  get_monkey_string () : SyntaxType
-  {
-    def loop () {
-      match (read ()) {
-        | '"' =>
-          match (peek ()) {
-            | '"' =>
-              ++pos;
-              loop ()
-            | _ => SyntaxType.StringLiteral
-          }
-        | _ => loop ()
-      }
-    };
-
-    loop ()
-  }
-
-  comment_beginning (state : ref int) : char
-  {
-    match (peek ()) {
-      | '/' =>
-        // we are for sure in one line comment
-        while (pos < reader.Length && read () != '\n') {};
-        // pass whitespace, so next read would be eof checked
-        ' '
-
-      | '*' =>
-        // multiline comment
-        ++pos;
-        unless (eat_comment ()) state = 1;
-        // pass whitespace, so next read would be eof checked            
-        ' '
-
-      | _ => '/'
-    }
-  }
-
-  eat_comment () : bool 
-  {
-    def loop1 (seen_star) {
-      if (pos < reader.Length)
-        match (read ()) {
-          | '*' => loop1 (true)
-          | '/' when seen_star => true
-          | _ => loop1 (false)
-        }
-      else
-        false
-    };
-    loop1 (false);
-  }
-
-  get_token () : SyntaxType
-  {
-    if (pos >= reader.Length) SyntaxType.EndOfFile
-    else
-    match (peek ()) {
-      | '"' => ++pos; get_string ('"')
-
-      | '\'' => get_id ()
-
-      | '.' =>
-        ++pos;
-        if (Char.IsDigit (peek ()))
-          get_number ()
-        else
-          SyntaxType.Operator
-
-      | '@' =>
-        ++pos;
-        def next = peek ();
-        if (LexerBase.IsOperatorChar (next))
-          get_op ();
-        else if (LexerBase.IsIdBeginning (next)) {
-          _ = get_id ();
-          SyntaxType.Identifier
-        }
-        else if (next == '"') {
-          ++pos;
-          get_monkey_string ()
-        }
-        else
-          throw LexerBase.Error ("expecting operator, identifier or string literal after '@'")
-
-      | '{' | '}' | '[' | ']' 
-      | '(' | ')'
-        =>
-        ++pos; SyntaxType.OperatorBrace      
-
-      | ',' | ';' | '\\'
-        =>
-        ++pos; 
-        get_op ();
-     
-      | ' ' => SyntaxType.EndOfFile
-     
-      | ch =>
-        if (Char.IsDigit (ch))
-          get_number ()
-        else
-          if (LexerBase.IsIdBeginning (ch))
-            get_id ()
-          else
-            if (LexerBase.IsOperatorChar (ch)) 
-              get_op ()
-            else
-              throw LexerBase.Error ("invalid character")
-    }      
-  }
-
-  public GetSyntaxToken (state : ref int) : SyntaxToken {
-    mutable begin_pos = pos;
-    def loop (c) {
-      | ' ' | '\t' | '\r' | '\n' =>
-        //System.Diagnostics.Debug.WriteLine("loop(whitespace) " + c.ToString());
-        if (pos < reader.Length) {
-          ++begin_pos;
-          loop (read ())
-        } else
-          SyntaxType.EndOfFile
-            
-      | '/' =>
-        //System.Diagnostics.Debug.WriteLine("loop(/) " + c.ToString());
-        match (comment_beginning (ref state)) {
-          | '/' => loop (' ')
-          | _ => 
-            //++begin_pos; // loop (' ') increments in the FUCK case so we have too (but not here, dunno why...)
-            SyntaxType.Comment 
-        }
-        
-      | '#' =>
-        //System.Diagnostics.Debug.WriteLine("loop(#) " + c.ToString());
-        throw LexerBase.Error ("preprocessor directives may not occur in"
-                               " string programs")
-        
-      | _ =>
-        //System.Diagnostics.Debug.WriteLine("loop(else) " + c.ToString());
-        --pos; get_token ()
-    };
-    def tok =
-      if (pos >= reader.Length) {
-        //System.Diagnostics.Debug.WriteLine("if (pos >= reader.Length)");
-        SyntaxType.EndOfFile
-      } else if (state == 1) {
-        //System.Diagnostics.Debug.WriteLine("} else if (state == 1)");
-        ++begin_pos; // loop (' ') increments in the FUCK case so we have too...
-        when (eat_comment ()) state = 0;
-        SyntaxType.Comment
-      }
-      else {
-        //System.Diagnostics.Debug.WriteLine("else {");
-        try {
-          //FUCK: because it works as putting an extra char before then everything else has 
-          // to increment begin_pos, and later we have to substract 1
-          loop (' ');
-        }
-        catch {
-          | _ is LexerBase.Error => SyntaxType.EndOfFile
-        }
-      }
-    SyntaxToken (begin_pos-1, pos - 1, tok);
-  }  
-} // end class LexerColorizer
-
 } // end ns

Added: nemerle/trunk/ncc/parsing/LexerColorizer.n
==============================================================================
--- (empty file)
+++ nemerle/trunk/ncc/parsing/LexerColorizer.n	Tue May 30 15:09:05 2006
@@ -0,0 +1,435 @@
+using System.Text;
+using System.Globalization;
+using System;
+
+using Nemerle.Collections;
+using Nemerle.Utility;
+
+namespace Nemerle.Compiler {
+
+[ManagerAccess]
+public class LexerColorizer
+{
+  [Record]
+  public class SyntaxToken {
+    public StartPos : int;
+    public EndPos : int;
+    public Token : SyntaxType;
+  }
+
+  mutable reader : string;
+  mutable pos : int;
+  
+  public this (man : ManagerClass)
+  {
+    Manager = man;
+    reader = "";
+    pos = 0;
+  }
+
+  public SetString (text : string, offset : int) : void {
+    reader = text;
+    pos = offset;
+  }
+
+  read () : char
+  {
+    if (pos < reader.Length) {
+      def ch = reader[pos];
+      ++pos;
+      ch
+    } else throw LexerBase.Error ("unexpected end of code text")
+  }
+
+  peek () : char
+  {
+    if (pos < reader.Length)
+      reader[pos]
+    else 
+      (0 :> char) 
+  }
+
+  get_op () : SyntaxType
+  {
+    def loop () {
+      if (LexerBase.IsOperatorChar (peek ())) {
+        def c = read ();
+        if (c == '/' && (peek () == '/' || peek () == '*')) {
+          --pos;
+          SyntaxType.Operator
+        }
+        else
+          loop ();
+      }
+      else
+        SyntaxType.Operator
+    };
+    loop ();
+  }
+
+  get_number () : SyntaxType
+  {
+    mutable already_seen_type = false;  // for the case 0b0 vs 0b
+   
+    mutable mode =
+      match (read ()) {
+        | '.' => LexerBase.NumberMode.Float
+        | '0' =>
+          match (peek ()) {
+            | 'x' | 'X' => ++pos; LexerBase.NumberMode.Hex
+            | 'o' | 'O' => ++pos; LexerBase.NumberMode.Octal
+            | 'b' | 'B' =>
+              ++pos;
+              unless (Char.IsDigit (peek ())) already_seen_type = true;
+              LexerBase.NumberMode.Binary
+              
+            | x when Char.IsDigit (x) =>
+              Message.Warning ("trailing zeros look like"
+                               " octal modifiers, but they are not");
+              LexerBase.NumberMode.Decimal
+              
+            | _ => LexerBase.NumberMode.Decimal
+          }
+        | _ => LexerBase.NumberMode.Decimal
+      };
+    mutable last_was_digit = true;
+
+    // read digits and . between them if it is present
+    def loop () {
+      match (peek ()) {
+        | '.' => 
+          when (mode == LexerBase.NumberMode.Decimal) {
+            mode = LexerBase.NumberMode.Float;
+            ++pos;
+            if (Char.IsDigit (peek ()))
+              loop ()
+            else {
+              --pos;
+              mode = LexerBase.NumberMode.Decimal;
+            }
+          }
+
+        | 'a' | 'A' | 'b' | 'B' | 'c' | 'C' | 'd' | 'D'
+        | 'e' | 'E' | 'f' | 'F' =>
+          when (mode == LexerBase.NumberMode.Hex) {
+            ++pos;
+            loop ();
+          }
+
+        | x when Char.IsDigit (x) =>
+          ++pos;
+          loop ()
+        | _ => ()
+      }
+    };
+    loop ();
+    
+    def exponent_part (only_realsuf) {
+      when (!only_realsuf) {
+        match (peek ()) {
+          | 'E' | 'e' =>
+            ++pos;
+            match (peek ()) {
+              | '+' | '-' => ++pos;
+              | _ => ()
+            };
+            if (Char.IsDigit (peek ()))
+              do {
+                ++pos;
+              } while (Char.IsDigit (peek ()))
+            else
+              throw LexerBase.Error ("no digits after exponent sign in float literal")
+          | _ => ()
+        }
+      };
+      SyntaxType.FloatLiteral
+    };
+
+    def check_type_suffix () {
+      /// we should have integer number here
+
+      def special (c) {
+        | 'l' | 's' | 'b' | 'u' => true
+        | _ => false
+      };
+
+      // check suffixes to make special types conversions
+      mutable ch =
+        if (already_seen_type)
+          'b'
+        else
+          Char.ToLower (peek (), CultureInfo.InvariantCulture);
+      if (special (ch)) {
+        unless (already_seen_type) ++pos;
+
+        // we can have two letter suffixes
+        def ch' = Char.ToLower (peek (), CultureInfo.InvariantCulture);
+        when (special (ch')) ++pos;
+        SyntaxType.IntLiteral
+      }
+      else
+        SyntaxType.IntLiteral
+    };
+    
+    match (mode) {
+      | LexerBase.NumberMode.Float =>
+        match (peek ()) {
+          | 'E' | 'e' => 
+            exponent_part (false)
+          | _ =>
+            exponent_part (true)
+        }
+      | LexerBase.NumberMode.Decimal =>
+        if (last_was_digit)
+          match (Char.ToLower (peek (), CultureInfo.InvariantCulture)) {
+            | 'e' =>
+              exponent_part (false)
+            | 'f' | 'd' | 'm' =>
+              exponent_part (true)
+            | _ =>
+              check_type_suffix ()
+          }
+        else
+          SyntaxType.IntLiteral
+
+      | LexerBase.NumberMode.Hex 
+      | LexerBase.NumberMode.Binary 
+      | LexerBase.NumberMode.Octal => check_type_suffix ()
+    }
+  }
+
+  get_id () : SyntaxType
+  {
+    def first_ch = read ();
+    if (first_ch == '\'' && !LexerBase.IsIdBeginning (peek ()))
+      get_char ()
+    else {
+      def id_buffer = StringBuilder ();
+      _ = id_buffer.Append (first_ch);
+
+      while (LexerBase.IsIdBeginning (peek ()) || 
+             Char.IsDigit (peek ()) || 
+             peek () == '\'')
+        _ = id_buffer.Append (read ());
+          
+      def str = id_buffer.ToString ();
+      
+      if (first_ch == '\'' && str.Length == 3 && str[2] == '\'')
+        SyntaxType.CharLiteral
+      else if (Manager.CoreEnv.IsKeyword (str))
+        SyntaxType.Keyword
+      else
+        SyntaxType.Identifier
+    }
+  }
+
+   
+  get_char () : SyntaxType
+  {
+    pos += 2;
+    SyntaxType.CharLiteral
+  }
+
+  get_string (end_ch : char) : SyntaxType
+  {
+    def loop () {
+      if (pos >= reader.Length)
+        SyntaxType.EndOfFile
+      else             
+        match (read ()) {
+          | '\\' => ++pos; loop ()
+          | ch when ch != end_ch => loop () 
+          | _ => SyntaxType.StringLiteral 
+        }
+    };
+    loop () 
+  }
+
+  get_monkey_string () : SyntaxType
+  {
+    def loop () {
+      match (read ()) {
+        | '"' =>
+          match (peek ()) {
+            | '"' =>
+              ++pos;
+              loop ()
+            | _ => SyntaxType.StringLiteral
+          }
+        | _ => loop ()
+      }
+    };
+
+    loop ()
+  }
+
+  comment_beginning (state : ref int) : char
+  {
+    match (peek ()) {
+      | '/' =>
+        // we are for sure in one line comment
+        while (pos < reader.Length && read () != '\n') {};
+        // pass whitespace, so next read would be eof checked
+        ' '
+
+      | '*' =>
+        // multiline comment
+        ++pos;
+        unless (eat_comment ()) state = 1;
+        // pass whitespace, so next read would be eof checked            
+        ' '
+
+      | _ => '/'
+    }
+  }
+
+  eat_comment () : bool 
+  {
+    def loop1 (seen_star) {
+      if (pos < reader.Length)
+        match (read ()) {
+          | '*' => loop1 (true)
+          | '/' when seen_star => true
+          | _ => loop1 (false)
+        }
+      else
+        false
+    };
+    loop1 (false);
+  }
+
+  get_token () : SyntaxType
+  {
+    if (pos >= reader.Length) SyntaxType.EndOfFile
+    else
+    match (peek ()) {
+      | '"' => ++pos; get_string ('"')
+
+      | '\'' => get_id ()
+
+      | '.' =>
+        ++pos;
+        if (Char.IsDigit (peek ()))
+          get_number ()
+        else
+          SyntaxType.Operator
+
+      | '@' =>
+        ++pos;
+        def next = peek ();
+        if (LexerBase.IsOperatorChar (next))
+          get_op ();
+        else if (LexerBase.IsIdBeginning (next)) {
+          _ = get_id ();
+          SyntaxType.Identifier
+        }
+        else if (next == '"') {
+          ++pos;
+          get_monkey_string ()
+        }
+        else
+          throw LexerBase.Error ("expecting operator, identifier or string literal after '@'")
+
+      | '{' | '}' | '[' | ']' 
+      | '(' | ')'
+        =>
+        ++pos; SyntaxType.OperatorBrace      
+
+      | ',' | ';' | '\\'
+        =>
+        ++pos; 
+        get_op ();
+     
+      | ' ' => SyntaxType.EndOfFile
+     
+      | ch =>
+        if (Char.IsDigit (ch))
+          get_number ()
+        else
+          if (LexerBase.IsIdBeginning (ch))
+            get_id ()
+          else
+            if (LexerBase.IsOperatorChar (ch)) 
+              get_op ()
+            else
+              throw LexerBase.Error ("invalid character")
+    }      
+  }
+
+  public GetSyntaxToken (state : ref int) : SyntaxToken {
+    mutable begin_pos = pos;
+    def loop (c) {
+      | ' ' | '\t' | '\r' | '\n' =>
+        //System.Diagnostics.Debug.WriteLine("loop(whitespace) " + c.ToString());
+        if (pos < reader.Length) {
+          ++begin_pos;
+          loop (read ())
+        } else
+          SyntaxType.EndOfFile
+            
+      | '/' =>
+        //System.Diagnostics.Debug.WriteLine("loop(/) " + c.ToString());
+        match (comment_beginning (ref state)) {
+          | '/' => loop (' ')
+          | _ => 
+            //++begin_pos; // loop (' ') increments in the FUCK case so we have too (but not here, dunno why...)
+            SyntaxType.Comment 
+        }
+        
+      | '#' =>
+        //System.Diagnostics.Debug.WriteLine("loop(#) " + c.ToString());
+        throw LexerBase.Error ("preprocessor directives may not occur in"
+                               " string programs")
+        
+      | _ =>
+        //System.Diagnostics.Debug.WriteLine("loop(else) " + c.ToString());
+        --pos; get_token ()
+    };
+    def tok =
+      if (pos >= reader.Length) {
+        //System.Diagnostics.Debug.WriteLine("if (pos >= reader.Length)");
+        SyntaxType.EndOfFile
+      } else if (state == 1) {
+        //System.Diagnostics.Debug.WriteLine("} else if (state == 1)");
+        ++begin_pos; // loop (' ') increments in the FUCK case so we have too...
+        when (eat_comment ()) state = 0;
+        SyntaxType.Comment
+      }
+      else {
+        //System.Diagnostics.Debug.WriteLine("else {");
+        try {
+          //FUCK: because it works as putting an extra char before then everything else has 
+          // to increment begin_pos, and later we have to substract 1
+          loop (' ');
+        }
+        catch {
+          | _ is LexerBase.Error => SyntaxType.EndOfFile
+        }
+      }
+    SyntaxToken (begin_pos-1, pos - 1, tok);
+  }  
+} // end class LexerColorizer
+
+public class LexerHack
+{
+  manager : ManagerClass;
+  public lexer : LexerColorizer;
+  
+  public this() {
+    manager = ManagerClass(CompilationOptions());
+    ManagerClass.Instance = manager;
+    manager.InitCompiler();
+    manager.CoreEnv = GlobalEnv.CreateCore(manager.NameTree);
+    // done in ManagerClass
+    //LibraryReferenceManager.LoadMacrosFrom("Nemerle.Macros");
+    // what happened with GlobalEnv.Init()???
+    lexer = LexerColorizer(manager);
+  }
+
+  public CreateNewPreparser(text : string) : PreParser {
+    def lex = LexerString(manager, text, Location(0, 1, 1));
+    PreParser(lex);
+  }
+}
+
+} // end ns



More information about the svn mailing list