[svn] r5827: nemerle/trunk/ncc: CompilationOptions.n parsing/Lexer.n parsing/MainParser.n parsing/PreParse...

malekith svnadmin at nemerle.org
Tue Oct 18 13:45:15 CEST 2005


Log:
Initial checkout of patches by Ellis Whitehead with support for python-like (or haskell-like ;-) syntax.

Author: malekith
Date: Tue Oct 18 13:45:12 2005
New Revision: 5827

Added:
   nemerle/trunk/ncc/testsuite/positive/indentation-syntax.n
Modified:
   nemerle/trunk/ncc/CompilationOptions.n
   nemerle/trunk/ncc/parsing/Lexer.n
   nemerle/trunk/ncc/parsing/MainParser.n
   nemerle/trunk/ncc/parsing/PreParser.n
   nemerle/trunk/ncc/passes.n

Modified: nemerle/trunk/ncc/CompilationOptions.n
==============================================================================
--- nemerle/trunk/ncc/CompilationOptions.n	(original)
+++ nemerle/trunk/ncc/CompilationOptions.n	Tue Oct 18 13:45:12 2005
@@ -53,6 +53,7 @@
     public mutable CompileToMemory : bool;
     public mutable EarlyExit : bool;
     public mutable GreedyReferences : bool;
+    public mutable IndentationSyntax : bool;
     // do not unload external libraries in consecutive compilations
     public mutable PersistentLibraries : bool; 
 
@@ -97,6 +98,7 @@
       DumpNamedMethod = "";
       AdditionalDebug = false;
       PersistentLibraries = false;
+      IndentationSyntax = false;
 
       LinkedResources = [];
       EmbeddedResources = [];
@@ -273,6 +275,11 @@
       }
 
       [
+        Getopt.CliOption.Flag (name = "-indentation-syntax", 
+                               aliases = ["-i"],
+                               help = "Turn on indentation-based syntax (similar to Python)",
+                               handler = fun () { Options.IndentationSyntax = true; }),
+
         Getopt.CliOption.String (name = "-out",
                        aliases = ["-o"],
                        help = "Output file name",

Modified: nemerle/trunk/ncc/parsing/Lexer.n
==============================================================================
--- nemerle/trunk/ncc/parsing/Lexer.n	(original)
+++ nemerle/trunk/ncc/parsing/Lexer.n	Tue Oct 18 13:45:12 2005
@@ -50,10 +50,10 @@
 
   | Comment { value : string; } 
 
-  | Semicolon
+  | Semicolon { generated : bool; }
   | Comma
-  | BeginBrace    // {
-  | EndBrace      // }
+  | BeginBrace { generated : bool; }   // {
+  | EndBrace { generated : bool; }     // }
   | BeginRound    // (
   | EndRound      // )
   | BeginSquare   // [
@@ -61,6 +61,8 @@
   | BeginQuote    // <[
   | EndQuote      // ]>
 
+  | Indent { value : string; }
+
   | RoundGroup { Child : Token; }     // ( ... )
   | BracesGroup { Child : Token; }    // { ... }
   | SquareGroup { mutable Child : Token; }    // [ ... ]
@@ -111,6 +113,8 @@
       | BeginQuote     => "<["
       | EndQuote       => "]>"
 
+      | Indent => "indentation"
+      
       | RoundGroup | BracesGroup | SquareGroup | QuoteGroup | LooseGroup =>
         PreParser.Dump (this, "")
 
@@ -146,12 +150,31 @@
         | EndOfFile => "end of file"
         | EndOfGroup => "separator or closing bracket"
         | Comment => "documentation comment"
-        | Semicolon   | Comma      | BeginBrace     
-        | EndBrace    | BeginRound | EndRound       
+        | Comma       | BeginRound | EndRound       
         | BeginSquare | EndSquare  | BeginQuote     
         | EndQuote  =>
           "operator `" + ToString () + "'"
 
+        | BeginBrace (generated) =>
+          if (generated)
+            "group begin"
+          else
+            "operator `" + ToString () + "'"
+        
+        | EndBrace (generated) =>
+          if (generated)
+            "group end"
+          else
+            "operator `" + ToString () + "'"
+        
+        | Semicolon (generated) =>
+          if (generated)
+            "end of statement"
+          else
+            "operator `" + ToString () + "'"
+
+        | Indent (value) => $"indentation `$value'"
+        
         | BracesGroup => "`{...}' group"          
         | RoundGroup => "`(...)' group"
         | SquareGroup => "`[...]' group"
@@ -260,7 +283,7 @@
 
   public abstract Dispose () : void;
   
-  protected read () : char
+  protected virtual read () : char
   {
     def ch = 
       if (!putback)
@@ -740,7 +763,7 @@
 
   /// returns true if there is some character in the input pending
   /// or false if we are at the end of file
-  protected eat_whitespace () : bool
+  protected virtual eat_whitespace () : bool
   {
     mutable eof = false;
     
@@ -803,7 +826,7 @@
     loop (false)
   }
 
-  protected do_get_token () : Token
+  virtual protected do_get_token () : Token
   {
     def ch = 
       if (isPendingChar) {
@@ -825,8 +848,8 @@
         else
           Token.Operator (".")
 
-      | '{' => Token.BeginBrace ()
-      | '}' => Token.EndBrace ()
+      | '{' => Token.BeginBrace (generated = false)
+      | '}' => Token.EndBrace (generated = false)
       | '[' => Token.BeginSquare ()
       | ']' =>
         if (peek () == '>') {
@@ -838,7 +861,7 @@
       | '(' => Token.BeginRound ()
       | ')' => Token.EndRound ()
       | ',' => Token.Comma ()
-      | ';' => Token.Semicolon ()
+      | ';' => Token.Semicolon (generated = false)
 
       | '<' when peek () == '[' => ignore (read ()); Token.BeginQuote ()
         
@@ -1505,6 +1528,86 @@
   }
 }
 
+/// LexerFile variant for indentation syntax: records each line's leading spaces/tabs and makes GetToken () hand out a Token.Indent carrying them.
+public class LexerFileIndent : LexerFile
+{
+  // Variables for indentation-based syntax
+  // Location stamped on the generated Token.Indent (captured in eat_whitespace)
+  protected mutable insertLocation : Location;
+  // Used by GetToken() to hold the token it postpones while it returns a Token.Indent first
+  protected mutable tokPending : Token;
+  // Whether we are currently appending whitespace characters to indentString
+  protected mutable countingIndentation : bool;
+  // Whether the next GetToken() call should return a Token.Indent
+  protected mutable insert_indent : bool;
+  // String holding the current line's indent (only ' ' and '\t' characters are collected)
+  protected mutable indentString : string;
+  
+  /// Passes `fn' to the LexerFile constructor; starts in counting mode with an empty indent, so the first line is measured too.
+  public this (fn : string)
+  {
+    base (fn);
+    countingIndentation = true;
+    indentString = "";
+  }
+  /// Reads one character via base.read (), updates the indentation state from it and returns it unchanged.
+  override protected read () : char
+  {
+    def ch = base.read ();
+    //Message.Debug (Location (file_idx, line, col), $"ch = $ch");
+    
+    // Start counting indentation anew after a newline ('\n' or '\r')
+    if (ch == '\n' || ch == '\r') {
+      indentString = "";
+      countingIndentation = true;
+      //Message.Debug (Location (file_idx, line, col), "countingIndentation ON");
+    }
+    else when (countingIndentation) {
+      if (ch == '\t' || ch == ' ')
+        indentString += ch.ToString();
+      // We have a non-space character, so stop counting indentation
+      //  and ask GetToken () to emit a Token.Indent for this line
+      else {
+        //Message.Debug (Location (file_idx, line, col), $"countingIndentation OFF: indentColNew = $indentColNew");
+        countingIndentation = false;
+        insert_indent = true;
+      }
+    }
+    
+    ch
+  }
+  /// Remembers the current position (file_idx, line, col) before whitespace is skipped, then defers to base.eat_whitespace ().
+  override protected eat_whitespace () : bool
+  {
+    // Location where possible automatic token insertion will take place
+    insertLocation = Location (file_idx, line, col);
+    
+    base.eat_whitespace ();
+  }
+  /// Returns the postponed token if any; otherwise fetches from base.GetToken () and, if insert_indent got set, returns a Token.Indent first.
+  public override GetToken () : Token
+  {
+    if (tokPending != null) {
+      def tok = tokPending;
+      tokPending = null;
+      tok;
+    }
+    else {
+      def tok = base.GetToken ();
+      if (insert_indent) {
+        tokPending = tok;   // the real token is delivered by the next GetToken () call
+        insert_indent = false;
+        mutable tokIndent = Token.Indent (indentString);
+        tokIndent.Location = insertLocation;
+        tokIndent;
+      }
+      else
+        tok;
+    }
+  }
+} // LexerFileIndent
+
+
 public enum SyntaxType {
   | Identifier 
   | Keyword 

Modified: nemerle/trunk/ncc/parsing/MainParser.n
==============================================================================
--- nemerle/trunk/ncc/parsing/MainParser.n	(original)
+++ nemerle/trunk/ncc/parsing/MainParser.n	Tue Oct 18 13:45:12 2005
@@ -138,7 +138,11 @@
      */
     public static Parse (lex : LexerBase) : list [TopDeclaration]
     {
-      def preparser = PreParser (lex);
+      def preparser =
+        if (Options.IndentationSyntax)
+          PreParserIndent (lex)
+        else
+          PreParser (lex);
       def parser = MainParser (GlobalEnv.Core);
       
       mutable result = [];

Modified: nemerle/trunk/ncc/parsing/PreParser.n
==============================================================================
--- nemerle/trunk/ncc/parsing/PreParser.n	(original)
+++ nemerle/trunk/ncc/parsing/PreParser.n	Tue Oct 18 13:45:12 2005
@@ -47,8 +47,8 @@
    */
   public class PreParser
   {
-    lexer : LexerBase;
-    mutable last_token : Token = null;
+    protected lexer : LexerBase;
+    protected mutable last_token : Token = null;
     mutable Env : GlobalEnv;
 
     mutable finished : bool = false;
@@ -93,7 +93,7 @@
     }
 
     /** Fetch next token (from one token buffer or lexer if it's empty */
-    get_token () : Token {
+    protected virtual get_token () : Token {
       if (last_token != null) {
         def result = last_token;
         last_token = null;
@@ -449,4 +449,251 @@
       top
     }
   }
+  /** PreParser for indentation syntax: turns Token.Indent tokens from the lexer into generated `{', `}' and `;' tokens. */
+  public class PreParserIndent : PreParser {
+    // For indentation syntax
+    mutable insert_unindents : int;   // generated `}' tokens that get_token still has to return
+    mutable indent_level : int;       // depth of indentation-generated blocks
+    mutable new_indent : string;      // indent string of the line being started (value of the latest Token.Indent)
+    mutable current_indent : string;  // indent string of the innermost open indentation block
+    // The number of unmatched { ( [ <[ tokens found in the user code at this point
+    mutable explicit_groups : int;
+    static mutable indentation_syntax_active : bool;  // false while explicit_groups > 0; NOTE(review): static, so shared by all instances -- confirm intended
+    mutable insertLocation : Location;  // location stamped on generated tokens
+    mutable token_pending : Token;      // real token held back while a generated token is returned first
+    mutable last_real_tok : Token;      // last non-Indent token taken from the lexer in get_token; null until the first one
+    mutable indent_strings : System.Collections.ArrayList;  // indent strings of the open blocks; entry 0 is ""
+    mutable set_namespace : bool;       // a `set namespace' directive has been seen
+    mutable set_class : bool;           // a `set class' directive has been seen
+    mutable force_brace_after_newline : bool;  // the next Token.Indent is to be replaced by a generated `{'
+    /** Starts with indentation processing active, empty indents, and an indent list holding only "". */
+    public this (lex : LexerBase) {
+      base (lex);
+      
+      indentation_syntax_active = true;
+      new_indent = "";
+      current_indent = "";
+      indent_strings = System.Collections.ArrayList (20);
+      _ = indent_strings.Add("");
+    }
+
+    /** Fetch next token: queued generated `}' first, then last_token, then token_pending, then the lexer (with indentation handling). */
+    protected override get_token () : Token {
+      mutable lexer_tok = null;
+      mutable source = 0;  // debugging aid: which branch produced tok (only read by the commented-out Message.Debug below)
+      
+      mutable tok =
+        if (insert_unindents > 0) {
+          insert_unindents--;
+          indent_level--;  // NOTE(review): the unindent branch of get_token_after_indent (tok) already assigns indent_level = i before queuing these braces, so this looks like a second decrement on multi-level unindents -- confirm
+          //Message.Debug (insertLocation, "Generate '}'");
+          Token.EndBrace (true);
+        }
+        // last_token has priority over token_pending
+        else if (last_token != null) {
+          source = 2;
+          def result = last_token;
+          last_token = null;
+          result;
+        }
+        else if (token_pending != null) {
+          source = 1;
+          def result = token_pending;
+          token_pending = null;
+          result;
+        }
+        else {
+          source = 3;
+          try {
+            lexer_tok = lexer.GetToken ();
+            match (lexer_tok) {
+              | Token.Indent as t =>
+                if (indentation_syntax_active) {
+                  if (force_brace_after_newline) {
+                    // first Indent after a `set' directive: replaced by a generated `{'
+                    force_brace_after_newline = false;
+                    Token.BeginBrace (true);
+                  }
+                  else {
+                    new_indent = t.value;
+                    insertLocation = lexer_tok.Location;
+                    //Message.Debug (insertLocation, "Calling get_token_after_indent ()");
+                    source = 4;
+                    get_token_after_indent ();
+                  }
+                }
+                else
+                  get_token();  // inside explicit brackets: the Indent is dropped and the next token is fetched
+              
+              | Token.EndOfFile =>
+                // one `}' per open indentation level plus one per `set' directive seen
+                insert_unindents = indent_level;
+                when (set_class)
+                  insert_unindents++;
+                when (set_namespace)
+                  insert_unindents++;
+                if (insert_unindents > 0) {
+                  insertLocation = lexer_tok.Location;
+                  token_pending = lexer_tok;
+                  Token.EndBrace (true);  // NOTE(review): returned without decrementing insert_unindents (the unindent path does decrement) -- confirm the brace count at end of file
+                }
+                else {
+                  last_real_tok = lexer_tok;
+                  lexer_tok
+                }
+                
+              | BeginBrace
+              | BeginRound
+              | BeginSquare
+              | BeginQuote =>
+                // explicit opening bracket: indentation handling is switched off until it is closed
+                explicit_groups++;
+                indentation_syntax_active = false;
+                last_real_tok = lexer_tok;
+                lexer_tok
+                
+              | EndBrace
+              | EndRound
+              | EndSquare
+              | EndQuote =>
+                when (explicit_groups == 0)
+                  // TODO: better error message
+                  throw PreParserException (lexer_tok.Location, "unmatched group end");
+                explicit_groups--;
+                when (explicit_groups == 0)
+                  indentation_syntax_active = true;  // outermost explicit bracket closed
+                last_real_tok = lexer_tok;
+                lexer_tok
+                
+              | _ =>
+                last_real_tok = lexer_tok;
+                lexer_tok
+            }
+          }
+          catch {
+            | e is LexerBase.Error =>
+              // report the lexer error and carry on with the next token
+              Message.Error (lexer.Location, e.name);
+              get_token ()
+          }
+        }
+      
+      //Message.Debug (tok.ToString() + ", " + tok.GetType().ToString() + ", " + new_indent.Length.ToString());
+      // If this token prints as "set" while the current indentation block is the outermost one (empty indent):
+      //when (new_indent.Length == 0 && tok is Token.Identifier && (tok :> Token.Identifier).name == "set") {
+      when (current_indent.Length == 0 && tok.ToString() == "set") {
+        def tokNext = lexer.GetToken ();  // read straight from the lexer, bypassing last_token / token_pending
+        match (tokNext) {
+          | Token.Keyword ("namespace") =>
+            when (set_namespace) 
+              throw PreParserException (tokNext.Location, "the 'set namespace' directive can only be used once per file");
+            set_namespace = true;
+            //tokNext = get_token_after_indent (tokNext);
+            force_brace_after_newline = true;
+          
+          | Token.Keyword ("class") =>
+            when (set_class) 
+              throw PreParserException (tokNext.Location, "the 'set class' directive can only be used once per file");
+            set_class = true;
+            //tokNext = get_token_after_indent (tokNext);
+            force_brace_after_newline = true;
+          
+          | _ =>
+            throw PreParserException (tokNext.Location, $"unrecognized 'set' directive: '$tokNext'");
+        }
+        tok = tokNext;  // the `set' token itself is dropped; the keyword after it is returned
+      }
+        
+      //Message.Debug (tok.Location, tok.ToString() + $" ($source)");
+      tok;
+    }
+    /** Called after a Token.Indent: fetches the first following non-Indent, non-Comment token and applies the indentation rules to it. */
+    get_token_after_indent () : Token {
+      //Message.Debug ("1");
+      mutable tok = lexer.GetToken ();
+      
+      // Skip Token.Indent and Token.Comment tokens, remembering the value of the last Indent seen
+      while (tok is Token.Indent || tok is Token.Comment) {
+        // TODO: what's the best way to do this?
+        match (tok) {
+          | Token.Indent (value) => new_indent = value;
+          | _ => ()
+        }
+        tok = lexer.GetToken ();
+      }
+      
+      when (!force_brace_after_newline)  // while a forced `{' is outstanding, tok is returned without indentation processing
+        tok = get_token_after_indent (tok);
+      
+      tok;
+    }
+    /** Compares new_indent with current_indent and returns a generated `;', `{' or `}' (queuing `tok' in token_pending), or `tok' itself. */
+    get_token_after_indent (tok : Token) : Token {
+      // If we have not unindented
+      if (new_indent.Length >= current_indent.Length) {
+        // Make sure that the beginning of the new indent string
+        //  starts with the contents of the current indent string.
+        when (!new_indent.StartsWith(current_indent)) {
+          // TODO: make a better error message -- checkout python error message
+          Message.Error (tok.Location, "inconsistent indentation");
+          current_indent = new_indent;  // after the error, continue as if the line stayed at the same level
+        }
+        
+        // If we have remained at the same indentation level
+        if (new_indent == current_indent) {
+          if (last_real_tok != null) {
+            token_pending = tok;
+            //Message.Debug (tok.Location, $"Generate ';'");
+            mutable tokInsert = 
+              /*if (force_brace_after_newline) {
+                force_brace_after_newline = false;
+                Token.BeginBrace (true);
+              }
+              else*/
+                Token.Semicolon (true);
+            tokInsert.Location = insertLocation;
+            tokInsert;
+          }
+          else
+            tok;  // get_token has not yet passed on any lexer token: nothing to terminate
+        }
+        // If we've indented further than the previous line
+        else {
+          match (tok) {
+            | Token.Operator =>
+              tok;  // deeper line starting with an operator: no `{' is generated and the indent state is left unchanged
+            
+            | _ =>
+              indent_level++;
+              _ = indent_strings.Add(new_indent);
+              current_indent = new_indent;
+              token_pending = tok;
+              //Message.Debug (tok.Location, "Generate '{'");
+              mutable tokInsert = Token.BeginBrace (true);
+              tokInsert.Location = insertLocation;
+              tokInsert;
+          }
+        }
+      }
+      // Otherwise, we've unindented:
+      else {
+        def i = indent_strings.IndexOf(new_indent);  // the new indent must equal the indent of an enclosing block
+        when (i == -1) {
+          // TODO: make a better error message -- checkout python error message
+          throw PreParserException (tok.Location, "inconsistent indentation");
+        }
+        
+        insert_unindents = indent_level - i;  // blocks to close: one `}' is returned below, the rest are left for get_token
+        indent_level = i;
+        indent_strings.RemoveRange(i + 1, indent_strings.Count - (i + 1));
+        //while (indent_strings.Length > i + 1)
+        //  indent_strings.Pop ();
+        
+        current_indent = new_indent;
+        token_pending = tok;
+        //Message.Debug (tok.Location, $"Generate $insert_unindents '}'");
+        insert_unindents--;
+        mutable tokInsert = Token.EndBrace (true);
+        tokInsert.Location = insertLocation;
+        tokInsert;
+      }
+    }
+    
+  }
 }

Modified: nemerle/trunk/ncc/passes.n
==============================================================================
--- nemerle/trunk/ncc/passes.n	(original)
+++ nemerle/trunk/ncc/passes.n	Tue Oct 18 13:45:12 2005
@@ -77,6 +77,9 @@
     
     /// initialize pipelines with default values
     this () {
+      if (Options.IndentationSyntax)
+        LexingPipeline = LexerFileIndent;
+      else
       LexingPipeline = LexerFile;
       ParsingPipeline = MainParser.Parse;
       ScanningPipeline = ScanTypeHierarchy.ProcessDeclaration;

Added: nemerle/trunk/ncc/testsuite/positive/indentation-syntax.n
==============================================================================
--- (empty file)
+++ nemerle/trunk/ncc/testsuite/positive/indentation-syntax.n	Tue Oct 18 13:45:12 2005
@@ -0,0 +1,49 @@
+// OPTIONS: -i
+// 3. With initial comment and spaces between lines
+// 4. Add a condition with a single statement beneath it
+// 5. Add nested conditions to test whether multi-unindenting works
+// 6. Add 'set' keyword for namespace and class
+// 7. Add two kinds of line-continuation + missing newline at end of file
+
+using System.Console
+
+/*
+BEGIN-OUTPUT
+Hello, World!
+n > 0
+c = 3, d = 4
+END-OUTPUT
+*/
+
+set namespace Test
+
+set class App
+
+static Main() : void
+	// 3.
+	Write("Hello")
+
+	// 4.
+	def n = 1
+	when (n > 0)
+		Write(", World")
+	WriteLine("!")
+
+	// 5.
+	if (n > 0)
+		WriteLine("n > 0")
+		when (n > 1)
+			WriteLine("n > 1")
+	else
+		WriteLine("Zero")
+
+	def a = 1
+	def b = 2
+	def c = (
+		a +
+		b
+		)
+	def d = a +
+		c
+	WriteLine($"c = $c, d = $d")
+



More information about the svn mailing list