[svn] r5827: nemerle/trunk/ncc: CompilationOptions.n
parsing/Lexer.n parsing/MainParser.n parsing/PreParse...
malekith
svnadmin at nemerle.org
Tue Oct 18 13:45:15 CEST 2005
Log:
Initial checkout of patches by Ellis Whitehead with support for python-like (or haskell-like ;-) syntax.
Author: malekith
Date: Tue Oct 18 13:45:12 2005
New Revision: 5827
Added:
nemerle/trunk/ncc/testsuite/positive/indentation-syntax.n
Modified:
nemerle/trunk/ncc/CompilationOptions.n
nemerle/trunk/ncc/parsing/Lexer.n
nemerle/trunk/ncc/parsing/MainParser.n
nemerle/trunk/ncc/parsing/PreParser.n
nemerle/trunk/ncc/passes.n
Modified: nemerle/trunk/ncc/CompilationOptions.n
==============================================================================
--- nemerle/trunk/ncc/CompilationOptions.n (original)
+++ nemerle/trunk/ncc/CompilationOptions.n Tue Oct 18 13:45:12 2005
@@ -53,6 +53,7 @@
public mutable CompileToMemory : bool;
public mutable EarlyExit : bool;
public mutable GreedyReferences : bool;
+ public mutable IndentationSyntax : bool;
// do not unload external libraries in consecutive compilations
public mutable PersistentLibraries : bool;
@@ -97,6 +98,7 @@
DumpNamedMethod = "";
AdditionalDebug = false;
PersistentLibraries = false;
+ IndentationSyntax = false;
LinkedResources = [];
EmbeddedResources = [];
@@ -273,6 +275,11 @@
}
[
+ Getopt.CliOption.Flag (name = "-indentation-syntax",
+ aliases = ["-i"],
+ help = "Turn on indentation-based syntax (similar to Python)",
+ handler = fun () { Options.IndentationSyntax = true; }),
+
Getopt.CliOption.String (name = "-out",
aliases = ["-o"],
help = "Output file name",
Modified: nemerle/trunk/ncc/parsing/Lexer.n
==============================================================================
--- nemerle/trunk/ncc/parsing/Lexer.n (original)
+++ nemerle/trunk/ncc/parsing/Lexer.n Tue Oct 18 13:45:12 2005
@@ -50,10 +50,10 @@
| Comment { value : string; }
- | Semicolon
+ | Semicolon { generated : bool; }
| Comma
- | BeginBrace // {
- | EndBrace // }
+ | BeginBrace { generated : bool; } // {
+ | EndBrace { generated : bool; } // }
| BeginRound // (
| EndRound // )
| BeginSquare // [
@@ -61,6 +61,8 @@
| BeginQuote // <[
| EndQuote // ]>
+ | Indent { value : string; }
+
| RoundGroup { Child : Token; } // ( ... )
| BracesGroup { Child : Token; } // { ... }
| SquareGroup { mutable Child : Token; } // [ ... ]
@@ -111,6 +113,8 @@
| BeginQuote => "<["
| EndQuote => "]>"
+ | Indent => "indentation"
+
| RoundGroup | BracesGroup | SquareGroup | QuoteGroup | LooseGroup =>
PreParser.Dump (this, "")
@@ -146,12 +150,31 @@
| EndOfFile => "end of file"
| EndOfGroup => "separator or closing bracket"
| Comment => "documentation comment"
- | Semicolon | Comma | BeginBrace
- | EndBrace | BeginRound | EndRound
+ | Comma | BeginRound | EndRound
| BeginSquare | EndSquare | BeginQuote
| EndQuote =>
"operator `" + ToString () + "'"
+ | BeginBrace (generated) =>
+ if (generated)
+ "group begin"
+ else
+ "operator `" + ToString () + "'"
+
+ | EndBrace (generated) =>
+ if (generated)
+ "group end"
+ else
+ "operator `" + ToString () + "'"
+
+ | Semicolon (generated) =>
+ if (generated)
+ "end of statement"
+ else
+ "operator `" + ToString () + "'"
+
+ | Indent (value) => $"indentation `$value'"
+
| BracesGroup => "`{...}' group"
| RoundGroup => "`(...)' group"
| SquareGroup => "`[...]' group"
@@ -260,7 +283,7 @@
public abstract Dispose () : void;
- protected read () : char
+ protected virtual read () : char
{
def ch =
if (!putback)
@@ -740,7 +763,7 @@
/// returns true if there is some character in the input pending
/// or false if we are at the end of file
- protected eat_whitespace () : bool
+ protected virtual eat_whitespace () : bool
{
mutable eof = false;
@@ -803,7 +826,7 @@
loop (false)
}
- protected do_get_token () : Token
+ virtual protected do_get_token () : Token
{
def ch =
if (isPendingChar) {
@@ -825,8 +848,8 @@
else
Token.Operator (".")
- | '{' => Token.BeginBrace ()
- | '}' => Token.EndBrace ()
+ | '{' => Token.BeginBrace (generated = false)
+ | '}' => Token.EndBrace (generated = false)
| '[' => Token.BeginSquare ()
| ']' =>
if (peek () == '>') {
@@ -838,7 +861,7 @@
| '(' => Token.BeginRound ()
| ')' => Token.EndRound ()
| ',' => Token.Comma ()
- | ';' => Token.Semicolon ()
+ | ';' => Token.Semicolon (generated = false)
| '<' when peek () == '[' => ignore (read ()); Token.BeginQuote ()
@@ -1505,6 +1528,86 @@
}
}
+
+public class LexerFileIndent : LexerFile
+{
+ // LexerFile variant for indentation-based syntax: GetToken () returns a Token.Indent (the line's leading whitespace) ahead of the token it postpones
+ // Location recorded by eat_whitespace () before whitespace is skipped; assigned to the generated Token.Indent
+ protected mutable insertLocation : Location;
+ // Used by GetToken () to store the token that it postpones when insert_indent = true
+ protected mutable tokPending : Token;
+ // Whether read () is still appending leading spaces/tabs to indentString
+ protected mutable countingIndentation : bool;
+ // Whether GetToken () should return a Token.Indent; set by read () at the first character of a line that is not a space or tab
+ protected mutable insert_indent : bool;
+ // String holding the current line's indent (the spaces and tabs read so far)
+ protected mutable indentString : string;
+
+ // Passes `fn' to the base constructor and starts out counting indentation with an empty indent string.
+ public this (fn : string)
+ {
+ base (fn);
+ countingIndentation = true;
+ indentString = "";
+ }
+ // Reads one character through base.read () and updates the indentation-tracking state from it.
+ override protected read () : char
+ {
+ def ch = base.read ();
+ //Message.Debug (Location (file_idx, line, col), $"ch = $ch");
+
+ // Start counting indentation anew after a line break ('\n' or '\r')
+ if (ch == '\n' || ch == '\r') {
+ indentString = "";
+ countingIndentation = true;
+ //Message.Debug (Location (file_idx, line, col), "countingIndentation ON");
+ }
+ else when (countingIndentation) {
+ if (ch == '\t' || ch == ' ')
+ indentString += ch.ToString();
+ // We have a non-space character, so stop counting indentation
+ // and indicate that we have a new line to process
+ else {
+ //Message.Debug (Location (file_idx, line, col), $"countingIndentation OFF: indentColNew = $indentColNew");
+ countingIndentation = false;
+ insert_indent = true;
+ }
+ }
+
+ ch
+ }
+ // Remembers the position before whitespace is skipped, then returns the result of base.eat_whitespace ().
+ override protected eat_whitespace () : bool
+ {
+ // Location where possible automatic token insertion will take place
+ insertLocation = Location (file_idx, line, col);
+
+ base.eat_whitespace ();
+ }
+ // Returns the postponed token if there is one; otherwise fetches from base.GetToken () and, when read () flagged insert_indent, returns a Token.Indent first.
+ public override GetToken () : Token
+ {
+ if (tokPending != null) {
+ def tok = tokPending;
+ tokPending = null;
+ tok;
+ }
+ else {
+ def tok = base.GetToken ();
+ if (insert_indent) {
+ tokPending = tok; // handed out by the next GetToken () call
+ insert_indent = false;
+ mutable tokIndent = Token.Indent (indentString); // indentString as left by read () once base.GetToken () has returned
+ tokIndent.Location = insertLocation;
+ tokIndent;
+ }
+ else
+ tok;
+ }
+ }
+} // LexerFileIndent
+
+
public enum SyntaxType {
| Identifier
| Keyword
Modified: nemerle/trunk/ncc/parsing/MainParser.n
==============================================================================
--- nemerle/trunk/ncc/parsing/MainParser.n (original)
+++ nemerle/trunk/ncc/parsing/MainParser.n Tue Oct 18 13:45:12 2005
@@ -138,7 +138,11 @@
*/
public static Parse (lex : LexerBase) : list [TopDeclaration]
{
- def preparser = PreParser (lex);
+ def preparser =
+ if (Options.IndentationSyntax)
+ PreParserIndent (lex)
+ else
+ PreParser (lex);
def parser = MainParser (GlobalEnv.Core);
mutable result = [];
Modified: nemerle/trunk/ncc/parsing/PreParser.n
==============================================================================
--- nemerle/trunk/ncc/parsing/PreParser.n (original)
+++ nemerle/trunk/ncc/parsing/PreParser.n Tue Oct 18 13:45:12 2005
@@ -47,8 +47,8 @@
*/
public class PreParser
{
- lexer : LexerBase;
- mutable last_token : Token = null;
+ protected lexer : LexerBase;
+ protected mutable last_token : Token = null;
mutable Env : GlobalEnv;
mutable finished : bool = false;
@@ -93,7 +93,7 @@
}
/** Fetch next token (from one token buffer or lexer if it's empty */
- get_token () : Token {
+ protected virtual get_token () : Token {
if (last_token != null) {
def result = last_token;
last_token = null;
@@ -449,4 +449,251 @@
top
}
}
+
+ public class PreParserIndent : PreParser {
+ // PreParser for the indentation syntax: get_token () turns Token.Indent tokens from the lexer into generated `{', `}' and `;' tokens
+ mutable insert_unindents : int; // generated `}' tokens still queued; drained by the first branch of get_token ()
+ mutable indent_level : int; // number of indentation levels opened by a generated `{'
+ mutable new_indent : string; // indent string most recently taken from a Token.Indent
+ mutable current_indent : string; // indent string of the level that is currently open
+ // The number of unmatched { ( [ <[ tokens received from the lexer at this point
+ mutable explicit_groups : int;
+ static mutable indentation_syntax_active : bool; // false inside explicit groups; NOTE(review): static, unlike the other fields -- confirm this is intended
+ mutable insertLocation : Location; // location assigned to generated tokens
+ mutable token_pending : Token; // token held back while a generated token is returned in its place
+ mutable last_real_tok : Token; // last token recorded by the lexer branch of get_token (); checked against null before a `;' is generated
+ mutable indent_strings : System.Collections.ArrayList; // indent strings of the open levels, indexed by level; entry 0 is ""
+ mutable set_namespace : bool; // a 'set namespace' directive has been seen
+ mutable set_class : bool; // a 'set class' directive has been seen
+ mutable force_brace_after_newline : bool; // replace the next Token.Indent by a generated `{'
+ // Initializes the base PreParser with `lex' and starts with empty indent strings and the indentation syntax active.
+ public this (lex : LexerBase) {
+ base (lex);
+
+ indentation_syntax_active = true;
+ new_indent = "";
+ current_indent = "";
+ indent_strings = System.Collections.ArrayList (20);
+ _ = indent_strings.Add("");
+ }
+
+ /** Fetch next token: queued `}', then last_token, then token_pending, then the lexer (where Token.Indent is turned into generated tokens) */
+ protected override get_token () : Token {
+ mutable lexer_tok = null;
+ mutable source = 0; // records which branch produced the token; only read by the commented-out debug output below
+
+ mutable tok =
+ if (insert_unindents > 0) {
+ insert_unindents--;
+ indent_level--; // NOTE(review): get_token_after_indent (tok) already sets indent_level when unindenting -- confirm this extra decrement
+ //Message.Debug (insertLocation, "Generate '}'");
+ Token.EndBrace (true);
+ }
+ // last_token has priority over token_pending
+ else if (last_token != null) {
+ source = 2;
+ def result = last_token;
+ last_token = null;
+ result;
+ }
+ else if (token_pending != null) {
+ source = 1;
+ def result = token_pending;
+ token_pending = null;
+ result;
+ }
+ else {
+ source = 3;
+ try {
+ lexer_tok = lexer.GetToken ();
+ match (lexer_tok) {
+ | Token.Indent as t =>
+ if (indentation_syntax_active) {
+ if (force_brace_after_newline) {
+ force_brace_after_newline = false;
+ Token.BeginBrace (true);
+ }
+ else {
+ new_indent = t.value;
+ insertLocation = lexer_tok.Location;
+ //Message.Debug (insertLocation, "Calling get_token_after_indent ()");
+ source = 4;
+ get_token_after_indent ();
+ }
+ }
+ else
+ get_token(); // inside an explicit group: drop the Indent token and fetch the next one
+
+ | Token.EndOfFile =>
+ insert_unindents = indent_level; // close every open indentation level ...
+ when (set_class)
+ insert_unindents++; // ... plus the brace forced by 'set class'
+ when (set_namespace)
+ insert_unindents++; // ... plus the brace forced by 'set namespace'
+ if (insert_unindents > 0) {
+ insertLocation = lexer_tok.Location;
+ token_pending = lexer_tok;
+ Token.EndBrace (true); // NOTE(review): insert_unindents is not decremented for this first `}' -- confirm the resulting count
+ }
+ else {
+ last_real_tok = lexer_tok;
+ lexer_tok
+ }
+
+ | BeginBrace
+ | BeginRound
+ | BeginSquare
+ | BeginQuote =>
+ explicit_groups++;
+ indentation_syntax_active = false;
+ last_real_tok = lexer_tok;
+ lexer_tok
+
+ | EndBrace
+ | EndRound
+ | EndSquare
+ | EndQuote =>
+ when (explicit_groups == 0)
+ // TODO: better error message
+ throw PreParserException (lexer_tok.Location, "unmatched group end");
+ explicit_groups--;
+ when (explicit_groups == 0)
+ indentation_syntax_active = true;
+ last_real_tok = lexer_tok;
+ lexer_tok
+
+ | _ =>
+ last_real_tok = lexer_tok;
+ lexer_tok
+ }
+ }
+ catch {
+ | e is LexerBase.Error =>
+ Message.Error (lexer.Location, e.name);
+ get_token () // report the lexer error, then try again with the next token
+ }
+ }
+
+ //Message.Debug (tok.ToString() + ", " + tok.GetType().ToString() + ", " + new_indent.Length.ToString());
+ // If the token reads "set" while the current indent is empty, treat it as a 'set namespace' / 'set class' directive:
+ //when (new_indent.Length == 0 && tok is Token.Identifier && (tok :> Token.Identifier).name == "set") {
+ when (current_indent.Length == 0 && tok.ToString() == "set") {
+ def tokNext = lexer.GetToken (); // taken straight from the lexer, not through get_token ()
+ match (tokNext) {
+ | Token.Keyword ("namespace") =>
+ when (set_namespace)
+ throw PreParserException (tokNext.Location, "the 'set namespace' directive can only be used once per file");
+ set_namespace = true;
+ //tokNext = get_token_after_indent (tokNext);
+ force_brace_after_newline = true;
+
+ | Token.Keyword ("class") =>
+ when (set_class)
+ throw PreParserException (tokNext.Location, "the 'set class' directive can only be used once per file");
+ set_class = true;
+ //tokNext = get_token_after_indent (tokNext);
+ force_brace_after_newline = true;
+
+ | _ =>
+ throw PreParserException (tokNext.Location, $"unrecognized 'set' directive: '$tokNext'");
+ }
+ tok = tokNext; // the 'set' token itself is dropped; the keyword that followed it is returned
+ }
+
+ //Message.Debug (tok.Location, tok.ToString() + $" ($source)");
+ tok;
+ }
+ // Skips further Indent and Comment tokens (keeping the last indent string), then lets get_token_after_indent (tok) pick the token unless a brace is being forced.
+ get_token_after_indent () : Token {
+ //Message.Debug ("1");
+ mutable tok = lexer.GetToken ();
+
+ //Message.Debug ("2");
+ while (tok is Token.Indent || tok is Token.Comment) {
+ // TODO: what's the best way to do this?
+ match (tok) {
+ | Token.Indent (value) => new_indent = value;
+ | _ => ()
+ }
+ tok = lexer.GetToken ();
+ }
+
+ when (!force_brace_after_newline)
+ tok = get_token_after_indent (tok);
+
+ tok;
+ }
+ // Compares new_indent with current_indent: same level generates `;', deeper generates `{', shallower generates `}' (throws PreParserException for an unknown level); `tok' is kept in token_pending.
+ get_token_after_indent (tok : Token) : Token {
+ // If we have not unindented
+ if (new_indent.Length >= current_indent.Length) {
+ // Make sure that the beginning of the new indent string
+ // starts with the contents of the current indent string.
+ when (!new_indent.StartsWith(current_indent)) {
+ // TODO: make a better error message -- checkout python error message
+ Message.Error (tok.Location, "inconsistent indentation");
+ current_indent = new_indent;
+ }
+
+ // If we have remained at the same indentation level
+ if (new_indent == current_indent) {
+ if (last_real_tok != null) {
+ token_pending = tok;
+ //Message.Debug (tok.Location, $"Generate ';'");
+ mutable tokInsert =
+ /*if (force_brace_after_newline) {
+ force_brace_after_newline = false;
+ Token.BeginBrace (true);
+ }
+ else*/
+ Token.Semicolon (true);
+ tokInsert.Location = insertLocation;
+ tokInsert;
+ }
+ else
+ tok;
+ }
+ // If we've indented further than the previous line
+ else {
+ match (tok) {
+ | Token.Operator =>
+ tok; // no `{' and no level change; presumably a line continuation that starts with an operator
+
+ | _ =>
+ indent_level++;
+ _ = indent_strings.Add(new_indent);
+ current_indent = new_indent;
+ token_pending = tok;
+ //Message.Debug (tok.Location, "Generate '{'");
+ mutable tokInsert = Token.BeginBrace (true);
+ tokInsert.Location = insertLocation;
+ tokInsert;
+ }
+ }
+ }
+ // Otherwise, we've unindented:
+ else {
+ def i = indent_strings.IndexOf(new_indent);
+ when (i == -1) {
+ // TODO: make a better error message -- checkout python error message
+ throw PreParserException (tok.Location, "inconsistent indentation");
+ }
+
+ insert_unindents = indent_level - i; // number of levels being closed
+ indent_level = i;
+ indent_strings.RemoveRange(i + 1, indent_strings.Count - (i + 1));
+ //while (indent_strings.Length > i + 1)
+ // indent_strings.Pop ();
+
+ current_indent = new_indent;
+ token_pending = tok;
+ //Message.Debug (tok.Location, $"Generate $insert_unindents '}'");
+ insert_unindents--; // one `}' is returned right here; the rest stay queued for get_token ()
+ mutable tokInsert = Token.EndBrace (true);
+ tokInsert.Location = insertLocation;
+ tokInsert;
+ }
+ }
+
+ }
}
Modified: nemerle/trunk/ncc/passes.n
==============================================================================
--- nemerle/trunk/ncc/passes.n (original)
+++ nemerle/trunk/ncc/passes.n Tue Oct 18 13:45:12 2005
@@ -77,6 +77,9 @@
/// initialize pipelines with default values
this () {
+ if (Options.IndentationSyntax)
+ LexingPipeline = LexerFileIndent;
+ else
LexingPipeline = LexerFile;
ParsingPipeline = MainParser.Parse;
ScanningPipeline = ScanTypeHierarchy.ProcessDeclaration;
Added: nemerle/trunk/ncc/testsuite/positive/indentation-syntax.n
==============================================================================
--- (empty file)
+++ nemerle/trunk/ncc/testsuite/positive/indentation-syntax.n Tue Oct 18 13:45:12 2005
@@ -0,0 +1,49 @@
+// OPTIONS: -i
+// 3. With initial comment and spaces between lines
+// 4. Add a condition with a single statement beneath it
+// 5. Add nested conditions to test whether multi-unindenting works
+// 6. Add 'set' keyword for namespace and class
+// 7. Add two kinds of line-continuation + missing newline at end of file
+
+using System.Console
+
+/*
+BEGIN-OUTPUT
+Hello, World!
+n > 0
+c = 3, d = 4
+END-OUTPUT
+*/
+
+set namespace Test
+
+set class App
+
+static Main() : void
+ // 3.
+ Write("Hello")
+
+ // 4.
+ def n = 1
+ when (n > 0)
+ Write(", World")
+ WriteLine("!")
+
+ // 5.
+ if (n > 0)
+ WriteLine("n > 0")
+ when (n > 1)
+ WriteLine("n > 1")
+ else
+ WriteLine("Zero")
+
+ def a = 1
+ def b = 2
+ def c = (
+ a +
+ b
+ )
+ def d = a +
+ c
+ WriteLine($"c = $c, d = $d")
+
More information about the svn
mailing list