/*
 * Copyright (c) 2003-2008 The University of Wroclaw.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *    1. Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *    2. Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *    3. The name of the University may not be used to endorse or promote
 *       products derived from this software without specific prior
 *       written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
 * NO EVENT SHALL THE UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

using Nemerle.Collections;
using Nemerle.Compiler;

using System;
using System.Text.RegularExpressions;

using PT = Nemerle.Compiler.Parsetree;

namespace Nemerle.Text
{
  /**
   * Translation of matching on regular expressions to operations on .NET
   * regular expressions and its Groups, which allows short and simple
   * extraction of regular subpatterns.
   *
   * All string patterns of the match are merged into one regular expression
   * of the form [(?<g1>^p1$)|(?<g2>^p2$)|..], stored in a static field of a
   * freshly defined nested module. The generated code runs that expression
   * once and then tests the synthetic groups [g1], [g2], .. to pick the
   * branch to execute. Named groups written by the user become local values
   * visible in the branch body.
   */
  /*
   * Syntax of this macro is like in following example:
   *
   *   regexp match (str) {
   *     | "a+.*" => printf ("a\n");
   *     | @"(?<num : int>\d+)-\w+" => printf ("%d\n", num + 3);
   *     | "(?<name>(Ala|Kasia))? ma kota" =>
   *        match (name) {
   *          | Some (n) => printf ("%s\n", n)
   *          | None => printf ("noname?\n")
   *        }
   *     | _ => printf ("default\n");
   *   }
   *
   * A group written as [(?<name : type>..)] is bound to [type.Parse (text)];
   * a group followed directly by [?] or [*] is bound as an option.
   */
  macro @regexp (mat)
  syntax ("regexp", mat)
  {
    // syntax is [regexp match { .. }], so [mat] must be [match]
    match (mat) {
      | <[ match ($val) { ..$cases } ]> =>
        // final pattern of the regular expression built for the whole match
        mutable pat = Text.StringBuilder ();
        // one entry per non-default case: names of the synthetic groups
        // created for its patterns, the user's named groups found in those
        // patterns, and the expression to execute
        mutable alternatives = [];
        // variable used for valid creation of alternatives [(..)|(..)|(..)]
        mutable first = true;
        // default matching case if occurred
        mutable default = None ();

        // walks through cases of given match and builds final regular
        // expression in [pat] and stores group names and related expressions
        // to [alternatives]
        def create_regpattern (cass : list [PT.MatchCase]) {
          // user's named groups of the case currently processed, as
          // (name, is optional, optional name of the enforced type)
          mutable patnames = [];

          // walks through guards of single case, creating one synthetic group
          // per string pattern and returning the list of their names; names of
          // groups already present in the patterns are stored in [patnames]
          def walk_guards (gs, acc) {
            match (gs) {
              | <[ $_ when $_ ]> :: _ =>
                Message.FatalError ("guarded pattern not allowed in regexp macro")

              | <[ $(str : string) ]> :: xs =>
                // take names of named groups
                def namesreg = Regex (@"\(\?\<(\w+)\s*(:\s*((\w|\.)+))?");
                mutable m = namesreg.Match (str);
                // find all occurrences of existing groups inside
                while (m.Success) {
                  mutable end = false;
                  mutable optional = false;
                  // check if this group is optional (it's context information)
                  // so we must count [(] and [)]
                  // each stack entry tells whether the corresponding [(] was
                  // placed at or before the start of the current group
                  def stack = Stack();
                  mutable slashes_in_row = 0;
                  mutable found_group = false;
                  for (mutable i = 0; !end && i < str.Length; ++i) {
                    match (str[i]) {
                      | '\\' => slashes_in_row++
                      | '(' when (slashes_in_row % 2 == 0) =>
                        slashes_in_row = 0;
                        // an unescaped [(] past the match position while no
                        // unescaped [(] was seen at that position means the
                        // match started on an escaped parenthesis
                        if(i > m.Index && !found_group)
                          end = true;
                        else {
                          when(i == m.Index)
                            found_group = true;
                          stack.Push(i <= m.Index);
                        }
                      | ')' when (slashes_in_row % 2 == 0) =>
                        slashes_in_row = 0;
                        // character classes are not tracked by this scan, so
                        // a [)] inside one (as in "[)]") can show up with no
                        // open parenthesis recorded; skip it instead of
                        // popping an empty stack, which would throw
                        when(stack.Count > 0 && stack.Pop() && i > m.Index && i + 1 < str.Length && (str[i + 1] == '?' || str[i + 1] == '*')) {
                          end = true;
                          optional = true;
                        }
                      | _ => slashes_in_row = 0
                    }
                  }
                  // the group can actually be escaped as in @"(\(?<name>.*)"
                  // we can't properly check such escapes with regex thus check here
                  when(found_group) {
                    // store this existing named group with its name, information
                    // if it's optional and name of its enforced type
                    // - [(?<name>..)?]
                    match ((m.Groups[1].Success, m.Groups[3].Success)) {
                      | (true, false) =>
                        patnames = (m.Groups[1].ToString (), optional, None ()) :: patnames
                      | (true, true) =>
                        patnames = (m.Groups[1].ToString (), optional, Some (m.Groups[3].ToString ())) :: patnames;
                      | _ => ()
                    }
                  }
                  m = m.NextMatch ()
                }

                def nsymb = Macros.NewSymbol ();
                // add | if it is not first alternative in our pattern
                unless (first) pat = pat.Append ("|");
                first = false;

                // strip the [: type] annotation, leaving a plain [(?<name]
                def newstr = namesreg.Replace (str, "(?<$1");
                // add current alternative to our pattern
                pat = pat.Append("(?<" + nsymb.Id + ">^" + newstr + "$)");

                walk_guards (xs, nsymb.Id :: acc)

              | [] => acc
              | _ :: _ =>
                Message.FatalError ("only string patterns allowed in regexp macro")
            }
          }

          match (cass) {
            // default case is special and we must store it; it is recognized
            // only as the last case of the match
            | [ <[ case: _ => $expr ]> ] =>
              default = Some (expr)

            | <[ case: | ..$guards => $expr ]> :: xs =>
              alternatives = (walk_guards (guards, []), patnames, expr) :: alternatives;
              create_regpattern (xs)

            | [] => ()
          }
        }
        create_regpattern (cases);

        // build decision tree, which checks created groups one by one and if
        // some has matched executes corresponding expression
        // NOTE(review): when the match consists of the default case only,
        // [alternatives] is empty and the [[]] branch below reports
        // "empty list of guards?" - confirm whether that is intended
        def build_checking (cass) {
          // build || condition for patterns from one case in initial matching
          def build_alts (alts) {
            match (alts) {
              | [str] => <[ matchobj.Groups[$(str : string)].Success ]>
              | x::xs => <[ matchobj.Groups[$(x : string)].Success || $(build_alts (xs)) ]>
              | [] => Message.FatalError ("empty list of guards?")
            }
          }

          // build expressions containing values of existing named groups
          // converting their type (parsing them from string) if needed
          def build_bindings (patnames, acc) {
            match (patnames) {
              | (name, opt, choosen_type) :: xs =>
                def strin = <[ matchobj.Groups[$(name : string)].ToString () ]>;
                def value =
                  match (choosen_type) {
                    | None => strin
                    | Some (t) =>
                      <[ $(Nemerle.Macros.Symbol ((t : string)) : name).Parse ($strin) ]>
                  }
                // bind under the name as written by the user, so that the
                // body of the case can refer to it
                def name_symbol = Macros.UseSiteSymbol (name);
                def binding =
                  if (opt)
                    <[ def $(name_symbol : name) =
                         match (matchobj.Groups[$(name : string)].Success) {
                           | true => Some ($value)
                           | _ => None ()
                         }
                    ]>
                  else
                    <[ def $(name_symbol : name) = $value ]>;
                build_bindings (xs, binding :: acc);
              | [] => acc
            }
          }

          match (cass) {
            // creates entire expression checking if one case have succeeded;
            // the last remaining case is executed without testing its groups
            | [(_, patnames, expr)] =>
              <[ { .. $(List.Append (build_bindings (patnames, []), [expr])) } ]>

            | (grds, patnames, expr) :: xs =>
              <[ match ($(build_alts (grds))) {
                   | true => { .. $(List.Append (build_bindings (patnames, []), [expr])) }
                   | _ => $(build_checking (xs))
                 }
              ]>

            | [] => Message.FatalError("empty list of guards?")
          }
        }

        // expression executed when the regular expression does not match:
        // the user's default case, or a thrown MatchFailureException
        // (together with a compile-time warning) when there is none
        def defexpr =
          match (default) {
            | None =>
              Message.Warning ("this pattern might be not exhaustive");
              <[ throw MatchFailureException() ]>
            | Some (defexpr) => defexpr
          }

        // generation of final code for building regular expression and
        // extracting its groups
        def ctx = Nemerle.Macros.ImplicitCTX ();
        def tb = ctx.CurrentTypeBuilder;
        def static_regobj = Macros.NewSymbol ("static_regobj");
        def value = Macros.NewSymbol ("value");

        if (ctx.InErrorMode && !ctx.Manager.IsIntelliSenseMode) {
          // no nested type is defined on this path; the generated code only
          // keeps [val] and the branch bodies in the tree
          // NOTE(review): presumably so they are still type-checked while
          // errors are being reported - confirm
          <[
            ignore ($val : string);
            match (false) {
              | true => $(build_checking (alternatives))
              | _ => $defexpr
            }
          ]>
        }
        else {
          // the Regex object is kept in a field of a private nested module
          def tb = tb.DefineNestedType (<[ decl:
            private module $(static_regobj : name) {
              public $(value : name) : Regex = Regex (
                $(pat.ToString () : string),
                RegexOptions.ExplicitCapture %| RegexOptions.Compiled
              );
            }
          ]>);
          tb.Compile ();
          <[
            def matchobj = $(static_regobj : name).$(value : name).Match ($val);
            match (matchobj.Success) {
              | true => $(build_checking (alternatives))
              | _ => $defexpr
            }
          ]>
        }

      | _ =>
        Message.FatalError ("the `regexp' macro expects a match construct")
    }
  }
}