/*
* Copyright (c) 2003-2008 The University of Wroclaw.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the University may not be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
* NO EVENT SHALL THE UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
using Nemerle.Collections;
using Nemerle.Compiler;
using System;
using System.Text.RegularExpressions;
using PT = Nemerle.Compiler.Parsetree;
namespace Nemerle.Text
{
/**
* Translation of matching on regular expressions to operations on .NET
* regular expressions and its Groups, which allows short and simple
* extraction of regular subpatterns.
*/
/*
* Syntax of this macro is like in following example:
*
* regexp match (str) {
* | "a+.*" => printf ("a\n");
* | @"(?<num : int>\d+)-\w+" => printf ("%d\n", num + 3);
* | "(?<name>(Ala|Kasia))? ma kota" =>
* match (name) {
* | Some (n) => printf ("%s\n", n)
* | None => printf ("noname?\n")
* }
* | _ => printf ("default\n");
* }
*
*/
macro @regexp (mat)
syntax ("regexp", mat)
{
  // Expands [regexp match (val) { | "pattern" => expr ... }] into code that
  // matches [val] once against a single combined regular expression (one
  // generated named group per string pattern) and then runs the expression
  // of the case whose generated group succeeded.  Named groups written in
  // the patterns as (?<name>..) or (?<name : Type>..) are bound with [def]
  // in front of the case expression; with a type, [Type.Parse] is applied
  // to the captured text.
  //
  // The syntax is [regexp match { .. }], so [mat] must be a [match].
  match (mat) {
    | <[ match ($val) { ..$cases } ]> =>
      // final pattern of the regular expression built for this match
      mutable pat = Text.StringBuilder ();
      // for each case: names of the groups generated for its patterns,
      // the named groups already present in those patterns and the
      // associated expression to execute
      mutable alternatives = [];
      // true until the first alternative has been appended; used to place
      // the [|] separators correctly: [(..)|(..)|(..)]
      mutable first = true;
      // expression of the default ([_]) case, if one occurred
      mutable default = None ();

      // walks through the cases of the given match, builds the final regular
      // expression in [pat] and stores group names and related expressions
      // in [alternatives]
      def create_regpattern (cass : list [PT.MatchCase])
      {
        mutable patnames = [];

        // walks through the patterns of a single case, collecting in [acc]
        // the name generated for each pattern's group and storing the
        // named groups found inside the patterns in [patnames]
        def walk_guards (gs, acc)
        {
          match (gs) {
            | <[ $_ when $_ ]> :: _ =>
              Message.FatalError ("guarded pattern not allowed in regexp macro")

            | <[ $(str : string) ]> :: xs =>
              // take names of named groups
              def namesreg = Regex (@"\(\?\<(\w+)\s*(:\s*((\w|\.)+))?");
              mutable m = namesreg.Match (str);
              // find all occurrences of existing groups inside
              while (m.Success)
              {
                mutable end = false;
                mutable optional = false;
                // check if this group is optional (it's context information)
                // so we must count [(] and [)]
                def stack = Stack();
                // number of consecutive backslashes directly before the
                // current character; an odd count means it is escaped
                mutable slashes_in_row = 0;
                // set when an unescaped [(] is seen exactly at m.Index
                mutable found_group = false;
                for (mutable i = 0; !end && i < str.Length; ++i)
                {
                  match (str[i]) {
                    | '\\' => slashes_in_row++
                    | '(' when (slashes_in_row % 2 == 0) =>
                      slashes_in_row = 0;
                      // passed m.Index without an unescaped [(] there:
                      // the match was an escaped parenthesis, stop scanning
                      if(i > m.Index && !found_group)
                        end = true;
                      else {
                        when(i == m.Index)
                          found_group = true;
                        // true for the group itself and for parentheses
                        // opened before it, false for the ones opened later
                        stack.Push(i <= m.Index);
                      }
                    | ')' when (slashes_in_row % 2 == 0) =>
                      slashes_in_row = 0;
                      // An unmatched [)] (for example the one in the
                      // character class "[)]") finds the stack empty; popping
                      // would throw and abort the macro expansion, so the
                      // stack is only popped when it has an element.
                      when(stack.Count > 0 && stack.Pop() && i > m.Index && i + 1 < str.Length && (str[i + 1] == '?' || str[i + 1] == '*')) {
                        end = true;
                        optional = true;
                      }
                    | _ => slashes_in_row = 0
                  }
                }
                // the group can actually be escaped as in @"(\(?<name>.*)"
                // we can't properly check such escapes with regex thus check here
                when(found_group) {
                  // store this existing named group with its name, information
                  // if it's optional and name of its enforced type
                  // - [(?<name>..)?]
                  match ((m.Groups[1].Success, m.Groups[3].Success)) {
                    | (true, false) =>
                      patnames = (m.Groups[1].ToString (), optional,
                      None ()) :: patnames
                    | (true, true) =>
                      patnames = (m.Groups[1].ToString (), optional,
                      Some (m.Groups[3].ToString ())) :: patnames;
                    | _ => ()
                  }
                }
                m = m.NextMatch ()
              }
              def nsymb = Macros.NewSymbol ();
              // add | if it is not the first alternative in our pattern
              unless (first)
                pat = pat.Append ("|");
              first = false;
              // strip the [: Type] annotations, leaving plain [(?<name]
              def newstr = namesreg.Replace (str, "(?<$1");
              // add current alternative to our pattern, anchored at both ends
              pat = pat.Append("(?<" + nsymb.Id + ">^" + newstr + "$)");
              walk_guards (xs, nsymb.Id :: acc)

            | [] => acc

            | _ :: _ =>
              Message.FatalError ("only string patterns allowed in regexp macro")
          }
        }

        match (cass) {
          // the default case is special and we must store it
          | [ <[ case: _ => $expr ]> ] => default = Some (expr)
          | <[ case: | ..$guards => $expr ]> :: xs =>
            alternatives = (walk_guards (guards, []), patnames, expr)
            :: alternatives;
            create_regpattern (xs)
          | [] => ()
        }
      }
      create_regpattern (cases);

      // build decision tree, which checks created groups one by one and if
      // some has matched executes corresponding expression
      def build_checking (cass)
      {
        // build || condition for patterns from one case in initial matching
        def build_alts (alts)
        {
          match (alts) {
            | [str] =>
              <[ matchobj.Groups[$(str : string)].Success ]>
            | x::xs =>
              <[
              matchobj.Groups[$(x : string)].Success ||
              $(build_alts (xs))
              ]>
            | [] => Message.FatalError ("empty list of guards?")
          }
        }

        // build expressions containing values of existing named groups
        // converting their type (parsing them from string) if needed
        def build_bindings (patnames, acc)
        {
          match (patnames) {
            | (name, opt, choosen_type) :: xs =>
              def strin =
                <[ matchobj.Groups[$(name : string)].ToString () ]>;
              def value =
                match (choosen_type) {
                  | None => strin
                  | Some (t) =>
                    <[ $(Nemerle.Macros.Symbol ((t : string)) : name).Parse ($strin) ]>
                }
              def name_symbol = Macros.UseSiteSymbol (name);
              // optional groups are bound as Some (value) / None (),
              // depending on whether the group took part in the match
              def binding =
                if (opt)
                  <[
                  def $(name_symbol : name) =
                  match (matchobj.Groups[$(name : string)].Success) {
                  | true => Some ($value) | _ => None ()
                  }
                  ]>
                else
                  <[ def $(name_symbol : name) = $value ]>;
              build_bindings (xs, binding :: acc);
            | [] => acc
          }
        }

        match (cass) {
          // creates entire expression checking if one case has succeeded;
          // the last remaining case is executed without checking its groups
          | [(_, patnames, expr)] =>
            <[ { .. $(List.Append (build_bindings (patnames, []), [expr])) } ]>
          | (grds, patnames, expr) :: xs =>
            <[
            match ($(build_alts (grds))) {
            | true => {
            .. $(List.Append (build_bindings (patnames, []), [expr]))
            }
            | _ => $(build_checking (xs))
            }
            ]>
          | [] => Message.FatalError("empty list of guards?")
        }
      }

      // expression executed when the regular expression does not match:
      // the default case if given, otherwise a thrown MatchFailureException
      def defexpr =
        match (default) {
          | None =>
            Message.Warning ("this pattern might be not exhaustive");
            <[ throw MatchFailureException() ]>
          | Some (defexpr) => defexpr
        }

      // generation of final code for building regular expression and
      // extracting its groups
      def ctx = Nemerle.Macros.ImplicitCTX ();
      def tb = ctx.CurrentTypeBuilder;
      def static_regobj = Macros.NewSymbol ("static_regobj");
      def value = Macros.NewSymbol ("value");
      if (ctx.InErrorMode && !ctx.Manager.IsIntelliSenseMode) {
        // in error mode (outside IntelliSense) no nested type is defined;
        // only an expression built from the same sub-expressions is returned
        <[
        ignore ($val : string);
        match (false) {
        | true => $(build_checking (alternatives))
        | _ => $defexpr
        }
        ]>
      }
      else {
        // define a private nested module holding the Regex object in a
        // field, so the generated code refers to it through that module
        def tb = tb.DefineNestedType (<[ decl:
        private module $(static_regobj : name) {
        public $(value : name) : Regex =
        Regex ( $(pat.ToString () : string), RegexOptions.ExplicitCapture %| RegexOptions.Compiled );
        }
        ]>);
        tb.Compile ();
        <[
        def matchobj = $(static_regobj : name).$(value : name).Match ($val);
        match (matchobj.Success) {
        | true => $(build_checking (alternatives))
        | _ => $defexpr
        }
        ]>
      }

    | _ =>
      Message.FatalError ("the `regexp' macro expects a match construct")
  }
}
}