/* ------------------------------------------------------------------------- */ /* Knuth-Morris-Pratt exact string matching algorithm */ /* */ /* See ESMAJ: http://www-igm.univ-mlv.fr/~lecroq/string/node8.html */ /* ------------------------------------------------------------------------- */ using System.Array; using Nemerle.IO; class PrefixFunction { private mutable _prefix_function : array [int]; private mutable _pattern : array [char]; private mutable _pattern_length : int; public this (pattern : string) { _pattern = pattern.ToCharArray (); _pattern_length = pattern.Length; _prefix_function = array (_pattern_length + 1); calculate_prefix_function () } public this (pattern : array [char]) { _pattern = pattern; _pattern_length = pattern.Length; _prefix_function = array (_pattern_length + 1); calculate_prefix_function () } public Get (index : int) : int { _prefix_function [index] } private calculate_prefix_function () : void { mutable i = 0; mutable j = -1; _prefix_function [0] = -1; while (i < _pattern_length) { while (j > -1 && _pattern [i] != _pattern [j]) { j = _prefix_function [j] }; i = i + 1; j = j + 1; if (i < _pattern_length && _pattern [i] == _pattern [j]) _prefix_function [i] = _prefix_function [j] else _prefix_function [i] = j } } } class LastOccurrenceFunction { private mutable _last_occurrence_function : array [int]; private mutable _pattern : array [char]; private mutable _pattern_length : int; public this (pattern : string) { _pattern = pattern.ToCharArray (); _pattern_length = pattern.Length; _last_occurrence_function = array (256); calculate_last_occurrence_function () } public Get (character : char) : int { assert ((character :> int) < 256, "only 8-bit wide characters are supported for simplicity"); _last_occurrence_function [(character :> int)] } private calculate_last_occurrence_function () : void { def loop (index : int) : void { when (index < _pattern_length) { assert ((_pattern [index] :> int) < 256, "only 8-bit wide characters are supported for simplicity"); _last_occurrence_function [ (_pattern [index] :> int) ] = index; loop (index + 1) } }; loop (0) } } class GoodSuffixFunction { private mutable _good_suffix_function : array [int]; private mutable _pattern : array [char]; private mutable _reversed_pattern : array [char]; private mutable _pattern_length : int; private mutable _prefix_function : PrefixFunction; private mutable _reversed_prefix_function : PrefixFunction; public this (pattern : string) { _pattern = pattern.ToCharArray (); _reversed_pattern = pattern.ToCharArray (); _pattern_length = pattern.Length; reverse_string (_reversed_pattern); _prefix_function = PrefixFunction (pattern); _reversed_prefix_function = PrefixFunction (_reversed_pattern); _good_suffix_function = array (_pattern_length + 1); calculate_good_suffix_function () } public Get (index : int) : int { _good_suffix_function [index + 1] } private calculate_good_suffix_function () : void { def pattern_m_minus_pi_m = _pattern_length - _prefix_function.Get (_pattern_length); def loop1 (index : int) : void { when (index <= _pattern_length) { _good_suffix_function [index] = pattern_m_minus_pi_m; loop1 (index + 1) } }; loop1 (0); def loop2 (index : int) : void { when (index <= _pattern_length) { def j = _pattern_length - _reversed_prefix_function.Get (index); when (_good_suffix_function [j] > index - _reversed_prefix_function.Get (index)) _good_suffix_function [j] = index - _reversed_prefix_function.Get (index); loop2 (index + 1) } }; loop2 (1) } private reverse_string (text : array [char]) : void { def loop (index : int) : void { when (index <= text.Length / 2) { def t = text [text.Length - index - 1]; text [text.Length - index - 1] = text [index]; text [index] = t; loop (index + 1) } }; loop (0) } } class BM { private mutable _pattern : array [char]; private mutable _pattern_length : int; private mutable _last_occurrence_function : LastOccurrenceFunction; private mutable _good_suffix_function : GoodSuffixFunction; public this (pattern : string) { _pattern = pattern.ToCharArray (); _pattern_length = pattern.Length; _last_occurrence_function = LastOccurrenceFunction (pattern); _good_suffix_function = GoodSuffixFunction (pattern); } public Search (text : string) : option [int] { def text_length = text.Length; def text = text.ToCharArray (); mutable s = 0; def loop () : option [int] { if (s < text_length - _pattern_length) { mutable j = _pattern_length - 1; while (j >= 0 && _pattern [j] == text [s + j]) { j = j - 1 }; if (j == -1) Some (s) else { s = s + Max (_good_suffix_function.Get (j), j - _last_occurrence_function.Get (text [s + j])); loop () } } else None () }; loop () } private static Max (x : int, y : int) : int { if (x < y) y else x; } public static Main () : void { def r = BM ("ziemi egipskiej"); match (r.Search ("Jam jest Pan Bóg twój, który Ciê wywiód³ z ziemi egipskiej, z domu niewoli")) { | Some (i) => printf ("Found at position %d\n", i + 1) | None => printf ("Not found\n") } } }