Browse thread
[Caml-list] Need NFA/DFA conversion help
[
Home
]
[ Index:
by date
|
by threads
]
[ Message by date: previous | next ] [ Message in thread: previous | next ] [ Thread: previous | next ]
[ Message by date: previous | next ] [ Message in thread: previous | next ] [ Thread: previous | next ]
| Date: | -- (:) |
| From: | skaller <skaller@u...> |
| Subject: | Re: [Caml-list] Need NFA/DFA conversion help |
On Thu, 2004-09-16 at 01:05, skaller wrote:
> I need some help understanding how to convert
> an NFA with *labelled* arcs into an equivalent DFA.
Thx to those who responded. Just for motivation:
> Ocamllex functionality is already available,
This already works:
(* test matcher *)
(* tokens *)
type codes_t = [
| `Word
| `Ident
| `Num
| `White
| `Dus
]
let string_of_code = function
| `Word -> "Word"
| `Ident -> "Ident"
| `Num -> "Num"
| `White -> "White"
| `Dus -> "Dus"
| `Reserved -> "Reserved"
let string_of_result f = function
| `Coded xs -> String.concat ", " (List.map f xs)
| `Error _ -> "Match failure"
(* regexps *)
let digit = regexp_of_charset "0123456789"
let lower = regexp_of_inclusive_char_range 'a' 'z'
let upper = regexp_of_inclusive_char_range 'A' 'Z'
let letter = alt upper lower
let underscore = regexp_of_char '_'
let word = many letter
let idstart = alt letter underscore
let idrest = alt idstart digit
let space = regexp_of_char ' '
let white = many space
let ident = seq idstart (aster idrest)
let num = many digit
let us2 = seq underscore underscore
(* associate regexps with tokens *)
let tk_word = seq word (code `Word)
let tk_ident = seq ident (code `Ident)
let tk_num = seq num (code `Num)
let tk_white = seq white (code `White)
let tk_us2 = seq (seq us2 (code `Dus))
(* gather the alternatives *)
let re = alts [tk_word; tk_ident; tk_num; tk_white; tk_us2]
let alphabet, states, codes, matrix = process_regexp re;;
(* print all the token associated with command line argument *)
let data = Sys.argv.(1) ;;
let matcher = make_matcher matrix codes data;;
let result = recognize matcher;;
print_endline ("Result: " ^ string_of_result string_of_code result);;
---------------------------
With some minor fiddling this is a tokeniser
like Ocamllex, except you don't need any external tool
or library, you can define everything 'in-caml'
and dynamically, as shown. (The core of the matcher
engine is also purely functional :)
Then you can either process some data,
or you can save the compiled automaton (possibly
as Ocaml or C sourcecode which can be compiled directly
to executable code like (Ocaml)lex output).
What doesn't work is sub-group matching,
look ahead, etc -- things requiring actions
in the middle of a regexp (rather than at the end).
If that could be made to work, we'd have a nice
pure Ocaml only library that could replace Ocamllex,
Str, and PCRE all at once -- and also allow a lot
of other things to be built on top, such as an RTN parser.
[It turns out polymorphic variants are quite useful
in extending the regular expression ASTs, as well
as being used as tokens.]
--
John Skaller, mailto:skaller@users.sf.net
voice: 061-2-9660-0850,
snail: PO BOX 401 Glebe NSW 2037 Australia
Checkout the Felix programming language http://felix.sf.net
-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners