Version française
Home     About     Download     Resources     Contact us    
Browse thread
Ocamllex question
[ Home ] [ Index: by date | by threads ]
[ Search: ]

[ Message by date: previous | next ] [ Message in thread: previous | next ] [ Thread: previous | next ]
Date: -- (:)
From: Michael Wohlwend <micha-1@f...>
Subject: Re: Another problem (was Re: [Caml-list] Ocamllex question)
On Sunday 23 October 2005 23:12, Matt Gushee wrote:
> While we're on the subject of Ocamllex, there's another issue I'm
>
> wondering about. My lexer needs to handle quoted strings, something like:
>   | '"' [^ '"'] * as word '"'     { WORD word }
>
> Not essential, but it would be nice to allow escaped quotes within such
> strings:
>
>   "The quick brown fox jumped over the \"lazy\" dog."
>
> I've haven't actually tried to implement this yet, but thinking about it
> it seems like it would make the lexer hugely more complex. Can anyone
> suggest a reasonably simple way to deal with escape sequences?

You write an extra lexer rule which gets called when a " is seen; in this rule
you read single chars and append them to a string buffer. If you see a
backslash you handle the next character specially; example:

(* [char_for_backslash c] translates the character that follows a backslash
   into the character the escape sequence denotes.  The letters listed below
   map to their control characters; any other character stands for itself. *)
let char_for_backslash escaped =
  match escaped with
  | 'n' -> '\n'
  | 't' -> '\t'
  | 'r' -> '\r'
  | 'b' -> '\b'
  | 'a' -> '\x07' (* bell *)
  | 'v' -> '\x0B' (* vertical tab *)
  | 'f' -> '\x0C' (* form feed *)
  | literal -> literal

(* Named regexp: the characters scan_str accepts right after a backslash,
   i.e. character codes 32 through 255 (the escapes are decimal). *)
let bs_escapes = [ '\032' - '\255' ]

(* Shared accumulator holding the text of the string literal being scanned. *)
let string_buff =
  let initial_capacity = 256 in
  Buffer.create initial_capacity

(* Empty the accumulator before scanning a new literal. *)
let reset_string_buffer () =
  Buffer.clear string_buff

(* Append one character to the accumulator. *)
let store_string_char = Buffer.add_char string_buff

(* Append a whole string to the accumulator. *)
let store_string = Buffer.add_string string_buff

(* Return a copy of everything accumulated so far. *)
let get_stored_string () =
  Buffer.contents string_buff

(* Main lexer entry point.  On an opening double quote it resets the string
   buffer, lets scan_str consume the rest of the literal, and returns the
   accumulated text as a STRING token.
   An ocamllex entry point is introduced with the keyword `parse`; `parser`
   is not accepted.  The `| ...` line stands for the other token rules,
   which are elided in this example. *)
rule dict = parse
  | ...
  | [ '"' ] as d
      {
        reset_string_buffer ();
        (* scan_str fills the string buffer up to the closing quote. *)
        scan_str lexbuf;
        let s = get_stored_string () in
        (* Printf.printf " String (%c) read:%s " d s; *)
        STRING(s)
      }

(* Scans the body of a string literal after its opening quote, appending
   each character to the string buffer via store_string_char.  Returns unit
   once the closing double quote has been consumed.
   Raises Lexical_error if end of input is reached before the closing quote. *)
and scan_str = parse
    | [ '"' ]  { () }
    | '\\' (bs_escapes as c)
        { store_string_char (char_for_backslash c); scan_str lexbuf }
    | allowed_string_char as c
        (* Keep scanning after an ordinary character; without the recursive
           call the rule would return after the first character and the
           literal would be truncated. *)
        { store_string_char c; scan_str lexbuf }
    | eof  { raise( Lexical_error("unterminated string") ) }


works well,

 Michael