Version française
Home     About     Download     Resources     Contact us    
Browse thread
Haskell parser combinators in OCaml?
[ Home ] [ Index: by date | by threads ]
[ Search: ]

[ Message by date: previous | next ] [ Message in thread: previous | next ] [ Thread: previous | next ]
Date: -- (:)
From: Jorgen Hermanrud Fjeld <jhf@h...>
Subject: Haskell parser combinators in OCaml?
Hi.

From the world of Haskell, the work of S. Doaitse Swierstra in the paper
"Combinator Parsers: From Toys to Tools" 
"http://citeseer.ist.psu.edu/363886.html", introduces some very nice
combinator parsers that parse LALR(k) grammars, and give good error
messages.

I would love too express something equivalent in OCaml, but I'm not sure
how to translate the concepts used into concepts in OCaml.

I am hoping some of the type theorists out there would glance at the
paper, and bestow some reflection, advice or warning upon me.

There are several issues:
1) How to express the lazy lookahead data structure?
3) How to express the type of the parser in OCaml?

Some details:
1) The lazy data structure in 4.1 can not be expressed directly,
   and I believe some kind of explicit fixed point is needed.
   Would one need fixed points with deBruijn indexes?
   Do you know of any similar examples that I may look at for
   inspiration?
2) The parser has the haskell type 
type Parser a =
  forall b result .
     Future b result
  -> Stack a b
  -> Errs
  -> Input
  -> Steps result
which I can not express in OCaml. My attempts at encoding this 
using an encoding that express existential types, have so far not 
worked out. I always end up with a type error, and do not see how
to better design it. 
######## The type error
File "parser.ml", line 154, characters 21-26:
This field value has type
  ('a, 'a) future ->
  (symbol, 'a) stack -> (errors -> errors) -> input -> ('a * errors) steps
which is less general than
  'b 'c.
    ('b, 'c) future ->
    ('d, 'b) stack -> (errors -> errors) -> input -> ('c * errors) steps
######## Begin code
module BraunTree = 
struct
    type ('key,'value) braun_tree = 
        | Node of ('key,'value) braun_tree * ('key * 'value) * ('key,'value) braun_tree
        | Nil
    ;;
    let tree_of_list (l:('key*'value) list) : ('key,'value) braun_tree = 
        let rec tree_of_list len l = 
            match l with
            | [] -> (Nil,[])
            | (h::[]) -> (Node (Nil,h,Nil),[])
            | (h::t) ->  
                    let left_len = (len - 1) / 2 in
                    let right_len = len - 1 - left_len in
                    let (left_tree,left_list) = tree_of_list left_len l in
                    match left_list with
                    | [] -> assert false
                    | (left_head::left_tail) ->
                            let (right_tree,right_tail) = tree_of_list right_len left_tail in
                            (Node (left_tree,left_head,right_tree),right_tail)
        in
        let (tree,l) = tree_of_list (List.length l) l in
        match l with
        | [] -> tree
        | _ -> assert false
    ;;
    let find ~(key:'key) ~(tree:('key,'value) braun_tree) : 'value option = 
        let rec find tree = 
            match tree with
            | Nil -> None
            | Node (left,(found_key,value),right) -> 
                    match compare key found_key with
                    | 0 -> Some value
                    | 1 -> find left
                    | -1 -> find right
                    | _ -> assert false
        in
        find tree
    ;;
end

module ContinuationTrieParser = 
struct
  type symbol = string
  type input = symbol list
  type 'result steps = 
      Ok of 'result steps
    | Fail of 'result steps 
    | Stop of 'result
  type ('a,'b) stack = 'a -> 'b;;
  type ('cont,'result) future = 'cont -> (errors->errors) -> input -> 'result steps
  and errors = 
      | Deleted of symbol * string * errors
      | Inserted of symbol * string * errors
      | Notused of string 
  type 'p automaton = 
    | Shift of 'p * (symbol * 'p automaton) list
    | ShiftReduce of 'p automaton * 'p automaton
    | Reduce of 'p
    | Found of 'p * 'p automaton
  type 'a combinator_parser = {
      parse:'cont 'result.
      ('cont,'result) future-> ('a,'cont) stack -> (errors->errors) -> input -> ('result*errors) steps
      }
  type 'a parser_generator = {
      automaton : ('a combinator_parser) automaton;
      generated : 'a combinator_parser
  }
  exception Ambigous_grammar
  ;;
  let rec best : 'result steps -> 'result steps -> 'result steps = 
    fun left right -> match (left,right) with
        (Ok left,Ok right) -> Ok (best left right) 
      | (Fail left,Fail right) -> Fail (best left right) 
      | (Ok _,Fail _) -> left 
      | (Fail _,Ok _) -> right 
      | (Stop _,_) -> left 
      | (_,Stop _) -> right 
  ;;
  let best_parser (left:'a combinator_parser) (right:'a combinator_parser) : 'a combinator_parser = 
      let parse cont stack errors input =
          best (left.parse cont stack errors input) (right.parse cont stack errors input)
      in
      {parse=parse}
  ;;

  (** Also known as a catamorphism *)
  let transform_automaton ((transform_shift
                     ,transform_shiftreduce
                     ,transform_reduce
                     ,transform_found
                     ): ((('p * (symbol*'p automaton) list) -> 'b)
                        *(('p automaton * 'p automaton) -> 'b)
                        *('p -> 'b)
                        *(('p * 'p automaton) -> 'b)))
                     (automaton:'a automaton) : 'b =
        let rec transform (automaton:'a automaton) = 
                         match automaton with
                         | Shift (p,choices) -> 
                                 let rec foreach choices collected = 
                                     match choices with
                                     | [] -> collected
                                     | ((symbol,choice)::tail) -> 
                                       let collected = (symbol,transform choice)::collected in
                                           foreach tail collected 

                                 in 
                                 transform_shift (p,foreach choices []) 
                         | ShiftReduce (shift,reduce) ->
                                 transform_shiftreduce (transform shift,transform reduce)
                         | Reduce reduce -> transform_reduce reduce
                         | Found (found,more) -> transform_found (found,transform more)
        in
        transform automaton
  ;;

  let map_automaton (f:'a->'b) (automaton: 'a automaton) : 'b automaton = 
    let transform_shift (p,choices) = Shift (f p,choices)
    in
    let transform_shiftreduce (shift,reduce) = ShiftReduce (shift,reduce)
    in
    let transform_reduce reduce = Reduce (f reduce)
    in
    let transform_found (found,more) = Found (f found,more)
    in
    transform_automaton
      (transform_shift,transform_shiftreduce,transform_reduce,transform_found)
      automaton
  ;;

  let rec mkparser (automaton: string automaton) : string parser_generator =
      let choose (input:input) : string combinator_parser = 
          let transform_shift ((p,choices)
              :symbol * (symbol*symbol automaton) list) : string combinator_parser = 
              let table : (symbol,symbol automaton) BraunTree.braun_tree = BraunTree.tree_of_list choices in
              let find key = BraunTree.find ~key ~tree:table in
              let parse cont stack errors input : ('a*errors) steps = 
                  match input with
                  | [] -> 
                          let error = 
                              errors (Inserted (p,"Insert at end of file",Notused ""))
                          in Stop (stack p,error)
                  | (h::t) -> begin
                      match find h with
                      | Some automaton ->
                              Ok ((mkparser automaton).generated.parse cont stack errors t)
                      | None -> 
                          let errors error = errors (Deleted (h,"Deleted symbol",error))
                          in
                          let errors error = errors (Inserted (p,"Insert symbol",error))
                          in Fail (Fail (Stop (stack p,errors (Notused h))))
                  end 
              in 
              {parse=parse}
          in
          let transform_shiftreduce ((shift,reduce) : symbol automaton * symbol automaton ) =
              let parse cont stack errors input = 
                  (best_parser (mkparser shift).generated (mkparser reduce).generated).parse cont stack errors input
              in 
              parse
          in
          let transform_reduce (reduce:symbol) =
              let parse cont stack errors input = reduce cont stack errors input
              in parse
          in
          let transform_found ((found,more):symbol*symbol automaton) =
              let parse cont stack errors input = found cont stack errors input
              in {parse=parse}
          in
          transform_automaton
          (transform_shift,transform_shiftreduce,transform_reduce,transform_found)
          automaton 
      in
      let parse cont stack errors input =
          (choose input).parse cont stack errors input
      in
      {automaton=automaton;parse=parse}
  ;;

  (** <|> *)
  let either : ('a parser_generator * 'a parser_generator) -> 'a parser_generator = 
      fun (p,q) -> 
          mkparser (merge_ch p.automaton q.automaton)
  ;;

  let rec combine (lefts: (symbol * 'p automaton) list )
                  (rights: (symbol * 'p automaton) list )
                  : (symbol * 'p automaton) list = 
      match (lefts,rights) with
      | ((((left_symbol,left_sentence) as left_head)::left_tail)
        ,(((right_symbol,right_sentence) as right_head)::right_tail)
        ) -> 
              begin
                  match compare left_symbol right_symbol with
                  | 1 -> left_head::(combine left_tail rights)
                  | -1 -> right_head::(combine lefts right_tail)
                  | 0 -> 
                       let head = (left_symbol,either(left_sentence,right_sentence)) in
                       let tail = (combine left_tail right_tail) in
                       head::tail
                 | _ -> assert false
              end
      | ([],_) -> rights
      | (_,[]) -> lefts
  ;;

  (** <*> *)
  let rec both : ('a parser_generator * 'a parser_generator) -> 'a parser_generator =
      fun (p,q) ->
          (** Use two combinator parsers in sequence
           * a both for combinator parsers
           *)
          let both_combinator_parsers first second = 
              let parse cont stack errors input =
                  let stack f x = stack (f x) in
                  first.parse (second.parse cont) stack errors input
              in
              {parse=parse}
          in
          let transform_shift (p,choices) = Shift (both_combinator_parsers p q.generated,choices)
          in
          let transform_shiftreduce (shift,reduce) = merge_ch shift reduce
          in
          let transform_reduce reduce = 
              let worker x = fwby reduce x in
              map_automaton worker q.automaton
          in
          let transform_found (found,more) = Found (both_combinator_parsers found q.generated,more)
          in
          let automaton = transform_automaton (transform_shift
                                              ,transform_shiftreduce
                                              ,transform_reduce
                                              ,transform_found) p.automaton
          in
          mkparser automaton
  ;;

  let merge_ch left right = 
      match (left,right) with
      | (Shift (left_parser,left_choices),Shift (right_parser,right_choices)) ->
              let best = best_parser left_parser right_parser in
              let choices = combine left_choices right_choices in
              Shift (best,choices)
      | (Shift _,ShiftReduce (shift,reduce)) ->
              ShiftReduce (merge_ch left shift,reduce)
      | (Shift _,Reduce _) -> ShiftReduce (left,right)
      | (Shift _,Found (_,more)) ->merge_ch left more
      | (Found (_,more),_) -> merge_ch more right
      | (_,Shift _) -> merge_ch right left
      | (ShiftReduce _,_) 
      | (Reduce _,_) -> raise Ambigous_grammar
  ;;

  let symbol (a:symbol)  : symbol combinator_parser = 
      let rec parse cont
                    stack
                    errors
                    input =
          match input with
          | x::xs -> 
              if a = x 
              then Ok (cont (stack a) errors xs)
              else 
                  let deleted_x =
                      let errors e = errors (Deleted (x,position xs,e)) in
                      parse cont stack errors xs in
                  let inserted_a =
                      let errors e = errors (Inserted (a,show_symbol a,e)) in
                      cont (stack x) errors input in
                  Fail (best deleted_x inserted_a)
          | [] -> 
                  let errors e = errors (Inserted (a,eof,e)) in
                  let inserted_a = cont (stack a) errors input in
                  Fail inserted_a
      in
      let accept cont stack errors input = 
          match input with
          | [] -> assert false
          | (x::xs) -> 
                  assert (a = x) ;
                  Ok  (cont (stack a) errors xs)
      in
      let shift = Shift ({parse=parse},[(a,Reduce {parse=accept})]) in
      let found = Found ({parse=parse},shift) in
      mkparser found
  ;;

  let succeed f =
      let parse cont stack errors input = cont (stack f) errors input in
      mkparser (End {parse=parse})
  ;;

end
;;


######## End code
   
-- 
Sincerely | Homepage:
Jørgen    | http://www.hex.no/jhf
          | Public GPG key:
          | http://www.hex.no/jhf/key.txt