Previous Contents Next

Exercises

Filtering Comments Out

Comments in Objective CAML are hierarchical, that is, they can be nested. We can thus comment out sections of text, including those that themselves contain comments. A comment starts with the characters (* and finishes with *). Here's an example:

(* comment spread
over several
lines *)

let succ x = (* successor function *)
x + 1;;

(* level 1 commented text
let old_succ y = (* level 2 successor function level 2 *)
y +1;;
level 1 *)
succ 2;;
The aim of this exercise is to create a new text without comments. You are free to choose whatever lexical analysis tool you wish.

  1. Write a lexer able to recognize Objective CAML comments. These start with a (* and end with a *). Your lexer should ensure comments are balanced, that is to say the number of comment openings equals the number of comment closings. We are not interested in other constructions in the language which may contain characters (* and *).
    (** file comment1.mll **)

    {
    (* Raised when a comment closer has no matching opener, or when the
       input ends while a comment is still open. *)
    exception Commentaires_mal_balances

    (* Reads the current lexeme and discards it. *)
    let ignore_lex lb =
      let _ = Lexing.lexeme lb in
      ()

    (* Hooks applied to each character found outside and inside a comment.
       In this first version both simply discard the text. *)
    let traite_normal lb = ignore_lex lb
    let traite_comment lb = ignore_lex lb
    }

    (* Text outside any comment. Returns unit at end of input. *)
    rule normal = parse
        eof  { () }
      | "*)" { raise Commentaires_mal_balances }
      | "(*" { commentaire lexbuf ; normal lexbuf }
      | _    { traite_normal lexbuf ; normal lexbuf }

    (* Body of one comment, entered just after its opener. A nested opener
       is consumed by a recursive call, after which scanning of the
       enclosing comment resumes. Returns right after the matching closer. *)
    and commentaire = parse
        eof  { raise Commentaires_mal_balances }
      | "*)" { () }
      | "(*" { commentaire lexbuf ; commentaire lexbuf }
      | _    { traite_comment lexbuf ; commentaire lexbuf }



  2. Write a program which takes a file, reads it, filters comments away and writes a new file with the remaining text.
    (** file comment2.mll **)
    {
    (* Raised when a comment closer has no matching opener, or when the
       input ends while a comment is still open. *)
    exception Commentaires_mal_balances

    (* Channel receiving the text that is kept. It is the standard output
       until init_sortie installs another channel. *)
    let sortie = ref stdout
    let init_sortie f = sortie := f

    (* Reads the current lexeme and discards it. *)
    let ignore_lex lb =
      let _ = Lexing.lexeme lb in
      ()

    (* Text outside comments is copied to the output channel. *)
    let traite_normal lb =
      let texte = Lexing.lexeme lb in
      output_string !sortie texte

    (* Text inside comments is dropped. *)
    let traite_comment lb = ignore_lex lb
    }

    (* Text outside any comment. Returns unit at end of input. *)
    rule normal = parse
        eof  { () }
      | "*)" { raise Commentaires_mal_balances }
      | "(*" { commentaire lexbuf ; normal lexbuf }
      | _    { traite_normal lexbuf ; normal lexbuf }

    (* Body of one comment, entered just after its opener. A nested opener
       is consumed by a recursive call, after which scanning of the
       enclosing comment resumes. Returns right after the matching closer. *)
    and commentaire = parse
        eof  { raise Commentaires_mal_balances }
      | "*)" { () }
      | "(*" { commentaire lexbuf ; commentaire lexbuf }
      | _    { traite_comment lexbuf ; commentaire lexbuf }


    {

    (* [decommente src dest] reads the file [src], removes its comments with
       the [normal] lexer entry point and writes the remaining text to [dest].
       Both channels are closed even when something fails, for instance when
       the lexer raises [Commentaires_mal_balances] or when [dest] cannot be
       opened; the exception is then re-raised to the caller. *)
    let decommente src dest =
      let file_in = open_in src in
      try
        let file_out = open_out dest in
        begin
          try
            init_sortie file_out ;
            normal (Lexing.from_channel file_in) ;
            close_out file_out
          with e ->
            (* Release the output channel without masking the original error. *)
            close_out_noerr file_out ;
            raise e
        end ;
        close_in file_in
      with e ->
        close_in_noerr file_in ;
        raise e ;;

    (* Prints the expected command line on standard output, followed by a
       newline, and flushes. *)
    let usage () = print_endline "comment2 filein fileout" ;;


    (* Entry point: expects exactly two command line arguments, the input
       file and the output file; otherwise prints the usage line. *)
    let main () =
      match Sys.argv with
      | [| _ ; src ; dest |] -> decommente src dest
      | _ -> usage () ;;

    let () = main () ;;
    }











  3. In Objective CAML character strings may contain any character, even the sequences (* and *). For example, character string "what(*ever te*)xt" should not be considered a comment. Modify your lexer to consider character strings.
    (** file comment3.mll **)
    {
    (* Raised when a comment closer has no matching opener, or when the
       input ends while a comment is still open. *)
    exception Commentaires_mal_balances

    (* Raised when the input ends inside a string literal. *)
    exception Chaine_non_terminee

    (* Reads the current lexeme and discards it. *)
    let ignore_lex lb = ignore (Lexing.lexeme lb)

    (* Channel receiving the text that is kept. It is the standard output
       until init_sortie installs another channel. *)
    let sortie = ref stdout
    let init_sortie f = sortie := f

    (* Text outside comments is copied to the output channel. *)
    let traite_normal lb = output_string !sortie (Lexing.lexeme lb)

    (* Text inside comments is dropped. *)
    let traite_comment = ignore_lex
    }

    (* Text outside any comment. Returns unit at end of input. *)
    rule normal = parse
      "(*" { ignore_lex lexbuf ; commentaire lexbuf ; normal lexbuf }
    | "*)" { raise Commentaires_mal_balances }
    (* A character literal holding a double quote, escaped or not, is copied
       as is so that it does not open a string. *)
    | "'\"'" | "'\\\"'" { traite_normal lexbuf ; normal lexbuf }
    | '"' { traite_normal lexbuf ; chaine lexbuf ; normal lexbuf }
    | _ { traite_normal lexbuf ; normal lexbuf }
    | eof { () }

    (* Body of one comment, entered just after its opener. Nested comments
       are handled by recursion. Strings inside comments get no special
       treatment. *)
    and commentaire = parse
      "(*" { ignore_lex lexbuf ; commentaire lexbuf ; commentaire lexbuf }
    | "*)" { ignore_lex lexbuf }
    | _ { traite_comment lexbuf ; commentaire lexbuf }
    | eof { raise Commentaires_mal_balances }

    (* Body of a string literal, entered just after the opening quote.
       Everything is copied to the output, comment delimiters included. *)
    and chaine = parse
      '"' { traite_normal lexbuf }
    (* A backslash and the character after it are consumed together, so an
       escaped quote does not end the string and an escaped backslash does
       not hide the closing quote. *)
    | '\\' _ { traite_normal lexbuf ; chaine lexbuf }
    | _ { traite_normal lexbuf ; chaine lexbuf }
    | eof { raise Chaine_non_terminee }





  4. Use this new lexer to remove comments from an Objective CAML program.
    (** file comment4.mll **)
    {
    (* Raised when a comment closer has no matching opener, or when the
       input ends while a comment is still open. *)
    exception Commentaires_mal_balances

    (* Raised when the input ends inside a string literal. *)
    exception Chaine_non_terminee

    (* Reads the current lexeme and discards it. *)
    let ignore_lex lb = ignore (Lexing.lexeme lb)

    (* Channel receiving the text that is kept. It is the standard output
       until init_sortie installs another channel. *)
    let sortie = ref stdout
    let init_sortie f = sortie := f

    (* Text outside comments is copied to the output channel. *)
    let traite_normal lb = output_string !sortie (Lexing.lexeme lb)

    (* Text inside comments is dropped. *)
    let traite_comment = ignore_lex
    }

    (* Text outside any comment. Returns unit at end of input. *)
    rule normal = parse
      "(*" { ignore_lex lexbuf ; commentaire lexbuf ; normal lexbuf }
    | "*)" { raise Commentaires_mal_balances }
    (* A character literal holding a double quote, escaped or not, is copied
       as is so that it does not open a string. *)
    | "'\"'" | "'\\\"'" { traite_normal lexbuf ; normal lexbuf }
    | '"' { traite_normal lexbuf ; chaine lexbuf ; normal lexbuf }
    | _ { traite_normal lexbuf ; normal lexbuf }
    | eof { () }

    (* Body of one comment, entered just after its opener. Nested comments
       are handled by recursion. Strings inside comments get no special
       treatment. *)
    and commentaire = parse
      "(*" { ignore_lex lexbuf ; commentaire lexbuf ; commentaire lexbuf }
    | "*)" { ignore_lex lexbuf }
    | _ { traite_comment lexbuf ; commentaire lexbuf }
    | eof { raise Commentaires_mal_balances }

    (* Body of a string literal, entered just after the opening quote.
       Everything is copied to the output, comment delimiters included. *)
    and chaine = parse
      '"' { traite_normal lexbuf }
    (* A backslash and the character after it are consumed together, so an
       escaped quote does not end the string and an escaped backslash does
       not hide the closing quote. *)
    | '\\' _ { traite_normal lexbuf ; chaine lexbuf }
    | _ { traite_normal lexbuf ; chaine lexbuf }
    | eof { raise Chaine_non_terminee }


    {

    (* [decommente src dest] reads the file [src], removes its comments with
       the [normal] lexer entry point and writes the remaining text to [dest].
       Both channels are closed even when something fails, for instance when
       the lexer raises [Commentaires_mal_balances] or when [dest] cannot be
       opened; the exception is then re-raised to the caller. *)
    let decommente src dest =
      let file_in = open_in src in
      try
        let file_out = open_out dest in
        begin
          try
            init_sortie file_out ;
            normal (Lexing.from_channel file_in) ;
            close_out file_out
          with e ->
            (* Release the output channel without masking the original error. *)
            close_out_noerr file_out ;
            raise e
        end ;
        close_in file_in
      with e ->
        close_in_noerr file_in ;
        raise e ;;

    (* Prints the expected command line on standard output, followed by a
       newline, and flushes. The program name matches this file, comment4;
       the previous text still named comment2. *)
    let usage () = print_endline "comment4 filein fileout" ;;


    (* Entry point: expects exactly two command line arguments, the input
       file and the output file; otherwise prints the usage line. *)
    let main () =
      match Sys.argv with
      | [| _ ; src ; dest |] -> decommente src dest
      | _ -> usage () ;;

    let () = main () ;;
    }

Evaluator

We will use ocamlyacc to implement an expression evaluator. The idea is to perform the evaluation of expressions directly in the grammar rules.

We choose a (completely parenthesized) prefix arithmetic expression language with variable arity operators. For example, expression (ADD e1 e2 .. en) is equivalent to e1 + e2 + .. + en. Plus and times operators are right-associative and subtraction and division are left-associative.
  1. Define in file opn_parser.mly the parsing and evaluation rules for an expression.

    %{
    (* [app_right f [x1; ...; xn]] is [f x1 (f x2 (... (f xn-1 xn)))], that is
       a right-associative application of [f]. A singleton list yields its
       element. Fails with [Failure "missing argument"] on the empty list. *)
    let app_right f xs =
      match List.rev xs with
      | [] -> failwith "missing argument"
      | last :: before ->
          (* Walking the reversed list combines the innermost pair first. *)
          List.fold_left (fun acc x -> f x acc) last before ;;

    (* [app_left f [x1; ...; xn]] is [f (... (f (f x1 x2) x3) ...) xn], that is
       a left-associative application of [f]. A singleton list yields its
       element. Fails with [Failure "missing argument"] on the empty list. *)
    let app_left f = function
      | [] -> failwith "missing argument"
      | first :: rest -> List.fold_left f first rest ;;

    (* Table mapping an operator name to the function that evaluates its
       argument list. *)
    let t = Hashtbl.create 3 ;;

    (* ADD and MUL associate to the right, SUB and DIV to the left. *)
    let () =
      List.iter
        (fun (name, eval) -> Hashtbl.add t name eval)
        [ "ADD", app_right ( +. ) ;
          "SUB", app_left ( -. ) ;
          "MUL", app_right ( *. ) ;
          "DIV", app_left ( /. ) ] ;;

    (* [apply o vs] evaluates operator [o] on the argument values [vs].
       When [o] is not in the table, a message is printed on standard error
       and the program exits with status 1. *)
    let apply o vs =
      let eval =
        try Hashtbl.find t o
        with Not_found ->
          Printf.eprintf "Unknown operator %s\n" o ;
          exit 1
      in
      eval vs ;;
    %}

    /* Tokens: parentheses, float literals and operator names. */
    %token Lpar Rpar
    %token <float> Num
    %token <string> Atom

    %start term
    %type <float> term
    %type <float list> terms
    %type <float list> rev_terms

    %%
    /* A term is a number, or an operator name applied to one or more terms
       inside parentheses. Its semantic value is the computed result. */
    term :
        Num                    { $1 }
      | Lpar Atom terms Rpar   { apply $2 $3 }
    ;
    /* Non-empty list of argument values, in source order. */
    terms :
        rev_terms              { List.rev $1 }
    ;
    /* Same list accumulated in reverse order by a left-recursive rule. */
    rev_terms :
        term                   { [$1] }
      | rev_terms term         { $2 :: $1 }
    ;
    %%


  2. Define in file opn_lexer.mll the lexical analysis of expressions.

    {
    open Opn_parser
    }

    (* Produces the tokens of a prefix expression: parentheses, numbers and
       operator names. Blanks are skipped. End of input and any other
       character raise Failure with an explicit message. *)
    rule lexer = parse
      [' ' '\t' '\r' '\n'] { lexer lexbuf }
    | '(' { Lpar }
    | ')' { Rpar }
    (* A number needs at least one digit. The previous pattern also matched
       the empty string, so any unknown character or the end of input was
       handed to float_of_string. *)
    | '-'? ( ['0'-'9']+ ('.' ['0'-'9']*)? | '.' ['0'-'9']+ )
    { Num (float_of_string (Lexing.lexeme lexbuf)) }
    (* Letters only: the range from upper case A to lower case z would also
       accept the punctuation characters lying between the two cases. *)
    | ['A'-'Z' 'a'-'z']+ { Atom (Lexing.lexeme lexbuf) }
    | eof { failwith "unexpected end of input" }
    | _ { failwith ("unexpected character: " ^ Lexing.lexeme lexbuf) }


  3. Write a simple main program opn which reads a line from standard input containing an expression and prints the result of evaluating the expression.

    open Opn_lexer ;;
    open Opn_parser ;;

    (* Prompts, reads one line from standard input, parses and evaluates it
       as a prefix expression, then prints the result. *)
    let () =
      print_string "? " ;
      flush stdout ;
      let line = input_line stdin in
      let lexbuf = Lexing.from_string line in
      let value = Opn_parser.term Opn_lexer.lexer lexbuf in
      Printf.printf "= %f\n" value ;;

Previous Contents Next