Version franaise
Home About Download Resources Contact us
Browse thread
LLVM: A native-code compiler for MiniML in ~100LOC
[ Home ] [ Index: by date | by threads ]
[ Search: ]

[ Message by date: previous | next ] [ Message in thread: previous | next ] [ Thread: previous | next ]
Date: -- (:)
From: Tom_Primožič <tom.primozic@g...>
Subject: Re: [Caml-list] LLVM: A native-code compiler for MiniML in ~100LOC
Amazing!

I once wanted to write the compiler et. al myself, however, since
discovering LLVM, I have been determined to use it as my backend! It
supports many platforms, has a lot of (configurable) optimization passes,
and simple IL. Furthermore, it has GC bindings and JIT on some platforms.

So, this is a perfect example for me since it has only made me more certain
of my decision. I will write an excelent language (ML-like) and feed it to
LLVM to compile it!

 - Tom

On 26/11/2007, Jon Harrop <jon@ffconsultancy.com> wrote:
>
>
> I recently rediscovered the Low-Level Virtual Machine (LLVM) project that
> has
> since been adopted by Apple:
>
>   http://llvm.org
>
> This is a library (with OCaml bindings!) that allows you to write a
> compiler
> that generates their RISC-like intermediate language (IL) that can then be
> compiled to native code. LLVM even supports JIT compilation.
>
> I went through the usual steps in trying this and was extremely impressed
> with
> the results. After only two days I was able to create an optimizing
> native-code compiler for a subset of CAML large enough to represent the
> following Fibonacci program:
>
>   let rec fib n =
>     if n <= 2 then 1 else
>       fib(n-1) + fib(n-2)
>
>   do fib 40
>
> The compiler is written entirely in OCaml, using camlp4 for lexing and
> parsing, and the whole thing is only ~100 lines of code!
>
> I'll detail exactly how you can use LLVM from OCaml in a future OCaml
> Journal
> article:
>
>   http://www.ffconsultancy.com/products/ocaml_journal/?ol
>
> Meanwhile here's my latest source:
>
> type expr =
>   | Int of int
>   | Var of string
>   | BinOp of [ `Add | `Sub | `Leq ] * expr * expr
>   | If of expr * expr * expr
>   | Apply of expr * expr
>
> type defn =
>   | LetRec of string * string * expr
>
> open Camlp4.PreCast
>
> let expr = Gram.Entry.mk "expr"
> let defn = Gram.Entry.mk "defn"
> let prog = Gram.Entry.mk "prog"
>
> EXTEND Gram
>   expr:
>   [ [ "if"; p = expr; "then"; t = expr; "else"; f = expr ->
>         If(p, t, f) ]
>   | [ e1 = expr; "<="; e2 = expr -> BinOp(`Leq, e1, e2) ]
>   | [ e1 = expr; "+"; e2 = expr -> BinOp(`Add, e1, e2)
>     | e1 = expr; "-"; e2 = expr -> BinOp(`Sub, e1, e2) ]
>   | [ f = expr; x = expr -> Apply(f, x) ]
>   | [ v = LIDENT -> Var v
>     | n = INT -> Int(int_of_string n)
>     | "("; e = expr; ")" -> e ] ];
>   defn:
>   [ [ "let"; "rec"; f = LIDENT; x = LIDENT; "="; body = expr ->
>         LetRec(f, x, body) ] ];
>   prog:
>   [ [ defns = LIST0 defn; "do"; run = expr -> defns, run ] ];
> END
>
> open Printf
>
> let program, run =
>   try Gram.parse prog Loc.ghost (Stream.of_channel stdin) with
>   | Loc.Exc_located(loc, e) ->
>       printf "%s at line %d\n" (Printexc.to_string e) (Loc.start_lineloc);
>       exit 1
>
> open Llvm
>
> let ty = i64_type
>
> let ( |> ) x f = f x
>
> type state =
>     { fn: llvalue;
>       blk: llbasicblock;
>       vars: (string * llvalue) list }
>
> let bb state = builder_at_end state.blk
> let new_block state name = append_block name state.fn
> let find state v =
>   try List.assoc v state.vars with Not_found ->
>     eprintf "Unknown variable %s\n" v;
>     raise Not_found
> let cont (v, state) dest_blk =
>   build_br dest_blk (bb state) |> ignore;
>   v, state
>
> let rec expr state = function
>   | Int n -> const_int ty n, state
>   | Var x -> find state x, state
>   | BinOp(op, f, g) ->
>       let f, state = expr state f in
>       let g, state = expr state g in
>       let build, name = match op with
>         | `Add -> build_add, "add"
>         | `Sub -> build_sub, "sub"
>         | `Leq -> build_icmp Icmp_sle, "leq" in
>       build f g name (bb state), state
>   | If(p, t, f) ->
>       let t_blk = new_block state "pass" in
>       let f_blk = new_block state "fail" in
>       let k_blk = new_block state "cont" in
>       let cond, state = expr state p in
>       build_cond_br cond t_blk f_blk (bb state) |> ignore;
>       let t, state = cont (expr { state with blk = t_blk } t) k_blk in
>       let f, state = cont (expr { state with blk = f_blk } f) k_blk in
>       build_phi [t, t_blk; f, f_blk] "join" (bb state), state
>   | Apply(f, arg) ->
>       let f, state = expr state f in
>       let arg, state = expr state arg in
>       build_call f [|arg|] "apply" (bb state), state
>
> let defn m vars = function
>   | LetRec(f, arg, body) ->
>       let ty = function_type ty [| ty |] in
>       let fn = define_function f ty m in
>       let vars' = (arg, param fn 0) :: (f, fn) :: vars in
>       let body, state =
>         expr { fn = fn; blk = entry_block fn; vars = vars' } body in
>       build_ret body (bb state) |> ignore;
>       (f, fn) :: vars
>
> let int n = const_int ty n
>
> let main filename =
>   let m = create_module filename in
>
>   let string = pointer_type i8_type in
>
>   let print =
>     declare_function "printf" (var_arg_function_type ty [|string|]) m in
>
>   let main = define_function "main" (function_type ty [| |]) m in
>   let blk = entry_block main in
>   let bb = builder_at_end blk in
>
>   let str s = define_global "buf" (const_stringz s) m in
>   let int_spec = build_gep (str "%d\n") [| int 0; int 0 |] "int_spec" bb
> in
>
>   let vars = List.fold_left (defn m) [] program in
>   let n, _ = expr { fn = main; blk = blk; vars = vars } run in
>
>   build_call print [| int_spec; n |] "" bb |> ignore;
>
>   build_ret (int 0) bb |> ignore;
>
>   if not (Llvm_bitwriter.write_bitcode_file m filename) then exit 1;
>   dispose_module m
>
> let () = match Sys.argv with
>   | [|_; filename|] -> main filename
>   | _ as a -> Printf.eprintf "Usage: %s <file>\n" a.(0)
>
> To use it, simply download and install the latest SVN version of LLVM
> (which
> even builds and installs the OCaml bindings for you!) and then do:
>
> $ ocamlc -g -dtypes -pp camlp4oof -I +camlp4 dynlink.cma camlp4lib.cma -cc
> g++
> llvm.cma llvm_bitwriter.cma minml.ml -o minml
> $ ./minml run.bc <fib.ml
> $ llc -f run.bc -o run.s
> $ gcc run.s -o run
> $ ./run
> 102334155
> $
>
> You can look at the generated intermediate representation with:
>
> $ llvm-dis -f run.bc
> $ cat run.ll
>
> If anyone improves upon this I'd love to hear about it! :-)
>
> --
> Dr Jon D Harrop, Flying Frog Consultancy Ltd.
> http://www.ffconsultancy.com/products/?e
>
> _______________________________________________
> Caml-list mailing list. Subscription management:
> http://yquem.inria.fr/cgi-bin/mailman/listinfo/caml-list
> Archives: http://caml.inria.fr
> Beginner's list: http://groups.yahoo.com/group/ocaml_beginners
> Bug reports: http://caml.inria.fr/bin/caml-bugs
>