Version française
Home     About     Download     Resources     Contact us    
Browse thread
LLVM: A native-code compiler for MiniML in ~100LOC
[ Home ] [ Index: by date | by threads ]
[ Search: ]

[ Message by date: previous | next ] [ Message in thread: previous | next ] [ Thread: previous | next ]
Date: -- (:)
From: Jon Harrop <jon@f...>
Subject: LLVM: A native-code compiler for MiniML in ~100LOC

I recently rediscovered the Low-Level Virtual Machine (LLVM) project that has 
since been adopted by Apple:

  http://llvm.org

This is a library (with OCaml bindings!) that allows you to write a compiler 
that generates their RISC-like intermediate language (IL) that can then be 
compiled to native code. LLVM even supports JIT compilation.

I went through the usual steps in trying this and was extremely impressed with 
the results. After only two days I was able to create an optimizing 
native-code compiler for a subset of CAML large enough to represent the 
following Fibonacci program:

  let rec fib n =
    if n <= 2 then 1 else
      fib(n-1) + fib(n-2)

  do fib 40

The compiler is written entirely in OCaml, using camlp4 for lexing and 
parsing, and the whole thing is only ~100 lines of code!

I'll detail exactly how you can use LLVM from OCaml in a future OCaml Journal 
article:

  http://www.ffconsultancy.com/products/ocaml_journal/?ol

Meanwhile here's my latest source:

type expr =
  | Int of int
  | Var of string
  | BinOp of [ `Add | `Sub | `Leq ] * expr * expr
  | If of expr * expr * expr
  | Apply of expr * expr

type defn =
  | LetRec of string * string * expr

open Camlp4.PreCast

let expr = Gram.Entry.mk "expr"
let defn = Gram.Entry.mk "defn"
let prog = Gram.Entry.mk "prog"

EXTEND Gram
  expr:
  [ [ "if"; p = expr; "then"; t = expr; "else"; f = expr ->
	If(p, t, f) ]
  | [ e1 = expr; "<="; e2 = expr -> BinOp(`Leq, e1, e2) ]
  | [ e1 = expr; "+"; e2 = expr -> BinOp(`Add, e1, e2)
    | e1 = expr; "-"; e2 = expr -> BinOp(`Sub, e1, e2) ]
  | [ f = expr; x = expr -> Apply(f, x) ]
  | [ v = LIDENT -> Var v
    | n = INT -> Int(int_of_string n)
    | "("; e = expr; ")" -> e ] ];
  defn:
  [ [ "let"; "rec"; f = LIDENT; x = LIDENT; "="; body = expr ->
	LetRec(f, x, body) ] ];
  prog:
  [ [ defns = LIST0 defn; "do"; run = expr -> defns, run ] ];
END

open Printf

let program, run =
  try Gram.parse prog Loc.ghost (Stream.of_channel stdin) with
  | Loc.Exc_located(loc, e) ->
      printf "%s at line %d\n" (Printexc.to_string e) (Loc.start_line loc);
      exit 1

open Llvm

let ty = i64_type

let ( |> ) x f = f x

type state =
    { fn: llvalue;
      blk: llbasicblock;
      vars: (string * llvalue) list }

let bb state = builder_at_end state.blk
let new_block state name = append_block name state.fn
let find state v =
  try List.assoc v state.vars with Not_found ->
    eprintf "Unknown variable %s\n" v;
    raise Not_found
let cont (v, state) dest_blk =
  build_br dest_blk (bb state) |> ignore;
  v, state

let rec expr state = function
  | Int n -> const_int ty n, state
  | Var x -> find state x, state
  | BinOp(op, f, g) ->
      let f, state = expr state f in
      let g, state = expr state g in
      let build, name = match op with
	| `Add -> build_add, "add"
	| `Sub -> build_sub, "sub"
	| `Leq -> build_icmp Icmp_sle, "leq" in
      build f g name (bb state), state
  | If(p, t, f) ->
      let t_blk = new_block state "pass" in
      let f_blk = new_block state "fail" in
      let k_blk = new_block state "cont" in
      let cond, state = expr state p in
      build_cond_br cond t_blk f_blk (bb state) |> ignore;
      let t, state = cont (expr { state with blk = t_blk } t) k_blk in
      let f, state = cont (expr { state with blk = f_blk } f) k_blk in
      build_phi [t, t_blk; f, f_blk] "join" (bb state), state
  | Apply(f, arg) ->
      let f, state = expr state f in
      let arg, state = expr state arg in
      build_call f [|arg|] "apply" (bb state), state

let defn m vars = function
  | LetRec(f, arg, body) ->
      let ty = function_type ty [| ty |] in
      let fn = define_function f ty m in
      let vars' = (arg, param fn 0) :: (f, fn) :: vars in
      let body, state =
	expr { fn = fn; blk = entry_block fn; vars = vars' } body in
      build_ret body (bb state) |> ignore;
      (f, fn) :: vars

let int n = const_int ty n

let main filename =
  let m = create_module filename in

  let string = pointer_type i8_type in

  let print =
    declare_function "printf" (var_arg_function_type ty [|string|]) m in

  let main = define_function "main" (function_type ty [| |]) m in
  let blk = entry_block main in
  let bb = builder_at_end blk in

  let str s = define_global "buf" (const_stringz s) m in
  let int_spec = build_gep (str "%d\n") [| int 0; int 0 |] "int_spec" bb in

  let vars = List.fold_left (defn m) [] program in
  let n, _ = expr { fn = main; blk = blk; vars = vars } run in

  build_call print [| int_spec; n |] "" bb |> ignore;

  build_ret (int 0) bb |> ignore;

  if not (Llvm_bitwriter.write_bitcode_file m filename) then exit 1;
  dispose_module m

let () = match Sys.argv with
  | [|_; filename|] -> main filename
  | _ as a -> Printf.eprintf "Usage: %s <file>\n" a.(0)

To use it, simply download and install the latest SVN version of LLVM (which 
even builds and installs the OCaml bindings for you!) and then do:

$ ocamlc -g -dtypes -pp camlp4oof -I +camlp4 dynlink.cma camlp4lib.cma -cc g++ 
llvm.cma llvm_bitwriter.cma minml.ml -o minml
$ ./minml run.bc <fib.ml
$ llc -f run.bc -o run.s
$ gcc run.s -o run
$ ./run
102334155
$

You can look at the generated intermediate representation with:

$ llvm-dis -f run.bc
$ cat run.ll

If anyone improves upon this I'd love to hear about it! :-)

-- 
Dr Jon D Harrop, Flying Frog Consultancy Ltd.
http://www.ffconsultancy.com/products/?e