Ocamlopt x86-32 and SSE2
From: Jon Harrop
Subject: Re: [Caml-list] Ocamlopt x86-32 and SSE2
On Monday 11 May 2009 09:43:59 Dmitry Bely wrote:
> So it's just pie in the sky. No working implementation has been
> demonstrated since then.

The file "test/CodeGen/Generic/GC/simple_ocaml.ll" in the LLVM 2.5 source 
distribution contains the following test code for the OCaml-compatible 
frametable emitter:

  ; Record with two pointer fields: field 0 is an i8*, field 1 points to
  ; another %struct.obj.
  %struct.obj = type { i8*, %struct.obj* }
  ; Exercises the "ocaml" GC strategy: registers two stack slots as GC roots,
  ; loops while field 1 of %head (read via llvm.gcread) is null, then mallocs
  ; a new %struct.obj, stores it into field 1 of %head via llvm.gcwrite and
  ; returns the new object.
  define %struct.obj* @fun(%struct.obj* %head) gc "ocaml" {
  entry:
          ; Two i8* stack slots, each registered with llvm.gcroot (null tag).
          %gcroot.0 = alloca i8*
          %gcroot.1 = alloca i8*
          call void @llvm.gcroot(i8** %gcroot.0, i8* null)
          call void @llvm.gcroot(i8** %gcroot.1, i8* null)
          ; Typed views of the same two slots.
          %local.0 = bitcast i8** %gcroot.0 to %struct.obj**
          %local.1 = bitcast i8** %gcroot.1 to %struct.obj**
          ; Keep %head in the first rooted slot.
          store %struct.obj* %head, %struct.obj** %local.0
          br label %bb.loop
  bb.loop:
          ; Reload %head from its slot and read its field 1 through llvm.gcread.
          %t0 = load %struct.obj** %local.0
          %t1 = getelementptr %struct.obj* %t0, i32 0, i32 1
          %t2 = bitcast %struct.obj* %t0 to i8*
          %t3 = bitcast %struct.obj** %t1 to i8**
          %t4 = call i8* @llvm.gcread(i8* %t2, i8** %t3)
          %t5 = bitcast i8* %t4 to %struct.obj*
          ; Stay in bb.loop while the field is null; otherwise fall to bb.end.
          %t6 = icmp eq %struct.obj* %t5, null
          br i1 %t6, label %bb.loop, label %bb.end
  bb.end:
          ; Allocate a new object and keep it in the second rooted slot.
          %t7 = malloc %struct.obj
          store %struct.obj* %t7, %struct.obj** %local.1
          %t8 = bitcast %struct.obj* %t7 to i8*
          ; Reload %head and store the new object into its field 1 through
          ; llvm.gcwrite (arguments: value, containing object, field address).
          %t9 = load %struct.obj** %local.0
          %t10 = getelementptr %struct.obj* %t9, i32 0, i32 1
          %t11 = bitcast %struct.obj* %t9 to i8*
          %t12 = bitcast %struct.obj** %t10 to i8**
          call void @llvm.gcwrite(i8* %t8, i8* %t11, i8** %t12)
          ret %struct.obj* %t7
  }
  ; Declarations of the GC intrinsics called by @fun.
  ; llvm.gcroot: @fun passes the address of a stack slot and a null tag.
  declare void @llvm.gcroot(i8** %value, i8* %tag)
  ; llvm.gcwrite: @fun passes the value to store, the containing object and
  ; the address of the field being written.
  declare void @llvm.gcwrite(i8* %value, i8* %obj, i8** %field)
  ; llvm.gcread: @fun passes the containing object and the address of the
  ; field being read; the call yields an i8*.
  declare i8* @llvm.gcread(i8* %obj, i8** %field)

Compiling this with:

  llvm-as <simple_ocaml.ll | llc

generates the following x86 assembly:

          # x86-32 assembly (AT&T syntax: source operand first, destination
          # last) emitted by llc for @fun.
          # NOTE(review): this listing references labels it never defines (fun,
          # .Llabel1, .Llabel2, the .Leh_* labels, caml<stdin>__*) and shows no
          # ret instruction; label-only and other short lines appear to be
          # missing, so it will not assemble as shown.
          .file	"<stdin>"
          .globl	caml<stdin>__code_begin
          .globl	caml<stdin>__data_begin
          .align	16
          .globl	fun
          .type	fun,@function
          # Reserve a 12-byte frame and zero the stack slots at 8(%esp) and
          # 4(%esp); these are the two offsets listed in the frametable below.
          subl	$12, %esp
          movl	$0, 8(%esp)
          movl	$0, 4(%esp)
          # Copy the dword at 16(%esp) into the slot at 8(%esp) -- presumably
          # the incoming %head argument (TODO confirm against the ABI).
          movl	16(%esp), %eax
          movl	%eax, 8(%esp)
          .align	16
  .LBB1_1:	# bb.loop
          # Reload the object pointer from 8(%esp) and loop while the dword
          # at offset 4 of that object is zero.
          movl	8(%esp), %eax
          cmpl	$0, 4(%eax)
          je	.LBB1_1	# bb.loop
  .LBB1_2:	# bb.end
          # Call malloc with 8 placed at (%esp) as its argument; keep the
          # result in the slot at 4(%esp).
          movl	$8, (%esp)
          call	malloc
          movl	%eax, 4(%esp)
          # Store the malloc result at offset 4 of the object held in 8(%esp).
          movl	8(%esp), %ecx
          movl	%eax, 4(%ecx)
          # Release the 12-byte frame; %eax still holds the malloc result.
          addl	$12, %esp
          .size	fun, .-fun
          # Contents of the .eh_frame section, reproduced as emitted.
          .section	.eh_frame,"aw",@progbits
          .long	.Leh_frame_common_end-.Leh_frame_common_begin
          .long	0x0
          .byte	0x1
          .asciz	"zR"
          .uleb128	1
          .sleb128	-4
          .byte	0x8
          .uleb128	1
          .byte	0x1B
          .byte	0xC
          .uleb128	4
          .uleb128	4
          .byte	0x88
          .uleb128	1
          .align	4
          # Second .eh_frame record; it refers to the .Leh_func_begin1 /
          # .Leh_func_end1 range and to .Llabel1.
          .long	.Leh_frame_end1-.Leh_frame_begin1
          .long	.Leh_frame_begin1-.Leh_frame_common
          .long	.Leh_func_begin1-.
          .long	.Leh_func_end1-.Leh_func_begin1
          .uleb128	0
          .byte	0xE
          .uleb128	16
          .byte	0x4
          .long	.Llabel1-.Leh_func_begin1
          .byte	0xD
          .uleb128	4
          .align	4
          .globl	caml<stdin>__code_end
          .globl	caml<stdin>__data_end
          .long	0
          .globl	caml<stdin>__frametable
          # Frametable entry for fun. Presumably: code label, frame size
          # (0xC matches the 12-byte subl above), number of live roots (2),
          # then one stack offset per root (8 and 4) -- TODO confirm against
          # the OCaml runtime's frame descriptor layout.
          # live roots for fun
          .long	.Llabel2
          .short	0xC
          .short	0x2
          .word	8
          .word	4
          .align	4
          .section	.note.GNU-stack,"",@progbits

So perhaps it is worth a look.

Dr Jon Harrop, Flying Frog Consultancy Ltd.