Browse thread
Value types (Was: [Caml-list] ocamlopt LLVM support)
[
Home
]
[ Index:
by date
|
by threads
]
[ Message by date: previous | next ] [ Message in thread: previous | next ] [ Thread: previous | next ]
[ Message by date: previous | next ] [ Message in thread: previous | next ] [ Thread: previous | next ]
| Date: | -- (:) |
| From: | Jon Harrop <jon@f...> |
| Subject: | RE: Value types (Was: [Caml-list] ocamlopt LLVM support) |
Török Edwin wrote:
> Do you really need to use Int64 for that though? Won't the 63-bit
> version do?
I'm running 32-bit.
> > I am unable to reproduce your results. Here, the time falls from 24s
> > to 19.5s (using ocamlopt 3.12.0 on Intel x86) which is still 26×
> > slower than HLVM.
Sorry, I'm actually using an Opteron x86 (logged in from an Intel x86!).
> Do you still have 'idiv' in the compiled code? See my attached
> assembly, and compare it with yours please.
> I was doing the test on 64-bit, with ocamlopt 3.11.2 and 3.12.0.
I get what appear to be calls to C code:
# ----------------------------------------------------------------------
# camlCollatz__collatzLen_1030
#
# Compiler-generated 32-bit x86 code (AT&T operand order: src, dst).
# In:    %eax, %ebx  - both are live on entry; they are spilled to the
#                      frame at the top of every loop iteration.
#                      (presumably %eax is the running length and %ebx
#                      the current Int64 value - inferred from the symbol
#                      name only; confirm against the OCaml source)
# Out:   %eax        - the value that was in %eax at the top of the
#                      final iteration.
# Frame: 8 bytes.  4(%esp) = saved %eax, 0(%esp) = saved %ebx.
#        These offsets are relative to the frame base; after each pushl
#        the same slot is reached at an offset 4 bytes larger.
#
# Every helper (caml_equal, caml_int64_*) is reached the same way: two
# operands are pushed, the helper's address is loaded into %eax, and
# control goes through caml_c_call.  The 8 bytes of operands are popped
# by the caller afterwards.  None of the 64-bit arithmetic is done inline.
# NOTE(review): camlCollatz__6..__10 look like statically allocated
# Int64 constants - their values are not visible here; confirm.
# ----------------------------------------------------------------------
camlCollatz__collatzLen_1030:
subl $8, %esp
# Loop head: re-entered from .L100 with updated %eax / %ebx.
.L103:
movl %eax, 4(%esp)
movl %ebx, 0(%esp)
# caml_equal on %ebx and camlCollatz__10 (termination test).
pushl $camlCollatz__10
pushl %ebx
movl $caml_equal, %eax
call caml_c_call
.L104:
addl $8, %esp
# Result 1 -> keep iterating at .L102; anything else -> return.
# (presumably 1 is the OCaml encoding of "false" - TODO confirm)
cmpl $1, %eax
je .L102
# Exit path: return the saved %eax and release the frame.
movl 4(%esp), %eax
addl $8, %esp
ret
.align 16
.L102:
# caml_int64_and on the saved %ebx and camlCollatz__8.
pushl $camlCollatz__8
# 4(%esp) here is the saved-%ebx slot: one pushl has moved %esp by 4.
movl 4(%esp), %eax
pushl %eax
movl $caml_int64_and, %eax
call caml_c_call
.L105:
addl $8, %esp
# caml_equal on the AND result and camlCollatz__9.
pushl $camlCollatz__9
pushl %eax
movl $caml_equal, %eax
call caml_c_call
.L106:
addl $8, %esp
# Result 1 -> multiply/add branch at .L101; otherwise fall through to
# the shift branch.
cmpl $1, %eax
je .L101
# Shift branch: caml_int64_shift_right on the saved %ebx with operand 3.
# (presumably 3 is a tagged shift count of 1 - TODO confirm)
pushl $3
# Again the saved-%ebx slot, offset by the preceding pushl.
movl 4(%esp), %eax
pushl %eax
movl $caml_int64_shift_right, %eax
call caml_c_call
.L107:
addl $8, %esp
# Shift result becomes the new %ebx.
movl %eax, %ebx
jmp .L100
.align 16
.L101:
# Multiply/add branch: caml_int64_mul on camlCollatz__6 and the saved
# %ebx, then caml_int64_add on that product and camlCollatz__7.
movl 0(%esp), %eax
pushl %eax
pushl $camlCollatz__6
movl $caml_int64_mul, %eax
call caml_c_call
.L108:
addl $8, %esp
pushl $camlCollatz__7
pushl %eax
movl $caml_int64_add, %eax
call caml_c_call
.L109:
addl $8, %esp
# Sum becomes the new %ebx.
movl %eax, %ebx
# Common loop tail: reload the saved %eax, add 2, and iterate.
# (presumably +2 is an increment by one on a tagged integer - TODO confirm)
.L100:
movl 4(%esp), %eax
addl $2, %eax
jmp .L103
> FWIW the original code took 2.8 seconds here, so only 4x slower (this
> is an AMD Phenom II x6 1090T CPU). It probably depends how fast/slow
> the 'idiv' is on your CPU.
The performance of idiv is irrelevant here. The bottleneck may be those C calls, but I don't understand why they are being generated.
Cheers,
Jon.