| Attached Files | 0001-Optimize-Ccheckbound-during-cmmgen.patch [^] (5,912 bytes) 2011-08-20 17:41 [Show Content] [Hide Content]From 713ddaae591a9d9ae26119548704b7d0be316d2f Mon Sep 17 00:00:00 2001
From: Benedikt Meurer <benedikt.meurer@googlemail.com>
Date: Sat, 20 Aug 2011 12:53:50 +0200
Subject: [PATCH 1/3] Optimize Ccheckbound during cmmgen.
Attempt to replace Cmm constructs like
(checkbound (>>u arg n) m)
where n and m are integer constants with
(checkbound arg ((m << n) + (1 << n) - 1))
which is both shorter and results in faster code. On amd64 and i386 this
replaces a sequence of mov,shr,cmp,jbe with a sequence of mov,cmp,jbe.
This could be further optimized to a sequence of cmp,jbe in the backends.
Signed-off-by: Benedikt Meurer <benedikt.meurer@googlemail.com>
---
asmcomp/cmmgen.ml | 33 ++++++++++++++++++++-------------
1 files changed, 20 insertions(+), 13 deletions(-)
diff --git a/asmcomp/cmmgen.ml b/asmcomp/cmmgen.ml
index ca9d2f0..b8589d1 100644
--- a/asmcomp/cmmgen.ml
+++ b/asmcomp/cmmgen.ml
@@ -369,6 +369,14 @@ let make_float_alloc tag args =
make_alloc_generic float_array_set tag
(List.length args * size_float / size_addr) args
+(* Bounds checking *)
+
+let make_checkbound dbg = function
+ | [Cop(Clsr, [a1; Cconst_int n]); Cconst_int m] when (m lsl n) > n ->
+ Cop(Ccheckbound dbg, [a1; Cconst_int(m lsl n + 1 lsl n - 1)])
+ | args ->
+ Cop(Ccheckbound dbg, args)
+
(* To compile "let rec" over values *)
let fundecls_size fundecls =
@@ -534,7 +542,7 @@ let bigarray_elt_size = function
let bigarray_indexing unsafe elt_kind layout b args dbg =
let check_bound a1 a2 k =
- if unsafe then k else Csequence(Cop(Ccheckbound dbg, [a1;a2]), k) in
+ if unsafe then k else Csequence(make_checkbound dbg [a1;a2], k) in
let rec ba_indexing dim_ofs delta_ofs = function
[] -> assert false
| [arg] ->
@@ -1207,7 +1215,7 @@ and transl_prim_2 p arg1 arg2 dbg =
(bind "str" (transl arg1) (fun str ->
bind "index" (untag_int (transl arg2)) (fun idx ->
Csequence(
- Cop(Ccheckbound dbg, [string_length str; idx]),
+ make_checkbound dbg [string_length str; idx],
Cop(Cload Byte_unsigned, [add_int str idx])))))
(* Array operations *)
@@ -1231,21 +1239,20 @@ and transl_prim_2 p arg1 arg2 dbg =
bind "arr" (transl arg1) (fun arr ->
bind "header" (header arr) (fun hdr ->
Cifthenelse(is_addr_array_hdr hdr,
- Csequence(Cop(Ccheckbound dbg, [addr_array_length hdr; idx]),
+ Csequence(make_checkbound dbg [addr_array_length hdr; idx],
addr_array_ref arr idx),
- Csequence(Cop(Ccheckbound dbg, [float_array_length hdr; idx]),
+ Csequence(make_checkbound dbg [float_array_length hdr; idx],
float_array_ref arr idx)))))
| Paddrarray | Pintarray ->
bind "index" (transl arg2) (fun idx ->
bind "arr" (transl arg1) (fun arr ->
- Csequence(Cop(Ccheckbound dbg, [addr_array_length(header arr); idx]),
+ Csequence(make_checkbound dbg [addr_array_length(header arr); idx],
addr_array_ref arr idx)))
| Pfloatarray ->
box_float(
bind "index" (transl arg2) (fun idx ->
bind "arr" (transl arg1) (fun arr ->
- Csequence(Cop(Ccheckbound dbg,
- [float_array_length(header arr); idx]),
+ Csequence(make_checkbound dbg [float_array_length(header arr); idx],
unboxed_float_array_ref arr idx))))
end
@@ -1314,7 +1321,7 @@ and transl_prim_3 p arg1 arg2 arg3 dbg =
(bind "str" (transl arg1) (fun str ->
bind "index" (untag_int (transl arg2)) (fun idx ->
Csequence(
- Cop(Ccheckbound dbg, [string_length str; idx]),
+ make_checkbound dbg [string_length str; idx],
Cop(Cstore Byte_unsigned,
[add_int str idx; untag_int(transl arg3)])))))
@@ -1343,25 +1350,25 @@ and transl_prim_3 p arg1 arg2 arg3 dbg =
bind "arr" (transl arg1) (fun arr ->
bind "header" (header arr) (fun hdr ->
Cifthenelse(is_addr_array_hdr hdr,
- Csequence(Cop(Ccheckbound dbg, [addr_array_length hdr; idx]),
+ Csequence(make_checkbound dbg [addr_array_length hdr; idx],
addr_array_set arr idx newval),
- Csequence(Cop(Ccheckbound dbg, [float_array_length hdr; idx]),
+ Csequence(make_checkbound dbg [float_array_length hdr; idx],
float_array_set arr idx
(unbox_float newval)))))))
| Paddrarray ->
bind "index" (transl arg2) (fun idx ->
bind "arr" (transl arg1) (fun arr ->
- Csequence(Cop(Ccheckbound dbg, [addr_array_length(header arr); idx]),
+ Csequence(make_checkbound dbg [addr_array_length(header arr); idx],
addr_array_set arr idx (transl arg3))))
| Pintarray ->
bind "index" (transl arg2) (fun idx ->
bind "arr" (transl arg1) (fun arr ->
- Csequence(Cop(Ccheckbound dbg, [addr_array_length(header arr); idx]),
+ Csequence(make_checkbound dbg [addr_array_length(header arr); idx],
int_array_set arr idx (transl arg3))))
| Pfloatarray ->
bind "index" (transl arg2) (fun idx ->
bind "arr" (transl arg1) (fun arr ->
- Csequence(Cop(Ccheckbound dbg, [float_array_length(header arr);idx]),
+ Csequence(make_checkbound dbg [float_array_length(header arr);idx],
float_array_set arr idx (transl_unbox_float arg3))))
end)
| _ ->
--
1.7.4.4
0002-Optimize-common-case-of-Ccheckbound-for-amd64.patch [^] (3,978 bytes) 2011-08-20 17:41 [Show Content] [Hide Content]From 72dd06ed04118daea9564301f70692ba558eea6d Mon Sep 17 00:00:00 2001
From: Benedikt Meurer <benedikt.meurer@googlemail.com>
Date: Sat, 20 Aug 2011 16:46:48 +0200
Subject: [PATCH 2/3] Optimize common case of Ccheckbound for amd64.
One common case of Ccheckbound on amd64 (considering the optimization
from the previous commit), is:
Ccheckbound, [Cop(Cload _, [loc]); Cconst_int n]
Using the normal code selection, this generates a sequence of mov,cmp,jbe.
But the cmp instruction supports memory operands with immediate operands,
which would be a sequence of cmp,jbe. So we add an Ispecific operation
Icheckboundmem_imm, which does exactly this and is selected appropriately.
Signed-off-by: Benedikt Meurer <benedikt.meurer@googlemail.com>
---
asmcomp/amd64/arch.ml | 4 ++++
asmcomp/amd64/emit.mlp | 4 ++++
asmcomp/amd64/emit_nt.mlp | 4 ++++
asmcomp/amd64/selection.ml | 9 +++++++++
4 files changed, 21 insertions(+), 0 deletions(-)
diff --git a/asmcomp/amd64/arch.ml b/asmcomp/amd64/arch.ml
index 3e8f4b1..cbb133a 100644
--- a/asmcomp/amd64/arch.ml
+++ b/asmcomp/amd64/arch.ml
@@ -40,6 +40,8 @@ type specific_operation =
| Ioffset_loc of int * addressing_mode (* Add a constant to a location *)
| Ifloatarithmem of float_operation * addressing_mode
(* Float arith operation with memory *)
+ | Icheckboundmem_imm of int * addressing_mode
+ (* Check bounds with memory and integer constant *)
and float_operation =
Ifloatadd | Ifloatsub | Ifloatmul | Ifloatdiv
@@ -109,3 +111,5 @@ let print_specific_operation printreg op ppf arg =
fprintf ppf "%a %s float64[%a]" printreg arg.(0) (op_name op)
(print_addressing printreg addr)
(Array.sub arg 1 (Array.length arg - 1))
+ | Icheckboundmem_imm(n, addr) ->
+ fprintf ppf "%a check > %i" (print_addressing printreg addr) arg n
diff --git a/asmcomp/amd64/emit.mlp b/asmcomp/amd64/emit.mlp
index a33a0fa..ca3117a 100644
--- a/asmcomp/amd64/emit.mlp
+++ b/asmcomp/amd64/emit.mlp
@@ -478,6 +478,10 @@ let emit_instr fallthrough i =
let lbl = bound_error_label i.dbg in
` cmpq ${emit_int n}, {emit_reg i.arg.(0)}\n`;
` jbe {emit_label lbl}\n`
+ | Lop(Ispecific(Icheckboundmem_imm(n, addr))) ->
+ let lbl = bound_error_label i.dbg in
+ ` cmpq ${emit_int n}, {emit_addressing addr i.arg 0}\n`;
+ ` jbe {emit_label lbl}\n`
| Lop(Iintop(Idiv | Imod)) ->
` cqto\n`;
` idivq {emit_reg i.arg.(1)}\n`
diff --git a/asmcomp/amd64/emit_nt.mlp b/asmcomp/amd64/emit_nt.mlp
index 724d6ee..16e4630 100644
--- a/asmcomp/amd64/emit_nt.mlp
+++ b/asmcomp/amd64/emit_nt.mlp
@@ -480,6 +480,10 @@ let emit_instr fallthrough i =
let lbl = bound_error_label i.dbg in
` cmp {emit_reg i.arg.(0)}, {emit_int n}\n`;
` jbe {emit_label lbl}\n`
+ | Lop(Ispecific(Icheckboundmem_imm(n, addr))) ->
+ let lbl = bound_error_label i.dbg in
+ ` cmp QWORD PTR {emit_addressing addr i.arg 0}, {emit_int n}\n`;
+ ` jbe {emit_label lbl}\n`
| Lop(Iintop(Idiv | Imod)) ->
` cqo\n`;
` idiv {emit_reg i.arg.(1)}\n`
diff --git a/asmcomp/amd64/selection.ml b/asmcomp/amd64/selection.ml
index 4921e51..c376f78 100644
--- a/asmcomp/amd64/selection.ml
+++ b/asmcomp/amd64/selection.ml
@@ -196,6 +196,15 @@ method! select_operation op args =
| _ ->
super#select_operation op args
end
+ (* Recognize checkbound with memory and immediate. *)
+ | Ccheckbound _ ->
+ begin match args with
+ [Cop(Cload _, [loc]); Cconst_int n] when self#is_immediate n ->
+ let (addr, arg) = self#select_addressing loc in
+ (Ispecific(Icheckboundmem_imm(n, addr)), [arg])
+ | _ ->
+ super#select_operation op args
+ end
| _ -> super#select_operation op args
(* Recognize float arithmetic with mem *)
--
1.7.4.4
0003-Also-optimize-common-case-of-Ccheckbound-for-i386.patch [^] (3,579 bytes) 2011-08-20 17:41 [Show Content] [Hide Content]From 6b8b5b330cdd448365fbad31076228d7d06afa67 Mon Sep 17 00:00:00 2001
From: Benedikt Meurer <benedikt.meurer@googlemail.com>
Date: Sat, 20 Aug 2011 17:27:30 +0200
Subject: [PATCH 3/3] Also optimize common case of Ccheckbound for i386.
Apply the same optimization to i386 that was introduced with the
previous commit for amd64.
Signed-off-by: Benedikt Meurer <benedikt.meurer@googlemail.com>
---
asmcomp/i386/arch.ml | 4 ++++
asmcomp/i386/emit.mlp | 4 ++++
asmcomp/i386/emit_nt.mlp | 4 ++++
asmcomp/i386/selection.ml | 9 +++++++++
4 files changed, 21 insertions(+), 0 deletions(-)
diff --git a/asmcomp/i386/arch.ml b/asmcomp/i386/arch.ml
index 04d673d..3796bed 100644
--- a/asmcomp/i386/arch.ml
+++ b/asmcomp/i386/arch.ml
@@ -47,6 +47,8 @@ type specific_operation =
(* Float arith operation with memory *)
(* bool: true=64 bits, false=32 *)
| Ifloatspecial of string
+ | Icheckboundmem_imm of int * addressing_mode
+ (* Check bounds with memory and integer constant *)
and float_operation =
Ifloatadd | Ifloatsub | Ifloatsubrev | Ifloatmul | Ifloatdiv | Ifloatdivrev
@@ -144,6 +146,8 @@ let print_specific_operation printreg op ppf arg =
if i > 0 then fprintf ppf ", ";
printreg ppf arg.(i)
done
+ | Icheckboundmem_imm(n, addr) ->
+ fprintf ppf "%a check > %i" (print_addressing printreg addr) arg n
(* Stack alignment constraints *)
diff --git a/asmcomp/i386/emit.mlp b/asmcomp/i386/emit.mlp
index 881a936..b30adc1 100644
--- a/asmcomp/i386/emit.mlp
+++ b/asmcomp/i386/emit.mlp
@@ -581,6 +581,10 @@ let emit_instr fallthrough i =
let lbl = bound_error_label i.dbg in
` cmpl ${emit_int n}, {emit_reg i.arg.(0)}\n`;
` jbe {emit_label lbl}\n`
+ | Lop(Ispecific(Icheckboundmem_imm(n, addr))) ->
+ let lbl = bound_error_label i.dbg in
+ ` cmpl ${emit_int n}, {emit_addressing addr i.arg 0}\n`;
+ ` jbe {emit_label lbl}\n`
| Lop(Iintop(Idiv | Imod)) ->
` cltd\n`;
` idivl {emit_reg i.arg.(1)}\n`
diff --git a/asmcomp/i386/emit_nt.mlp b/asmcomp/i386/emit_nt.mlp
index 7091b3d..09a7bf5 100644
--- a/asmcomp/i386/emit_nt.mlp
+++ b/asmcomp/i386/emit_nt.mlp
@@ -528,6 +528,10 @@ let emit_instr i =
let lbl = bound_error_label i.dbg in
` cmp {emit_reg i.arg.(0)}, {emit_int n}\n`;
` jbe {emit_label lbl}\n`
+ | Lop(Ispecific(Icheckboundmem_imm(n, addr))) ->
+ let lbl = bound_error_label i.dbg in
+ ` cmp DWORD PTR {emit_addressing addr i.arg 0}, {emit_int n}\n`;
+ ` jbe {emit_label lbl}\n`
| Lop(Iintop(Idiv | Imod)) ->
` cdq\n`;
` idiv {emit_reg i.arg.(1)}\n`
diff --git a/asmcomp/i386/selection.ml b/asmcomp/i386/selection.ml
index 5a8720f..bf37f66 100644
--- a/asmcomp/i386/selection.ml
+++ b/asmcomp/i386/selection.ml
@@ -242,6 +242,15 @@ method! select_operation op args =
| Cextcall(fn, ty_res, false, dbg)
when !fast_math && List.mem fn inline_float_ops ->
(Ispecific(Ifloatspecial fn), args)
+ (* Recognize checkbound with memory and immediate. *)
+ | Ccheckbound _ ->
+ begin match args with
+ [Cop(Cload _, [loc]); Cconst_int n] when self#is_immediate n ->
+ let (addr, arg) = self#select_addressing loc in
+ (Ispecific(Icheckboundmem_imm(n, addr)), [arg])
+ | _ ->
+ super#select_operation op args
+ end
(* Default *)
| _ -> super#select_operation op args
--
1.7.4.4
|