Attached Files | 0001-Add-a-boolean-operation-interpretation.-Use-it-to-de.patch [^] (30,119 bytes) 2012-10-05 19:50 [Show Content] [Hide Content]From f2a94e4a9fa97e5eb30f0eb77b20671b9aaf705c Mon Sep 17 00:00:00 2001
From: Pierre Chambart <pierre.chambart@ocamlpro.org>
Date: Fri, 5 Oct 2012 19:35:22 +0200
Subject: [PATCH] Add a boolean operation interpretation. Use it to detect
bswap and optimise it on amd64
---
.depend | 63 ++++----
Makefile | 2 +-
asmcomp/amd64/arch.ml | 4 +
asmcomp/amd64/emit.mlp | 9 ++
asmcomp/amd64/selection.ml | 3 +-
asmcomp/boolsimplif.ml | 359 ++++++++++++++++++++++++++++++++++++++++++++
asmcomp/boolsimplif.mli | 30 ++++
asmcomp/cmmgen.ml | 4 +-
8 files changed, 442 insertions(+), 32 deletions(-)
create mode 100644 asmcomp/boolsimplif.ml
create mode 100644 asmcomp/boolsimplif.mli
diff --git a/.depend b/.depend
index e61be55..1228e32 100644
--- a/.depend
+++ b/.depend
@@ -90,9 +90,9 @@ typing/parmatch.cmi : typing/types.cmi typing/typedtree.cmi typing/path.cmi \
typing/path.cmi : typing/ident.cmi
typing/predef.cmi : typing/types.cmi typing/path.cmi typing/ident.cmi
typing/primitive.cmi :
+typing/printtyped.cmi : typing/typedtree.cmi
typing/printtyp.cmi : typing/types.cmi typing/path.cmi \
typing/outcometree.cmi parsing/longident.cmi typing/ident.cmi
-typing/printtyped.cmi : typing/typedtree.cmi
typing/stypes.cmi : typing/typedtree.cmi parsing/location.cmi \
typing/annot.cmi
typing/subst.cmi : typing/types.cmi typing/path.cmi typing/ident.cmi
@@ -214,6 +214,12 @@ typing/predef.cmx : typing/types.cmx typing/path.cmx parsing/location.cmx \
typing/ident.cmx typing/btype.cmx parsing/asttypes.cmi typing/predef.cmi
typing/primitive.cmo : utils/misc.cmi typing/primitive.cmi
typing/primitive.cmx : utils/misc.cmx typing/primitive.cmi
+typing/printtyped.cmo : typing/typedtree.cmi typing/path.cmi \
+ parsing/longident.cmi parsing/location.cmi typing/ident.cmi \
+ parsing/asttypes.cmi typing/printtyped.cmi
+typing/printtyped.cmx : typing/typedtree.cmx typing/path.cmx \
+ parsing/longident.cmx parsing/location.cmx typing/ident.cmx \
+ parsing/asttypes.cmi typing/printtyped.cmi
typing/printtyp.cmo : typing/types.cmi typing/primitive.cmi \
typing/predef.cmi typing/path.cmi typing/outcometree.cmi \
typing/oprint.cmi utils/misc.cmi parsing/longident.cmi typing/ident.cmi \
@@ -224,12 +230,6 @@ typing/printtyp.cmx : typing/types.cmx typing/primitive.cmx \
typing/oprint.cmx utils/misc.cmx parsing/longident.cmx typing/ident.cmx \
typing/env.cmx typing/ctype.cmx utils/clflags.cmx typing/btype.cmx \
parsing/asttypes.cmi typing/printtyp.cmi
-typing/printtyped.cmo : typing/typedtree.cmi typing/path.cmi \
- parsing/longident.cmi parsing/location.cmi typing/ident.cmi \
- parsing/asttypes.cmi typing/printtyped.cmi
-typing/printtyped.cmx : typing/typedtree.cmx typing/path.cmx \
- parsing/longident.cmx parsing/location.cmx typing/ident.cmx \
- parsing/asttypes.cmi typing/printtyped.cmi
typing/stypes.cmo : typing/typedtree.cmi typing/printtyp.cmi \
parsing/location.cmi utils/clflags.cmi typing/annot.cmi typing/stypes.cmi
typing/stypes.cmx : typing/typedtree.cmx typing/printtyp.cmx \
@@ -533,12 +533,13 @@ asmcomp/asmgen.cmi : bytecomp/lambda.cmi asmcomp/cmm.cmi
asmcomp/asmlibrarian.cmi :
asmcomp/asmlink.cmi : asmcomp/cmx_format.cmi
asmcomp/asmpackager.cmi :
+asmcomp/boolsimplif.cmi : asmcomp/cmm.cmi
asmcomp/clambda.cmi : bytecomp/lambda.cmi typing/ident.cmi \
asmcomp/debuginfo.cmi parsing/asttypes.cmi
asmcomp/closure.cmi : bytecomp/lambda.cmi asmcomp/clambda.cmi
-asmcomp/cmm.cmi : typing/ident.cmi asmcomp/debuginfo.cmi
asmcomp/cmmgen.cmi : asmcomp/cmx_format.cmi asmcomp/cmm.cmi \
asmcomp/clambda.cmi
+asmcomp/cmm.cmi : typing/ident.cmi asmcomp/debuginfo.cmi
asmcomp/cmx_format.cmi : asmcomp/clambda.cmi
asmcomp/codegen.cmi : asmcomp/cmm.cmi
asmcomp/coloring.cmi :
@@ -546,8 +547,8 @@ asmcomp/comballoc.cmi : asmcomp/mach.cmi
asmcomp/compilenv.cmi : bytecomp/lambda.cmi typing/ident.cmi \
asmcomp/cmx_format.cmi asmcomp/clambda.cmi
asmcomp/debuginfo.cmi : parsing/location.cmi bytecomp/lambda.cmi
-asmcomp/emit.cmi : asmcomp/linearize.cmi asmcomp/cmm.cmi
asmcomp/emitaux.cmi : asmcomp/debuginfo.cmi
+asmcomp/emit.cmi : asmcomp/linearize.cmi asmcomp/cmm.cmi
asmcomp/interf.cmi : asmcomp/mach.cmi
asmcomp/linearize.cmi : asmcomp/reg.cmi asmcomp/mach.cmi \
asmcomp/debuginfo.cmi
@@ -560,8 +561,8 @@ asmcomp/printlinear.cmi : asmcomp/linearize.cmi
asmcomp/printmach.cmi : asmcomp/reg.cmi asmcomp/mach.cmi
asmcomp/proc.cmi : asmcomp/reg.cmi asmcomp/mach.cmi
asmcomp/reg.cmi : asmcomp/cmm.cmi
-asmcomp/reload.cmi : asmcomp/mach.cmi
asmcomp/reloadgen.cmi : asmcomp/reg.cmi asmcomp/mach.cmi
+asmcomp/reload.cmi : asmcomp/mach.cmi
asmcomp/schedgen.cmi : asmcomp/mach.cmi asmcomp/linearize.cmi
asmcomp/scheduling.cmi : asmcomp/linearize.cmi
asmcomp/selectgen.cmi : utils/tbl.cmi asmcomp/reg.cmi asmcomp/mach.cmi \
@@ -619,6 +620,10 @@ asmcomp/asmpackager.cmx : typing/typemod.cmx bytecomp/translmod.cmx \
asmcomp/cmx_format.cmi utils/clflags.cmx asmcomp/clambda.cmx \
utils/ccomp.cmx asmcomp/asmlink.cmx asmcomp/asmgen.cmx \
asmcomp/asmpackager.cmi
+asmcomp/boolsimplif.cmo : asmcomp/cmm.cmi asmcomp/arch.cmo \
+ asmcomp/boolsimplif.cmi
+asmcomp/boolsimplif.cmx : asmcomp/cmm.cmx asmcomp/arch.cmx \
+ asmcomp/boolsimplif.cmi
asmcomp/clambda.cmo : bytecomp/lambda.cmi typing/ident.cmi \
asmcomp/debuginfo.cmi parsing/asttypes.cmi asmcomp/clambda.cmi
asmcomp/clambda.cmx : bytecomp/lambda.cmx typing/ident.cmx \
@@ -626,15 +631,11 @@ asmcomp/clambda.cmx : bytecomp/lambda.cmx typing/ident.cmx \
asmcomp/closure.cmo : utils/tbl.cmi bytecomp/switch.cmi typing/primitive.cmi \
utils/misc.cmi bytecomp/lambda.cmi typing/ident.cmi asmcomp/debuginfo.cmi \
asmcomp/compilenv.cmi utils/clflags.cmi asmcomp/clambda.cmi \
- parsing/asttypes.cmi asmcomp/closure.cmi
+ parsing/asttypes.cmi asmcomp/arch.cmo asmcomp/closure.cmi
asmcomp/closure.cmx : utils/tbl.cmx bytecomp/switch.cmx typing/primitive.cmx \
utils/misc.cmx bytecomp/lambda.cmx typing/ident.cmx asmcomp/debuginfo.cmx \
asmcomp/compilenv.cmx utils/clflags.cmx asmcomp/clambda.cmx \
- parsing/asttypes.cmi asmcomp/closure.cmi
-asmcomp/cmm.cmo : typing/ident.cmi asmcomp/debuginfo.cmi asmcomp/arch.cmo \
- asmcomp/cmm.cmi
-asmcomp/cmm.cmx : typing/ident.cmx asmcomp/debuginfo.cmx asmcomp/arch.cmx \
- asmcomp/cmm.cmi
+ parsing/asttypes.cmi asmcomp/arch.cmx asmcomp/closure.cmi
asmcomp/cmmgen.cmo : typing/types.cmi bytecomp/switch.cmi asmcomp/proc.cmi \
typing/primitive.cmi utils/misc.cmi bytecomp/lambda.cmi typing/ident.cmi \
asmcomp/debuginfo.cmi utils/config.cmi asmcomp/compilenv.cmi \
@@ -647,6 +648,10 @@ asmcomp/cmmgen.cmx : typing/types.cmx bytecomp/switch.cmx asmcomp/proc.cmx \
asmcomp/cmx_format.cmi asmcomp/cmm.cmx utils/clflags.cmx \
asmcomp/clambda.cmx parsing/asttypes.cmi asmcomp/arch.cmx \
asmcomp/cmmgen.cmi
+asmcomp/cmm.cmo : typing/ident.cmi asmcomp/debuginfo.cmi asmcomp/arch.cmo \
+ asmcomp/cmm.cmi
+asmcomp/cmm.cmx : typing/ident.cmx asmcomp/debuginfo.cmx asmcomp/arch.cmx \
+ asmcomp/cmm.cmi
asmcomp/codegen.cmo : asmcomp/split.cmi asmcomp/spill.cmi asmcomp/reload.cmi \
asmcomp/reg.cmi asmcomp/printmach.cmi asmcomp/printlinear.cmi \
asmcomp/printcmm.cmi asmcomp/liveness.cmi asmcomp/linearize.cmi \
@@ -673,6 +678,12 @@ asmcomp/debuginfo.cmo : parsing/location.cmi bytecomp/lambda.cmi \
asmcomp/debuginfo.cmi
asmcomp/debuginfo.cmx : parsing/location.cmx bytecomp/lambda.cmx \
asmcomp/debuginfo.cmi
+asmcomp/emitaux.cmo : asmcomp/reg.cmi asmcomp/linearize.cmi \
+ asmcomp/debuginfo.cmi utils/config.cmi asmcomp/cmm.cmi utils/clflags.cmi \
+ asmcomp/arch.cmo asmcomp/emitaux.cmi
+asmcomp/emitaux.cmx : asmcomp/reg.cmx asmcomp/linearize.cmx \
+ asmcomp/debuginfo.cmx utils/config.cmx asmcomp/cmm.cmx utils/clflags.cmx \
+ asmcomp/arch.cmx asmcomp/emitaux.cmi
asmcomp/emit.cmo : asmcomp/reg.cmi asmcomp/proc.cmi utils/misc.cmi \
asmcomp/mach.cmi asmcomp/linearize.cmi asmcomp/emitaux.cmi \
asmcomp/debuginfo.cmi utils/config.cmi asmcomp/compilenv.cmi \
@@ -681,12 +692,6 @@ asmcomp/emit.cmx : asmcomp/reg.cmx asmcomp/proc.cmx utils/misc.cmx \
asmcomp/mach.cmx asmcomp/linearize.cmx asmcomp/emitaux.cmx \
asmcomp/debuginfo.cmx utils/config.cmx asmcomp/compilenv.cmx \
asmcomp/cmm.cmx utils/clflags.cmx asmcomp/arch.cmx asmcomp/emit.cmi
-asmcomp/emitaux.cmo : asmcomp/reg.cmi asmcomp/linearize.cmi \
- asmcomp/debuginfo.cmi utils/config.cmi asmcomp/cmm.cmi utils/clflags.cmi \
- asmcomp/arch.cmo asmcomp/emitaux.cmi
-asmcomp/emitaux.cmx : asmcomp/reg.cmx asmcomp/linearize.cmx \
- asmcomp/debuginfo.cmx utils/config.cmx asmcomp/cmm.cmx utils/clflags.cmx \
- asmcomp/arch.cmx asmcomp/emitaux.cmi
asmcomp/interf.cmo : asmcomp/reg.cmi asmcomp/proc.cmi utils/misc.cmi \
asmcomp/mach.cmi asmcomp/interf.cmi
asmcomp/interf.cmx : asmcomp/reg.cmx asmcomp/proc.cmx utils/misc.cmx \
@@ -735,14 +740,14 @@ asmcomp/proc.cmx : asmcomp/reg.cmx utils/misc.cmx asmcomp/mach.cmx \
asmcomp/arch.cmx asmcomp/proc.cmi
asmcomp/reg.cmo : asmcomp/cmm.cmi asmcomp/reg.cmi
asmcomp/reg.cmx : asmcomp/cmm.cmx asmcomp/reg.cmi
-asmcomp/reload.cmo : asmcomp/reloadgen.cmi asmcomp/reg.cmi asmcomp/mach.cmi \
- asmcomp/cmm.cmi utils/clflags.cmi asmcomp/arch.cmo asmcomp/reload.cmi
-asmcomp/reload.cmx : asmcomp/reloadgen.cmx asmcomp/reg.cmx asmcomp/mach.cmx \
- asmcomp/cmm.cmx utils/clflags.cmx asmcomp/arch.cmx asmcomp/reload.cmi
asmcomp/reloadgen.cmo : asmcomp/reg.cmi utils/misc.cmi asmcomp/mach.cmi \
asmcomp/reloadgen.cmi
asmcomp/reloadgen.cmx : asmcomp/reg.cmx utils/misc.cmx asmcomp/mach.cmx \
asmcomp/reloadgen.cmi
+asmcomp/reload.cmo : asmcomp/reloadgen.cmi asmcomp/reg.cmi asmcomp/mach.cmi \
+ asmcomp/cmm.cmi utils/clflags.cmi asmcomp/arch.cmo asmcomp/reload.cmi
+asmcomp/reload.cmx : asmcomp/reloadgen.cmx asmcomp/reg.cmx asmcomp/mach.cmx \
+ asmcomp/cmm.cmx utils/clflags.cmx asmcomp/arch.cmx asmcomp/reload.cmi
asmcomp/schedgen.cmo : asmcomp/reg.cmi utils/misc.cmi asmcomp/mach.cmi \
asmcomp/linearize.cmi asmcomp/cmm.cmi asmcomp/arch.cmo \
asmcomp/schedgen.cmi
@@ -775,8 +780,8 @@ asmcomp/split.cmx : asmcomp/reg.cmx utils/misc.cmx asmcomp/mach.cmx \
asmcomp/split.cmi
driver/compile.cmi : typing/env.cmi
driver/errors.cmi :
-driver/main.cmi :
driver/main_args.cmi :
+driver/main.cmi :
driver/optcompile.cmi : typing/env.cmi
driver/opterrors.cmi :
driver/optmain.cmi :
@@ -811,6 +816,8 @@ driver/errors.cmx : utils/warnings.cmx typing/typetexp.cmx \
typing/includemod.cmx typing/env.cmx typing/ctype.cmx \
typing/cmi_format.cmx bytecomp/bytepackager.cmx bytecomp/bytelink.cmx \
bytecomp/bytelibrarian.cmx driver/errors.cmi
+driver/main_args.cmo : utils/warnings.cmi driver/main_args.cmi
+driver/main_args.cmx : utils/warnings.cmx driver/main_args.cmi
driver/main.cmo : utils/warnings.cmi utils/misc.cmi driver/main_args.cmi \
parsing/location.cmi driver/errors.cmi utils/config.cmi \
driver/compile.cmi utils/clflags.cmi bytecomp/bytepackager.cmi \
@@ -819,8 +826,6 @@ driver/main.cmx : utils/warnings.cmx utils/misc.cmx driver/main_args.cmx \
parsing/location.cmx driver/errors.cmx utils/config.cmx \
driver/compile.cmx utils/clflags.cmx bytecomp/bytepackager.cmx \
bytecomp/bytelink.cmx bytecomp/bytelibrarian.cmx driver/main.cmi
-driver/main_args.cmo : utils/warnings.cmi driver/main_args.cmi
-driver/main_args.cmx : utils/warnings.cmx driver/main_args.cmi
driver/optcompile.cmo : utils/warnings.cmi typing/typemod.cmi \
typing/typedtree.cmi bytecomp/translmod.cmi typing/stypes.cmi \
bytecomp/simplif.cmi typing/printtyp.cmi bytecomp/printlambda.cmi \
diff --git a/Makefile b/Makefile
index 7ac24ec..0634e76 100644
--- a/Makefile
+++ b/Makefile
@@ -77,7 +77,7 @@ ASMCOMP=asmcomp/arch.cmo asmcomp/debuginfo.cmo \
asmcomp/cmm.cmo asmcomp/printcmm.cmo \
asmcomp/reg.cmo asmcomp/mach.cmo asmcomp/proc.cmo \
asmcomp/clambda.cmo asmcomp/printclambda.cmo asmcomp/compilenv.cmo \
- asmcomp/closure.cmo asmcomp/cmmgen.cmo \
+ asmcomp/closure.cmo asmcomp/cmmgen.cmo asmcomp/boolsimplif.cmo \
asmcomp/printmach.cmo asmcomp/selectgen.cmo asmcomp/selection.cmo \
asmcomp/comballoc.cmo asmcomp/liveness.cmo \
asmcomp/spill.cmo asmcomp/split.cmo \
diff --git a/asmcomp/amd64/arch.ml b/asmcomp/amd64/arch.ml
index 4770c3f..ff57816 100644
--- a/asmcomp/amd64/arch.ml
+++ b/asmcomp/amd64/arch.ml
@@ -40,6 +40,8 @@ type specific_operation =
| Ioffset_loc of int * addressing_mode (* Add a constant to a location *)
| Ifloatarithmem of float_operation * addressing_mode
(* Float arith operation with memory *)
+ | Ibswap of int (* endiannes conversion *)
+
and float_operation =
Ifloatadd | Ifloatsub | Ifloatmul | Ifloatdiv
@@ -115,3 +117,5 @@ let print_specific_operation printreg op ppf arg =
fprintf ppf "%a %s float64[%a]" printreg arg.(0) (op_name op)
(print_addressing printreg addr)
(Array.sub arg 1 (Array.length arg - 1))
+ | Ibswap i ->
+ fprintf ppf "bswap_%i %a" i printreg arg.(0)
diff --git a/asmcomp/amd64/emit.mlp b/asmcomp/amd64/emit.mlp
index 2ff57dd..8e8d932 100644
--- a/asmcomp/amd64/emit.mlp
+++ b/asmcomp/amd64/emit.mlp
@@ -546,6 +546,15 @@ let emit_instr fallthrough i =
` addq ${emit_int n}, {emit_addressing addr i.arg 0}\n`
| Lop(Ispecific(Ifloatarithmem(op, addr))) ->
` {emit_string(instr_for_floatarithmem op)} {emit_addressing addr i.arg 1}, {emit_reg i.res.(0)}\n`
+ | Lop(Ispecific(Ibswap size)) ->
+ begin match size with
+ | 32 ->
+ ` bswap {emit_reg i.res.(0)}\n`;
+ ` sarq $32, {emit_reg i.res.(0)}\n`
+ | 64 ->
+ ` bswap {emit_reg i.res.(0)}\n`
+ | _ -> assert false
+ end
| Lreloadretaddr ->
()
| Lreturn ->
diff --git a/asmcomp/amd64/selection.ml b/asmcomp/amd64/selection.ml
index 9c4464a..85b7fc8 100644
--- a/asmcomp/amd64/selection.ml
+++ b/asmcomp/amd64/selection.ml
@@ -88,7 +88,8 @@ let pseudoregs_for_operation op arg res =
([|res.(0); arg.(1)|], res)
(* One-address unary operations: arg.(0) and res.(0) must be the same *)
| Iintop_imm((Iadd|Isub|Imul|Iand|Ior|Ixor|Ilsl|Ilsr|Iasr), _)
- | Iabsf | Inegf ->
+ | Iabsf | Inegf
+ | Ispecific(Ibswap _) ->
(res, res)
| Ispecific(Ifloatarithmem(_,_)) ->
let arg' = Array.copy arg in
diff --git a/asmcomp/boolsimplif.ml b/asmcomp/boolsimplif.ml
new file mode 100644
index 0000000..5945c6e
--- /dev/null
+++ b/asmcomp/boolsimplif.ml
@@ -0,0 +1,359 @@
+open Cmm
+
+(** Represents a value {v} computed from the value returned by the expression {base}
+ and applying the bit manipulation represented by {bits} *)
+type moves = {
+ base : expression; (** the original value *)
+ bits : bit_state array; (** correspondance between each bits {v} and {base} *)
+}
+
+and bit_state =
+ | Set of bool (** fixed value *)
+ | Var of int * bool (** (position, negated)
+ If negated = false, this represents the value of the bit {position} of base.
+ if negated = true this is its negation *)
+ | Undefined (** value that can't be represented as a boolean or a bit position *)
+
+let bit_array i =
+ let bit n =
+ if Nativeint.logand (Nativeint.shift_right_logical i n) 0b1n = 0b1n
+ then true
+ else false
+ in
+ Array.init (Arch.size_int*8) bit
+
+let apply_shift_left moves n =
+ { moves with
+ bits = Array.init (Arch.size_int*8)
+ (fun i -> if i - n < 0 then Set false else moves.bits.(i - n)) }
+
+let apply_shift_right_logical moves n =
+ { moves with
+ bits = Array.init (Arch.size_int*8)
+ (fun i -> if i + n >= (Arch.size_int*8) then Set false else moves.bits.(i + n)) }
+
+let apply_shift_right moves n =
+ { moves with
+ bits = Array.init (Arch.size_int*8)
+ (fun i -> if i + n >= (Arch.size_int*8)
+ then moves.bits.(Arch.size_int*8-1)
+ else moves.bits.(i + n)) }
+
+let not_bit = function
+ | Set b -> Set (not b)
+ | Undefined -> Undefined
+ | Var (i,b) -> Var (i,not b)
+
+(** Bit sum: var + b1 + b2 knowing that b1 and b2 are fixed bits
+ Returns the couple (sum bit, carry bit) *)
+let add_var_bits var b1 b2 =
+ match b1, b2 with
+ | false, false -> var, Set false
+ | true, false
+ | false, true -> not_bit var, var
+ | true, true -> var, Set true
+
+(** Bit sum: var + Var(i,neg1) + Var(i,neg2)
+ Returns the couple (sum bit, carry bit) *)
+let add_same i neg1 neg2 var =
+ if neg1 = neg2
+ then var, Var (i,neg1)
+ else add_var_bits var true false
+
+(** Bit sum: bit1 + bit2 + bit3
+ Returns the couple (sum bit, carry bit) *)
+let add_bits bit1 bit2 bit3 =
+ match bit1, bit2, bit3 with
+ | Set b1, Set b2, var
+ | Set b1, var, Set b2
+ | var, Set b1, Set b2 -> add_var_bits var b1 b2
+ | Var (i1,neg1), Var (i2,neg2), var
+ | Var (i1,neg1), var, Var (i2,neg2)
+ | var, Var (i1,neg1), Var (i2,neg2) ->
+ if i1 = i2
+ then add_same i1 neg1 neg2 var
+ else
+ begin match var with
+ | Var (i3, neg3) when i3 = i1 ->
+ add_same i1 neg1 neg3 (Var (i2,neg2))
+ | Var (i3, neg3) when i3 = i2 ->
+ add_same i2 neg1 neg3 (Var (i1,neg1))
+ | _ -> Undefined, Undefined
+ end
+ | _, _, _ -> Undefined, Undefined
+
+(** sum bits1 + bits2 assuming they have the same {base} expression *)
+let apply_add_same_var bits1 bits2 =
+ assert(Array.length bits1 = Array.length bits2);
+ let len = Array.length bits1 in
+ let rec aux i carry =
+ if i = len
+ then []
+ else
+ let res, carry = add_bits carry bits1.(i) bits2.(i) in
+ res::(aux (i+1) carry)
+ in
+ Array.of_list (aux 0 (Set false))
+
+let apply_not moves = { moves with bits = Array.map not_bit moves.bits }
+
+(** bitwise and of bits1 and bits2 assuming they have the same {base} expression *)
+let apply_and_same_var bits1 bits2 =
+ assert(Array.length bits1 = Array.length bits2);
+ let aux b1 b2 =
+ match b1,b2 with
+ | Set false, _
+ | _, Set false -> Set false
+ | Set true, b
+ | b, Set true -> b
+ | Var (v1, neg1), Var (v2, neg2) when v1 = v2 ->
+ if neg1 = neg2
+ then Var (v1, neg1)
+ else Set false
+ | _ -> Undefined
+ in
+ Array.mapi (fun i b2 -> aux bits1.(i) b2) bits2
+
+(** bitwise or of bits1 and bits2 assuming they have the same {base} expression *)
+let apply_or_same_var bits1 bits2 =
+ assert(Array.length bits1 = Array.length bits2);
+ let aux b1 b2 =
+ match b1,b2 with
+ | Set true, _
+ | _, Set true -> Set true
+ | Set false, b
+ | b, Set false -> b
+ | Var (v1, neg1), Var (v2, neg2) when v1 = v2 ->
+ if neg1 = neg2
+ then Var (v1, neg1)
+ else Set true
+ | _ -> Undefined
+ in
+ Array.mapi (fun i b2 -> aux bits1.(i) b2) bits2
+
+(** bitwise xor of bits1 and bits2 assuming they have the same {base} expression *)
+let apply_xor_same_var bits1 bits2 =
+ assert(Array.length bits1 = Array.length bits2);
+ let aux b1 b2 =
+ match b1,b2 with
+ | Set true, b
+ | b, Set true -> not_bit b
+ | Set false, b
+ | b, Set false -> b
+ | Var (v1, neg1), Var (v2, neg2) when v1 = v2 ->
+ if neg1 = neg2
+ then Set false
+ else Set true
+ | _ -> Undefined
+ in
+ Array.mapi (fun i b2 -> aux bits1.(i) b2) bits2
+
+let init_var base =
+ { base;
+ bits = Array.init (Arch.size_int*8) (fun i -> Var (i,false)) }
+
+let const_bits v = Array.map (fun b -> Set b) (bit_array v)
+
+let init_const v =
+ { base = Cconst_natint v;
+ bits = const_bits v }
+
+let has_undefined v =
+ let bits = v.bits in
+ let len = Array.length bits in
+ let rec aux i =
+ if i = len
+ then false
+ else
+ if bits.(i) = Undefined
+ then true
+ else aux (i+1)
+ in
+ aux 0
+
+let check_undefined exp v =
+ if has_undefined v
+ then init_var exp
+ else v
+
+let rec bits_moves exp =
+ match exp with
+ | Cop(Cand, [v1; v2]) ->
+ and_moves exp v1 v2
+ | Cop(Cor, [v1; v2]) ->
+ or_moves exp v1 v2
+ | Cop(Cxor, [v1; v2]) ->
+ xor_moves exp v1 v2
+ | Cop(Casr, [v; Cconst_int i]) ->
+ let bits = bits_moves v in
+ apply_shift_right bits i
+ | Cop(Clsr, [v; Cconst_int i]) ->
+ let bits = bits_moves v in
+ apply_shift_right_logical bits i
+ | Cop(Clsl, [v; Cconst_int i]) ->
+ let bits = bits_moves v in
+ apply_shift_left bits i
+ | Cop(Caddi, [v1; v2]) ->
+ add_moves exp v1 v2
+ | Cop(Csubi, [v1; Cconst_int i]) ->
+ add_moves exp v1 (Cconst_int (-i))
+ | Cop(Csubi, [v1; Cconst_natint i]) ->
+ add_moves exp v1 (Cconst_natint (Nativeint.neg i))
+ | Cop(Csubi, [Cconst_int 2; v]) ->
+ apply_not (bits_moves v)
+ | Cconst_int i ->
+ init_const (Nativeint.of_int i)
+ | Cconst_natint i ->
+ init_const i
+ | exp -> init_var exp
+
+and add_moves exp v1 v2 =
+ let const_add v i =
+ let mask = const_bits i in
+ let moves = bits_moves v in
+ check_undefined exp { moves with bits = apply_add_same_var moves.bits mask } in
+ match v1, v2 with
+ | Cconst_int i, v
+ | v, Cconst_int i ->
+ const_add v (Nativeint.of_int i)
+ | Cconst_natint i, v
+ | v, Cconst_natint i ->
+ const_add v i
+ | v1, v2 ->
+ let moves1 = bits_moves v1 in
+ let moves2 = bits_moves v2 in
+ if moves1.base = moves2.base
+ then check_undefined exp
+ { moves1 with bits = apply_add_same_var moves1.bits moves2.bits }
+ else init_var exp
+
+and and_moves exp v1 v2 =
+ let const_and v i =
+ let mask = const_bits i in
+ let moves = bits_moves v in
+ { moves with bits = apply_and_same_var moves.bits mask } in
+ match v1, v2 with
+ | Cconst_int i, v
+ | v, Cconst_int i ->
+ const_and v (Nativeint.of_int i)
+ | Cconst_natint i, v
+ | v, Cconst_natint i ->
+ const_and v i
+ | v1, v2 ->
+ let moves1 = bits_moves v1 in
+ let moves2 = bits_moves v2 in
+ if moves1.base = moves2.base
+ then check_undefined exp
+ { moves1 with bits = apply_and_same_var moves1.bits moves2.bits }
+ else init_var exp
+
+and or_moves exp v1 v2 =
+ let const_or v i =
+ let mask = const_bits i in
+ let moves = bits_moves v in
+ { moves with bits = apply_or_same_var moves.bits mask } in
+ match v1, v2 with
+ | Cconst_int i, v
+ | v, Cconst_int i ->
+ const_or v (Nativeint.of_int i)
+ | Cconst_natint i, v
+ | v, Cconst_natint i ->
+ const_or v i
+ | v1, v2 ->
+ let moves1 = bits_moves v1 in
+ let moves2 = bits_moves v2 in
+ if moves1.base = moves2.base
+ then check_undefined exp
+ { moves1 with bits = apply_or_same_var moves1.bits moves2.bits }
+ else init_var exp
+
+and xor_moves exp v1 v2 =
+ let const_xor v i =
+ let mask = const_bits i in
+ let moves = bits_moves v in
+ { moves with bits = apply_xor_same_var moves.bits mask } in
+ match v1, v2 with
+ | Cconst_int i, v
+ | v, Cconst_int i ->
+ const_xor v (Nativeint.of_int i)
+ | Cconst_natint i, v
+ | v, Cconst_natint i ->
+ const_xor v i
+ | v1, v2 ->
+ let moves1 = bits_moves v1 in
+ let moves2 = bits_moves v2 in
+ if moves1.base = moves2.base
+ then check_undefined exp
+ { moves1 with bits = apply_xor_same_var moves1.bits moves2.bits }
+ else init_var exp
+
+let bits_moves exp =
+ let r = bits_moves exp in
+ assert (not (has_undefined r));
+ r
+
+type pattern_64 =
+ | PatBswap16_tagged (* bswap16 applied on int *)
+ | PatBswap32_boxed (* bswap32 applied on Int32 *)
+ | PatBswap64_boxed (* bswap64 applied on Int64 *)
+ | PatConstant (* constant value *)
+ | PatNone (* no pattern found *)
+
+let check_pattern_64' = function
+ | [| Set _; Set _; Set _; Set _; Set _; Set _; Set _; Set _;
+ Set _; Set _; Set _; Set _; Set _; Set _; Set _; Set _;
+ Set _; Set _; Set _; Set _; Set _; Set _; Set _; Set _;
+ Set _; Set _; Set _; Set _; Set _; Set _; Set _; Set _;
+ Set _; Set _; Set _; Set _; Set _; Set _; Set _; Set _;
+ Set _; Set _; Set _; Set _; Set _; Set _; Set _; Set _;
+ Set _; Set _; Set _; Set _; Set _; Set _; Set _; Set _;
+ Set _; Set _; Set _; Set _; Set _; Set _; Set _; Set _; |] ->
+ PatConstant
+
+ | [| Var (24, false); Var (25, false); Var (26, false); Var (27, false); Var (28, false); Var (29, false); Var (30, false); Var (31, false);
+ Var (16, false); Var (17, false); Var (18, false); Var (19, false); Var (20, false); Var (21, false); Var (22, false); Var (23, false);
+ Var (8, false); Var (9, false); Var (10, false); Var (11, false); Var (12, false); Var (13, false); Var (14, false); Var (15, false);
+ Var (0, false); Var (1, false); Var (2, false); Var (3, false); Var (4, false); Var (5, false); Var (6, false); Var (7, false);
+ Var (7, false); Var (7, false); Var (7, false); Var (7, false); Var (7, false); Var (7, false); Var (7, false); Var (7, false);
+ Var (7, false); Var (7, false); Var (7, false); Var (7, false); Var (7, false); Var (7, false); Var (7, false); Var (7, false);
+ Var (7, false); Var (7, false); Var (7, false); Var (7, false); Var (7, false); Var (7, false); Var (7, false); Var (7, false);
+ Var (7, false); Var (7, false); Var (7, false); Var (7, false); Var (7, false); Var (7, false); Var (7, false); Var (7, false); |] ->
+ PatBswap32_boxed
+
+ | [| Var (56,false); Var (57,false); Var (58,false); Var (59,false); Var (60,false); Var (61,false); Var (62,false); Var (63,false);
+ Var (48,false); Var (49,false); Var (50,false); Var (51,false); Var (52,false); Var (53,false); Var (54,false); Var (55,false);
+ Var (40,false); Var (41,false); Var (42,false); Var (43,false); Var (44,false); Var (45,false); Var (46,false); Var (47,false);
+ Var (32,false); Var (33,false); Var (34,false); Var (35,false); Var (36,false); Var (37,false); Var (38,false); Var (39,false);
+ Var (24,false); Var (25,false); Var (26,false); Var (27,false); Var (28,false); Var (29,false); Var (30,false); Var (31,false);
+ Var (16,false); Var (17,false); Var (18,false); Var (19,false); Var (20,false); Var (21,false); Var (22,false); Var (23,false);
+ Var ( 8,false); Var ( 9,false); Var (10,false); Var (11,false); Var (12,false); Var (13,false); Var (14,false); Var (15,false);
+ Var ( 0,false); Var ( 1,false); Var ( 2,false); Var ( 3,false); Var ( 4,false); Var ( 5,false); Var ( 6,false); Var ( 7,false); |] ->
+ PatBswap64_boxed
+
+ | [| Set _ ; Var ( 9,false); Var (10,false); Var (11,false); Var (12,false); Var (13,false); Var (14,false); Var (15,false);
+ Var (16,false); Var ( 1,false); Var ( 2,false); Var ( 3,false); Var ( 4,false); Var ( 5,false); Var ( 6,false); Var ( 7,false);
+ Var ( 8,false); Set _; Set _; Set _; Set _; Set _; Set _; Set _;
+ Set _; Set _; Set _; Set _; Set _; Set _; Set _; Set _;
+ Set _; Set _; Set _; Set _; Set _; Set _; Set _; Set _;
+ Set _; Set _; Set _; Set _; Set _; Set _; Set _; Set _;
+ Set _; Set _; Set _; Set _; Set _; Set _; Set _; Set _;
+ Set _; Set _; Set _; Set _; Set _; Set _; Set _; Set _; |] ->
+ PatBswap16_tagged
+
+ | _ -> PatNone
+
+let check_pattern_64 v = check_pattern_64' v.bits
+
+let constant_of_moves moves =
+ let first_bit = Nativeint.shift_left 1n (Arch.size_int*8 - 1) in
+ Array.fold_left
+ (fun v bit ->
+ match bit with
+ | Set b ->
+ let v = Nativeint.shift_right_logical v 1 in
+ if b
+ then Nativeint.logor v first_bit
+ else v
+ | _ -> failwith "Boolsimplif.constant_of_moves: non constant moves")
+ 0n
+ moves.bits
diff --git a/asmcomp/boolsimplif.mli b/asmcomp/boolsimplif.mli
new file mode 100644
index 0000000..8c5c55a
--- /dev/null
+++ b/asmcomp/boolsimplif.mli
@@ -0,0 +1,30 @@
+open Cmm
+
+(** Represents a value {v} computed from the value returned by the expression {base}
+ and applying the bit manipulation represented by {bits} *)
+type moves = {
+ base : expression; (** the original value *)
+ bits : bit_state array; (** correspondance between each bits {v} and {base} *)
+}
+
+and bit_state =
+ | Set of bool (** fixed value *)
+ | Var of int * bool (** (position, negated)
+ If negated = false, this represents the value of the bit {position} of base.
+ if negated = true this is its negation *)
+ | Undefined (** value that can't be represented as a boolean or a bit position *)
+
+val bits_moves : Cmm.expression -> moves
+
+(** patterns on 64 bit architectures *)
+type pattern_64 =
+ | PatBswap16_tagged (* bswap16 applied on int *)
+ | PatBswap32_boxed (* bswap32 applied on Int32 *)
+ | PatBswap64_boxed (* bswap64 applied on Int64 *)
+ | PatConstant (* constant value *)
+ | PatNone (* no pattern found *)
+
+val check_pattern_64 : moves -> pattern_64
+
+(** fails if moves are not only constants *)
+val constant_of_moves : moves -> nativeint
diff --git a/asmcomp/cmmgen.ml b/asmcomp/cmmgen.ml
index 8a9d425..7759dfb 100644
--- a/asmcomp/cmmgen.ml
+++ b/asmcomp/cmmgen.ml
@@ -1376,7 +1376,9 @@ and transl_prim_2 p arg1 arg2 dbg =
ignore_low_bit_int(transl arg2)]);
Cconst_int 1])
| Plslint ->
- incr_int(lsl_int (decr_int(transl arg1)) (untag_int(transl arg2)))
+ incr_int(lsl_int
+ (Cop(Cand, [Cconst_natint (Nativeint.lognot 0b1n);
+ (transl arg1)])) (untag_int(transl arg2)))
| Plsrint ->
Cop(Cor, [Cop(Clsr, [transl arg1; untag_int(transl arg2)]);
Cconst_int 1])
--
1.7.10.4
amd64_selection.patch [^] (1,537 bytes) 2012-10-08 11:07 [Show Content] [Hide Content]diff --git a/asmcomp/amd64/selection.ml b/asmcomp/amd64/selection.ml
index 9c4464a..6fbb9bb 100644
--- a/asmcomp/amd64/selection.ml
+++ b/asmcomp/amd64/selection.ml
@@ -88,7 +88,8 @@ let pseudoregs_for_operation op arg res =
([|res.(0); arg.(1)|], res)
(* One-address unary operations: arg.(0) and res.(0) must be the same *)
| Iintop_imm((Iadd|Isub|Imul|Iand|Ior|Ixor|Ilsl|Ilsr|Iasr), _)
- | Iabsf | Inegf ->
+ | Iabsf | Inegf
+ | Ispecific(Ibswap _) ->
(res, res)
| Ispecific(Ifloatarithmem(_,_)) ->
let arg' = Array.copy arg in
@@ -155,6 +156,25 @@ method! select_store addr exp =
method! select_operation op args =
match op with
+ | Caddi | Csubi | Cxor | Cor | Cand | Casr | Clsl | Clsr ->
+ let moves = Boolsimplif.bits_moves (Cop(op, args)) in
+ begin
+ match Boolsimplif.check_pattern_64 moves with
+ | Boolsimplif.PatNone ->
+ self#select_operation' op args
+ | Boolsimplif.PatBswap16_tagged ->
+ self#select_operation' op args
+ | Boolsimplif.PatBswap32_boxed ->
+ (Ispecific(Ibswap 32), [moves.Boolsimplif.base])
+ | Boolsimplif.PatBswap64_boxed ->
+ (Ispecific(Ibswap 64), [moves.Boolsimplif.base])
+ | Boolsimplif.PatConstant ->
+ (Iconst_int (Boolsimplif.constant_of_moves moves), [])
+ end
+ | _ -> self#select_operation' op args
+
+method select_operation' op args =
+ match op with
(* Recognize the LEA instruction *)
Caddi | Cadda | Csubi | Csuba ->
begin match self#select_addressing Word (Cop(op, args)) with
0001-Add-bswap-primitives.patch [^] (17,038 bytes) 2012-11-09 20:37 [Show Content] [Hide Content]From f5b2f6ee228d3253dbbc85101f0d74a70e3e73a9 Mon Sep 17 00:00:00 2001
From: Pierre Chambart <pierre.chambart@ocamlpro.org>
Date: Fri, 9 Nov 2012 20:35:38 +0100
Subject: [PATCH] Add %bswap primitives
---
asmcomp/amd64/arch.ml | 4 +++
asmcomp/amd64/emit.mlp | 12 +++++++
asmcomp/amd64/emit_nt.mlp | 12 +++++++
asmcomp/amd64/selection.ml | 27 ++++++++++++++-
asmcomp/closure.ml | 3 ++
asmcomp/cmmgen.ml | 13 +++++++
bytecomp/bytegen.ml | 2 ++
bytecomp/lambda.ml | 3 ++
bytecomp/lambda.mli | 3 ++
bytecomp/printlambda.ml | 2 ++
bytecomp/translcore.ml | 4 +++
byterun/int64_emul.h | 14 ++++++++
byterun/int64_native.h | 9 +++++
byterun/ints.c | 65 +++++++++++++++++++++++++++++++++++
testsuite/tests/prim-bswap/Makefile | 2 ++
testsuite/tests/prim-bswap/bswap.ml | 44 ++++++++++++++++++++++++
16 files changed, 218 insertions(+), 1 deletion(-)
create mode 100644 testsuite/tests/prim-bswap/Makefile
create mode 100644 testsuite/tests/prim-bswap/bswap.ml
create mode 100644 testsuite/tests/prim-bswap/bswap.reference
diff --git a/asmcomp/amd64/arch.ml b/asmcomp/amd64/arch.ml
index 43d513a..1c657a5 100644
--- a/asmcomp/amd64/arch.ml
+++ b/asmcomp/amd64/arch.ml
@@ -38,6 +38,8 @@ type specific_operation =
| Ioffset_loc of int * addressing_mode (* Add a constant to a location *)
| Ifloatarithmem of float_operation * addressing_mode
(* Float arith operation with memory *)
+ | Ibswap of int (* endiannes conversion *)
+
and float_operation =
Ifloatadd | Ifloatsub | Ifloatmul | Ifloatdiv
@@ -113,3 +115,5 @@ let print_specific_operation printreg op ppf arg =
fprintf ppf "%a %s float64[%a]" printreg arg.(0) (op_name op)
(print_addressing printreg addr)
(Array.sub arg 1 (Array.length arg - 1))
+ | Ibswap i ->
+ fprintf ppf "bswap_%i %a" i printreg arg.(0)
diff --git a/asmcomp/amd64/emit.mlp b/asmcomp/amd64/emit.mlp
index 06f6aba..3f44d39 100644
--- a/asmcomp/amd64/emit.mlp
+++ b/asmcomp/amd64/emit.mlp
@@ -543,6 +543,18 @@ let emit_instr fallthrough i =
` addq ${emit_int n}, {emit_addressing addr i.arg 0}\n`
| Lop(Ispecific(Ifloatarithmem(op, addr))) ->
` {emit_string(instr_for_floatarithmem op)} {emit_addressing addr i.arg 1}, {emit_reg i.res.(0)}\n`
+ | Lop(Ispecific(Ibswap size)) ->
+ begin match size with
+ | 16 ->
+ ` xchg %ah, %al\n`;
+ ` movzwq {emit_reg16 i.res.(0)}, {emit_reg i.res.(0)}\n`
+ | 32 ->
+ ` bswap {emit_reg32 i.res.(0)}\n`;
+ ` movslq {emit_reg32 i.res.(0)}, {emit_reg i.res.(0)}\n`
+ | 64 ->
+ ` bswap {emit_reg i.res.(0)}\n`
+ | _ -> assert false
+ end
| Lreloadretaddr ->
()
| Lreturn ->
diff --git a/asmcomp/amd64/emit_nt.mlp b/asmcomp/amd64/emit_nt.mlp
index bc1750c..54aa755 100644
--- a/asmcomp/amd64/emit_nt.mlp
+++ b/asmcomp/amd64/emit_nt.mlp
@@ -537,6 +537,18 @@ let emit_instr fallthrough i =
` add QWORD PTR {emit_addressing addr i.arg 0}, {emit_int n}\n`
| Lop(Ispecific(Ifloatarithmem(op, addr))) ->
` {emit_string(instr_for_floatarithmem op)} {emit_reg i.res.(0)}, REAL8 PTR {emit_addressing addr i.arg 1}\n`
+ | Lop(Ispecific(Ibswap size)) ->
+ begin match size with
+ | 16 ->
+ ` xchg %ah, %al\n`;
+ ` movzwq {emit_reg i.res.(0)}, {emit_reg16 i.res.(0)}\n`
+ | 32 ->
+ ` bswap {emit_reg32 i.res.(0)}\n`;
+ ` movslq {emit_reg i.res.(0)}, {emit_reg32 i.res.(0)}\n`
+ | 64 ->
+ ` bswap {emit_reg i.res.(0)}\n`
+ | _ -> assert false
+ end
| Lreloadretaddr ->
()
| Lreturn ->
diff --git a/asmcomp/amd64/selection.ml b/asmcomp/amd64/selection.ml
index cc07133..d57017d 100644
--- a/asmcomp/amd64/selection.ml
+++ b/asmcomp/amd64/selection.ml
@@ -84,8 +84,13 @@ let pseudoregs_for_operation op arg res =
([|res.(0); arg.(1)|], res)
(* One-address unary operations: arg.(0) and res.(0) must be the same *)
| Iintop_imm((Iadd|Isub|Imul|Iand|Ior|Ixor|Ilsl|Ilsr|Iasr), _)
- | Iabsf | Inegf ->
+ | Iabsf | Inegf
+ | Ispecific(Ibswap (32|64)) ->
(res, res)
+ (* For xchg, args must be a register allowing access to high 8 bit register
+ (rax, rbx, rcx or rdx). Keep it simple, just force the argument in rax. *)
+ | Ispecific(Ibswap 16) ->
+ ([| rax |], [| rax |])
| Ispecific(Ifloatarithmem(_,_)) ->
let arg' = Array.copy arg in
arg'.(0) <- res.(0);
@@ -107,6 +112,10 @@ let pseudoregs_for_operation op arg res =
(* Other instructions are regular *)
| _ -> raise Use_default
+let inline_ops =
+ [ "sqrt"; "caml_bswap16_direct"; "caml_int32_direct_bswap";
+ "caml_int64_direct_bswap"; "caml_nativeint_direct_bswap" ]
+
(* The selector class *)
class selector = object (self)
@@ -117,6 +126,15 @@ method is_immediate n = n <= 0x7FFFFFFF && n >= -0x80000000
method is_immediate_natint n = n <= 0x7FFFFFFFn && n >= -0x80000000n
+method! is_simple_expr e =
+ match e with
+ | Cop(Cextcall(fn, _, _, _), args)
+ when List.mem fn inline_ops ->
+ (* inlined ops are simple if their arguments are *)
+ List.for_all self#is_simple_expr args
+ | _ ->
+ super#is_simple_expr e
+
method select_addressing chunk exp =
let (a, d) = select_addr exp in
(* PR#4625: displacement must be a signed 32-bit immediate *)
@@ -192,6 +210,13 @@ method! select_operation op args =
| _ ->
super#select_operation op args
end
+ | Cextcall("caml_bswap16_direct", _, _, _) ->
+ (Ispecific (Ibswap 16), args)
+ | Cextcall("caml_int32_direct_bswap", _, _, _) ->
+ (Ispecific (Ibswap 32), args)
+ | Cextcall("caml_int64_direct_bswap", _, _, _)
+ | Cextcall("caml_nativeint_direct_bswap", _, _, _) ->
+ (Ispecific (Ibswap 64), args)
| _ -> super#select_operation op args
(* Recognize float arithmetic with mem *)
diff --git a/asmcomp/closure.ml b/asmcomp/closure.ml
index 1bca455..a151319 100644
--- a/asmcomp/closure.ml
+++ b/asmcomp/closure.ml
@@ -197,6 +197,9 @@ let simplif_prim_pure p (args, approxs) dbg =
begin match p with
Pidentity -> make_const_int x
| Pnegint -> make_const_int (-x)
+ | Pbswap16 ->
+ make_const_int (((x land 0xff) lsl 8) lor
+ ((x land 0xff00) lsr 8))
| Poffsetint y -> make_const_int (x + y)
| _ -> (Uprim(p, args, dbg), Value_unknown)
end
diff --git a/asmcomp/cmmgen.ml b/asmcomp/cmmgen.ml
index 2b9f0c9..13ae8da 100644
--- a/asmcomp/cmmgen.ml
+++ b/asmcomp/cmmgen.ml
@@ -844,6 +844,7 @@ let simplif_primitive_32bits = function
| Pstring_set_64(_) -> Pccall (default_prim "caml_string_set64")
| Pbigstring_load_64(_) -> Pccall (default_prim "caml_ba_uint8_get64")
| Pbigstring_set_64(_) -> Pccall (default_prim "caml_ba_uint8_set64")
+ | Pbbswap Pint64 -> Pccall (default_prim "caml_int64_bswap")
| p -> p
let simplif_primitive p =
@@ -960,6 +961,7 @@ let is_unboxed_number = function
| Pstring_load_64(_) -> Boxed_integer Pint64
| Pbigstring_load_32(_) -> Boxed_integer Pint32
| Pbigstring_load_64(_) -> Boxed_integer Pint64
+ | Pbbswap bi -> Boxed_integer bi
| _ -> No_unboxing
end
| _ -> No_unboxing
@@ -1333,6 +1335,17 @@ and transl_prim_1 p arg dbg =
box_int bi2 (transl_unbox_int bi1 arg)
| Pnegbint bi ->
box_int bi (Cop(Csubi, [Cconst_int 0; transl_unbox_int bi arg]))
+ | Pbbswap bi ->
+ let prim = match bi with
+ | Pnativeint -> "nativeint"
+ | Pint32 -> "int32"
+ | Pint64 -> "int64" in
+ box_int bi (Cop(Cextcall(Printf.sprintf "caml_%s_direct_bswap" prim,
+ typ_int, false, Debuginfo.none),
+ [transl_unbox_int bi arg]))
+ | Pbswap16 ->
+ tag_int (Cop(Cextcall("caml_bswap16_direct", typ_int, false, Debuginfo.none),
+ [untag_int (transl arg)]))
| _ ->
fatal_error "Cmmgen.transl_prim_1"
diff --git a/bytecomp/bytegen.ml b/bytecomp/bytegen.ml
index e4fae30..e933df5 100644
--- a/bytecomp/bytegen.ml
+++ b/bytecomp/bytegen.ml
@@ -395,6 +395,8 @@ let comp_primitive p args =
| Pbigstring_set_16(_) -> Kccall("caml_ba_uint8_set16", 3)
| Pbigstring_set_32(_) -> Kccall("caml_ba_uint8_set32", 3)
| Pbigstring_set_64(_) -> Kccall("caml_ba_uint8_set64", 3)
+ | Pbswap16 -> Kccall("caml_bswap16", 1)
+ | Pbbswap(bi) -> comp_bint_primitive bi "bswap" args
| _ -> fatal_error "Bytegen.comp_primitive"
let is_immed n = immed_min <= n && n <= immed_max
diff --git a/bytecomp/lambda.ml b/bytecomp/lambda.ml
index 6a847c8..cfced85 100644
--- a/bytecomp/lambda.ml
+++ b/bytecomp/lambda.ml
@@ -110,6 +110,9 @@ type primitive =
| Pbigstring_set_64 of bool
(* Compile time constants *)
| Pctconst of compile_time_constant
+ (* byte swap *)
+ | Pbswap16
+ | Pbbswap of boxed_integer
and comparison =
Ceq | Cneq | Clt | Cgt | Cle | Cge
diff --git a/bytecomp/lambda.mli b/bytecomp/lambda.mli
index d2e3761..17da073 100644
--- a/bytecomp/lambda.mli
+++ b/bytecomp/lambda.mli
@@ -110,6 +110,9 @@ type primitive =
| Pbigstring_set_64 of bool
(* Compile time constants *)
| Pctconst of compile_time_constant
+ (* byte swap *)
+ | Pbswap16
+ | Pbbswap of boxed_integer
and comparison =
Ceq | Cneq | Clt | Cgt | Cle | Cge
diff --git a/bytecomp/printlambda.ml b/bytecomp/printlambda.ml
index e570cf1..6531670 100644
--- a/bytecomp/printlambda.ml
+++ b/bytecomp/printlambda.ml
@@ -227,6 +227,8 @@ let primitive ppf = function
| Pbigstring_set_64(unsafe) ->
if unsafe then fprintf ppf "bigarray.array1.unsafe_set64"
else fprintf ppf "bigarray.array1.set64"
+ | Pbswap16 -> fprintf ppf "bswap16"
+ | Pbbswap(bi) -> print_boxed_integer "bswap" ppf bi
let rec lam ppf = function
| Lvar id ->
diff --git a/bytecomp/translcore.ml b/bytecomp/translcore.ml
index 256395b..4e8de1b 100644
--- a/bytecomp/translcore.ml
+++ b/bytecomp/translcore.ml
@@ -305,6 +305,10 @@ let primitives_table = create_hashtable 57 [
"%caml_bigstring_set32u", Pbigstring_set_32(true);
"%caml_bigstring_set64", Pbigstring_set_64(false);
"%caml_bigstring_set64u", Pbigstring_set_64(true);
+ "%bswap16", Pbswap16;
+ "%bswap_int32", Pbbswap(Pint32);
+ "%bswap_int64", Pbbswap(Pint64);
+ "%bswap_native", Pbbswap(Pnativeint);
]
let prim_makearray =
diff --git a/byterun/int64_emul.h b/byterun/int64_emul.h
index e9f5d85..ba7904a 100644
--- a/byterun/int64_emul.h
+++ b/byterun/int64_emul.h
@@ -270,4 +270,18 @@ static int64 I64_of_double(double f)
return res;
}
+static int64 I64_bswap(int64 x)
+{
+ int64 res;
+ res.h = (((x.l & 0x000000FF) << 24) |
+ ((x.l & 0x0000FF00) << 8) |
+ ((x.l & 0x00FF0000) >> 8) |
+ ((x.l & 0xFF000000) >> 24));
+ res.l = (((x.h & 0x000000FF) << 24) |
+ ((x.h & 0x0000FF00) << 8) |
+ ((x.h & 0x00FF0000) >> 8) |
+ ((x.h & 0xFF000000) >> 24));
+ return res;
+}
+
#endif /* CAML_INT64_EMUL_H */
diff --git a/byterun/int64_native.h b/byterun/int64_native.h
index 5e84dda..09b5d65 100644
--- a/byterun/int64_native.h
+++ b/byterun/int64_native.h
@@ -49,4 +49,13 @@
#define I64_to_double(x) ((double)(x))
#define I64_of_double(x) ((int64)(x))
+#define I64_bswap(x) ((((x) & 0x00000000000000FF) << 56) | \
+ (((x) & 0x000000000000FF00) << 40) | \
+ (((x) & 0x0000000000FF0000) << 24) | \
+ (((x) & 0x00000000FF000000) << 8) | \
+ (((x) & 0x000000FF00000000) >> 8) | \
+ (((x) & 0x0000FF0000000000) >> 24) | \
+ (((x) & 0x00FF000000000000) >> 40) | \
+ (((x) & 0xFF00000000000000) >> 56))
+
#endif /* CAML_INT64_NATIVE_H */
diff --git a/byterun/ints.c b/byterun/ints.c
index f8491b1..ffeac81 100644
--- a/byterun/ints.c
+++ b/byterun/ints.c
@@ -114,6 +114,19 @@ intnat caml_safe_mod(intnat p, intnat q)
}
#endif
+CAMLprim value caml_bswap16_direct(value x)
+{
+ return ((((x & 0x00FF) << 8) |
+ ((x & 0xFF00) >> 8)));
+}
+
+CAMLprim value caml_bswap16(value v)
+{
+ intnat x = Int_val(v);
+ return (Val_int ((((x & 0x00FF) << 8) |
+ ((x & 0xFF00) >> 8))));
+}
+
/* Tagged integers */
CAMLprim value caml_int_compare(value v1, value v2)
@@ -296,6 +309,20 @@ CAMLprim value caml_int32_shift_right(value v1, value v2)
CAMLprim value caml_int32_shift_right_unsigned(value v1, value v2)
{ return caml_copy_int32((uint32)Int32_val(v1) >> Int_val(v2)); }
+static int32 swap32(int32 x)
+{
+ return (((x & 0x000000FF) << 24) |
+ ((x & 0x0000FF00) << 8) |
+ ((x & 0x00FF0000) >> 8) |
+ ((x & 0xFF000000) >> 24));
+}
+
+CAMLprim value caml_int32_direct_bswap(value v)
+{ return swap32(v); }
+
+CAMLprim value caml_int32_bswap(value v)
+{ return caml_copy_int32(swap32(Int32_val(v))); }
+
CAMLprim value caml_int32_of_int(value v)
{ return caml_copy_int32(Long_val(v)); }
@@ -486,6 +513,26 @@ CAMLprim value caml_int64_shift_right(value v1, value v2)
CAMLprim value caml_int64_shift_right_unsigned(value v1, value v2)
{ return caml_copy_int64(I64_lsr(Int64_val(v1), Int_val(v2))); }
+#ifdef ARCH_SIXTYFOUR
+static value swap64(value x)
+{
+ return (((((x) & 0x00000000000000FF) << 56) |
+ (((x) & 0x000000000000FF00) << 40) |
+ (((x) & 0x0000000000FF0000) << 24) |
+ (((x) & 0x00000000FF000000) << 8) |
+ (((x) & 0x000000FF00000000) >> 8) |
+ (((x) & 0x0000FF0000000000) >> 24) |
+ (((x) & 0x00FF000000000000) >> 40) |
+ (((x) & 0xFF00000000000000) >> 56)));
+}
+
+CAMLprim value caml_int64_direct_bswap(value v)
+{ return swap64(v); }
+#endif
+
+CAMLprim value caml_int64_bswap(value v)
+{ return caml_copy_int64(I64_bswap(Int64_val(v))); }
+
CAMLprim value caml_int64_of_int(value v)
{ return caml_copy_int64(I64_of_intnat(Long_val(v))); }
@@ -738,6 +785,24 @@ CAMLprim value caml_nativeint_shift_right(value v1, value v2)
CAMLprim value caml_nativeint_shift_right_unsigned(value v1, value v2)
{ return caml_copy_nativeint((uintnat)Nativeint_val(v1) >> Int_val(v2)); }
+CAMLprim value caml_nativeint_direct_bswap(value v)
+{
+#ifdef ARCH_SIXTYFOUR
+ return swap64(v);
+#else
+ return swap32(v);
+#endif
+}
+
+CAMLprim value caml_nativeint_bswap(value v)
+{
+#ifdef ARCH_SIXTYFOUR
+ return caml_copy_nativeint(swap64(Nativeint_val(v)));
+#else
+ return caml_copy_nativeint(swap32(Nativeint_val(v)));
+#endif
+}
+
CAMLprim value caml_nativeint_of_int(value v)
{ return caml_copy_nativeint(Long_val(v)); }
diff --git a/testsuite/tests/prim-bswap/Makefile b/testsuite/tests/prim-bswap/Makefile
new file mode 100644
index 0000000..791076d
--- /dev/null
+++ b/testsuite/tests/prim-bswap/Makefile
@@ -0,0 +1,2 @@
+include $(BASEDIR)/makefiles/Makefile.several
+include $(BASEDIR)/makefiles/Makefile.common
diff --git a/testsuite/tests/prim-bswap/bswap.ml b/testsuite/tests/prim-bswap/bswap.ml
new file mode 100644
index 0000000..ac127fe
--- /dev/null
+++ b/testsuite/tests/prim-bswap/bswap.ml
@@ -0,0 +1,44 @@
+
+external bswap16 : int -> int = "%bswap16"
+external bswap32 : int32 -> int32 = "%bswap_int32"
+external bswap64 : int64 -> int64 = "%bswap_int64"
+external bswapnative : nativeint -> nativeint = "%bswap_native"
+
+let s = " "
+let () =
+ s.[0] <- Char.chr 0x12;
+ s.[1] <- Char.chr 0x34;
+ s.[2] <- Char.chr 0xFF
+let v = (* 0x1234 not inlined *)
+ (Char.code s.[0] lsl 8) lor (Char.code s.[1])
+let v' = (* 0x34FF not inlined *)
+ (Char.code s.[1] lsl 8) lor (Char.code s.[2])
+
+let () =
+ (* check constant propagation *)
+ assert( bswap16 0x1234 = 0x3412 );
+ assert( bswap16 0xaabb1234 = 0x3412 );
+ assert( bswap16 0xaabb12FF = 0xFF12 );
+ assert( bswap16 (-0x1234) = 0xcced );
+ (* check effective operation *)
+ assert( bswap16 v = 0x3412 );
+ assert( bswap16 v' = 0xFF34 );
+ assert( bswap16 (0xaabb0000 lor v) = 0x3412 );
+ assert( bswap32 0x12345678l = 0x78563412l );
+ assert( bswap32 0x123456FFl = 0xFF563412l );
+ assert( bswap64 0x1234567890abcdefL = 0xefcdab9078563412L )
+
+let n1 = 0x12345678n
+let n2 = 0x90abcdefn
+let n = Nativeint.logor (Nativeint.shift_left n1 32) n2
+
+let n1' = Nativeint.of_int32 (bswap32 (Nativeint.to_int32 n1))
+let n2' = Nativeint.of_int32 (bswap32 (Nativeint.to_int32 n2))
+let n' = Nativeint.logor (Nativeint.shift_left n2' 32) n1'
+
+let () =
+ if Sys.word_size = 4
+ then
+ assert ( bswapnative n = n2' )
+ else
+ assert ( bswapnative n = n' )
diff --git a/testsuite/tests/prim-bswap/bswap.reference b/testsuite/tests/prim-bswap/bswap.reference
new file mode 100644
index 0000000..e69de29
--
1.7.10.4
|