Version française
Home     About     Download     Resources     Contact us    
Browse thread
AW: [Caml-list] generic Hashtbl.to_array
[ Home ] [ Index: by date | by threads ]
[ Search: ]

[ Message by date: previous | next ] [ Message in thread: previous | next ] [ Thread: previous | next ]
Date: -- (:)
From: Christoph Bauer <christoph.bauer@l...>
Subject: AW: [Caml-list] generic Hashtbl.to_array
Hi,

> 
> You could also try inverting the Hashtbl fold into an 
> iterator+closure and pass the closure into the Array.init 
> function, but I'm not sure how complicated/efficient that would be.

Something like:

(** [to_array_2 t] returns the bindings of [t] as an array of
    [(key, value)] pairs, in the order [Hashtbl.fold] visits them.

    The array is first filled with placeholder thunks so that no dummy
    [(key, value)] pair is needed to create it; each thunk is then
    overwritten during the fold and finally forced. *)
let to_array_2 t =
  (* Placeholder stored in every slot until the fold overwrites it. *)
  let unset () = raise Not_found in
  let thunks = Array.make (Hashtbl.length t) unset in
  (* Store a thunk for the binding in [slot] and return the next slot. *)
  let store key value slot =
    thunks.(slot) <- (fun () -> (key, value));
    succ slot
  in
  let (_ : int) = Hashtbl.fold store t 0 in
  Array.map (fun thunk -> thunk ()) thunks


> 
> I suppose it just depends on how efficient you need it to be. 
> If it's just some simple stuff, I'd just use the intermediary list.


Benchmarking shows that all three approaches are similar
with respect to efficiency.

Regards,
Christoph Bauer

Benchmark:

Throughputs for to_array_1, to_array_2, to_array_3, each running 5 times for
at least 1 CPU seconds:
to_array_1:  1 WALL ( 1.14 usr +  0.00 sys =  1.14 CPU) @ 491.23/s (n=560)
             1 WALL ( 1.14 usr +  0.00 sys =  1.14 CPU) @ 491.23/s (n=560)
             2 WALL ( 1.14 usr +  0.00 sys =  1.14 CPU) @ 491.23/s (n=560)
             1 WALL ( 1.15 usr +  0.00 sys =  1.15 CPU) @ 486.96/s (n=560)
             1 WALL ( 1.15 usr +  0.00 sys =  1.15 CPU) @ 486.96/s (n=560)
to_array_2:  1 WALL ( 1.07 usr +  0.00 sys =  1.07 CPU) @ 482.24/s (n=516)
             1 WALL ( 1.07 usr +  0.00 sys =  1.07 CPU) @ 482.24/s (n=516)
             1 WALL ( 1.07 usr +  0.00 sys =  1.07 CPU) @ 482.24/s (n=516)
             2 WALL ( 1.07 usr +  0.00 sys =  1.07 CPU) @ 482.24/s (n=516)
             1 WALL ( 1.07 usr +  0.00 sys =  1.07 CPU) @ 482.24/s (n=516)
to_array_3:  1 WALL ( 1.07 usr +  0.00 sys =  1.07 CPU) @ 482.24/s (n=516)
             1 WALL ( 1.07 usr +  0.00 sys =  1.07 CPU) @ 482.24/s (n=516)
             1 WALL ( 1.07 usr +  0.00 sys =  1.07 CPU) @ 482.24/s (n=516)
             1 WALL ( 1.07 usr +  0.00 sys =  1.07 CPU) @ 482.24/s (n=516)
             2 WALL ( 1.07 usr +  0.00 sys =  1.07 CPU) @ 482.24/s (n=516)

            Rate    to_array_2 to_array_3 to_array_1
to_array_2 482/s            --      [-0%]        -1%
to_array_3 482+-0/s       [0%]         --        -1%
to_array_1 490+-2/s         2%         2%         --

open Benchmark

(** [to_array_1 t] returns the bindings of [t] as an array of
    [(key, value)] pairs, in the order [Hashtbl.fold] visits them.
    An empty table yields an empty array.

    The array is allocated lazily, on the first binding seen by the
    fold, so that binding can serve as the initial fill value and no
    dummy element is required. *)
let to_array_1 t =
  let fill key value (arr, next) =
    if next = 0 then
      (* First binding: allocate the result, pre-filled with this pair
         (which is therefore already stored in slot 0), and move on to
         slot 1.  Returning index 0 here would re-allocate the array on
         every binding and leave it filled with copies of the last one. *)
      (Array.make (Hashtbl.length t) (key, value), 1)
    else begin
      arr.(next) <- (key, value);
      (arr, next + 1)
    end
  in
  (* [[||]] is only returned when the table is empty. *)
  fst (Hashtbl.fold fill t ([||], 0))

(** [to_array_2 t] returns the bindings of [t] as an array of
    [(key, value)] pairs, in the order [Hashtbl.fold] visits them.

    The array is first filled with placeholder thunks so that no dummy
    [(key, value)] pair is needed to create it; each thunk is then
    overwritten during the fold and finally forced. *)
let to_array_2 t =
  (* Placeholder stored in every slot until the fold overwrites it. *)
  let unset () = raise Not_found in
  let thunks = Array.make (Hashtbl.length t) unset in
  (* Store a thunk for the binding in [slot] and return the next slot. *)
  let store key value slot =
    thunks.(slot) <- (fun () -> (key, value));
    succ slot
  in
  let (_ : int) = Hashtbl.fold store t 0 in
  Array.map (fun thunk -> thunk ()) thunks

(** [to_array_3 t] returns the bindings of [t] as an array of
    [(key, value)] pairs, going through an intermediate list.

    Each binding is consed onto the front of the list, so the array
    holds the bindings in the reverse of the order in which
    [Hashtbl.fold] visits them. *)
let to_array_3 t =
  let bindings =
    Hashtbl.fold (fun key value rest -> (key, value) :: rest) t []
  in
  Array.of_list bindings


(** [h ()] builds the benchmark input: a fresh hash table holding
    100000 bindings with random non-negative integer keys and values.

    Keys are not de-duplicated; [Hashtbl.add] keeps every binding, so
    the table always contains exactly 100000 bindings. *)
let h () =
  let size = 100000 in
  let table = Hashtbl.create size in
  (* Iterate over the intended element count.  The length of the
     freshly created table is 0, so it cannot be used as the bound. *)
  for _i = 1 to size do
    (* [Random.bits] yields 30 random bits.  [Random.int max_int] is
       rejected with [Invalid_argument] on 64-bit platforms because the
       bound must stay below 2^30. *)
    Hashtbl.add table (Random.bits ()) (Random.bits ())
  done;
  table
      
(** [main ()] builds one shared input table with [h] and benchmarks the
    three conversion functions on it with [throughputN] (5 repetitions,
    time argument [1]), then prints the comparison with [tabulate]. *)
let main () =
  let table = h () in
  let candidates =
    [ ("to_array_1", to_array_1, table);
      ("to_array_2", to_array_2, table);
      ("to_array_3", to_array_3, table) ]
  in
  tabulate (throughputN ~repeat:5 1 candidates)


(* Program entry point: run the benchmark. *)
let () = main ()