Version française
Home     About     Download     Resources     Contact us    
Browse thread
Hashtbl.create 401
[ Home ] [ Index: by date | by threads ]
[ Search: ]

[ Message by date: previous | next ] [ Message in thread: previous | next ] [ Thread: previous | next ]
Date: -- (:)
From: sejourne_kevin <sejourne_kevin@y...>
Subject: Re: [Caml-list] Hashtbl.create 401
padiolea@irisa.fr a écrit :
>>On Thu, 21 Apr 2005 21:09:18 -0600
>>Matt Gushee <mgushee@havenrock.com> wrote:
>>
>>
>>>Hello, all--
>>>
>>>While browsing through the Labltk source code (and perhaps in some other
>>>places), I noticed several instances of:
>>>
>>>   Hashtbl.create 401
>>>
>>>Why 401?
>>
>>Hash tables distribute data across a set of hash buckets. If
>>the number of hash buckets is prime (401 is prime) then their
>>is a better chance of the data being evenly distributed across
>>the buckets.
> 
> 
> but it seems that this number is not that much important since,
> as said in the ml source:
> (* We do dynamic hashing, and resize the table and rehash the elements
>    when buckets become too long. *)
If you want to have a HashTbl with only a prime number of hash buckets,
you can do things like that (with code stolen from the stdlib):

let table = (* list of (next_primes 2n+1) of root 3 *)
[| 3; 11; 29; 61; 127; 257; 521; 1049; 2111; 4229; 8461; 16927; 33857;
67723; 135449; 270913; 541831; 1083689; 2167393;(*max 32 bits*) 4334791;
8669593; 17339197; 34678421; 69356857; 138713717; 277427441; 554854889
|]
;;
type ('a, 'b) t =
  {
    mutable size: int;                        (* number of elements *)
    mutable index: int;                       (* index of the next array
size*)
    mutable data: ('a, 'b) bucketlist array   (* the buckets *)
  }

and ...
let create initial_size =
  let i_m, _ =
      fold_lefti (fun i ((_,v_m) as u) x ->
		    let v = abs (x - initial_size) in
		      if v < v_m  then (i,v)  else u
		 ) (0,max_int) table
  in
  let s = min (max 1 table.(i_m)) Sys.max_array_length
  in { size = 0; index = i_m; data = Array.make s Empty }


let resize hashfun tbl =
  let _ = tbl.index <- succ tbl.index in
  let osize = Array.length tbl.data in
  let nsize = try min (table.(tbl.index)) Sys.max_array_length with |_->
osize
  in
    if nsize <> osize then
      begin
	let ndata = Array.create nsize Empty in
	let rec insert_bucket = function
            Empty -> ()
	  | Cons(key, data, rest) ->
              insert_bucket rest; (* preserve original order of elements *)
              let nidx = (hashfun key) mod nsize in
		ndata.(nidx) <- Cons(key, data, ndata.(nidx)) in
	  for i = 0 to osize - 1 do
	    insert_bucket tbl.data.(i)
	  done;
	  tbl.data <- ndata
      end

let copy h =
  { size = h.size;
    index = h.index;
    data = Array.copy h.data }

....