Browse thread
Hashtbl.create 401
-
Matt Gushee
- Radu Grigore
-
Erik de Castro Lopo
-
padiolea@i...
- sejourne_kevin
-
padiolea@i...
[
Home
]
[ Index:
by date
|
by threads
]
[ Message by date: previous | next ] [ Message in thread: previous | next ] [ Thread: previous | next ]
[ Message by date: previous | next ] [ Message in thread: previous | next ] [ Thread: previous | next ]
| Date: | -- (:) |
| From: | sejourne_kevin <sejourne_kevin@y...> |
| Subject: | Re: [Caml-list] Hashtbl.create 401 |
padiolea@irisa.fr a écrit :
>>On Thu, 21 Apr 2005 21:09:18 -0600
>>Matt Gushee <mgushee@havenrock.com> wrote:
>>
>>
>>>Hello, all--
>>>
>>>While browsing through the Labltk source code (and perhaps in some other
>>>places), I noticed several instances of:
>>>
>>> Hashtbl.create 401
>>>
>>>Why 401?
>>
>>Hash tables distribute data across a set of hash buckets. If
>>the number of hash buckets is prime (401 is prime) then their
>>is a better chance of the data being evenly distributed across
>>the buckets.
>
>
> but it seems that this number is not that much important since,
> as said in the ml source:
> (* We do dynamic hashing, and resize the table and rehash the elements
> when buckets become too long. *)
If you want to have a HashTbl with only a prime number of hash buckets,
you can do things like that (with code stolen from the stdlib):
let table = (* list of (next_primes 2n+1) of root 3 *)
[| 3; 11; 29; 61; 127; 257; 521; 1049; 2111; 4229; 8461; 16927; 33857;
67723; 135449; 270913; 541831; 1083689; 2167393;(*max 32 bits*) 4334791;
8669593; 17339197; 34678421; 69356857; 138713717; 277427441; 554854889
|]
;;
type ('a, 'b) t =
{
mutable size: int; (* number of elements *)
mutable index: int; (* index of the next array
size*)
mutable data: ('a, 'b) bucketlist array (* the buckets *)
}
and ...
let create initial_size =
let i_m, _ =
fold_lefti (fun i ((_,v_m) as u) x ->
let v = abs (x - initial_size) in
if v < v_m then (i,v) else u
) (0,max_int) table
in
let s = min (max 1 table.(i_m)) Sys.max_array_length
in { size = 0; index = i_m; data = Array.make s Empty }
let resize hashfun tbl =
let _ = tbl.index <- succ tbl.index in
let osize = Array.length tbl.data in
let nsize = try min (table.(tbl.index)) Sys.max_array_length with |_->
osize
in
if nsize <> osize then
begin
let ndata = Array.create nsize Empty in
let rec insert_bucket = function
Empty -> ()
| Cons(key, data, rest) ->
insert_bucket rest; (* preserve original order of elements *)
let nidx = (hashfun key) mod nsize in
ndata.(nidx) <- Cons(key, data, ndata.(nidx)) in
for i = 0 to osize - 1 do
insert_bucket tbl.data.(i)
done;
tbl.data <- ndata
end
let copy h =
{ size = h.size;
index = h.index;
data = Array.copy h.data }
....