Version française
Home     About     Download     Resources     Contact us    
Browse thread
[Caml-list] My function is too eager
[ Home ] [ Index: by date | by threads ]
[ Search: ]

[ Message by date: previous | next ] [ Message in thread: previous | next ] [ Thread: previous | next ]
Date: -- (:)
From: Diego Olivier Fernandez Pons <Diego.FERNANDEZ_PONS@e...>
Subject: [Caml-list] My function is too eager
    Bonjour,

I have to compute statistics over all the subtrees of a tree keeping
generality and efficiency. The function I have written is clearly too
eager but I haven't been able to rewritte it in a proper lazy way
because the lazyness I need seems to be "transversal".

The problem is to count all subtrees having a property (P) which is
checked via a boolean function f : int -> int -> int -> int -> bool
where the first four parameters are the
- shortest path of the tree
- longest path of the tree
- size of the tree
- total path lengt of the tree

all computed incrementally in a bottom-up manner. Then, they have to
be sorted according to a parameter among these.

example :

avl_subtrees Shortest x;;
- : (int * int * int) list =
[(1, 50, 501); (2, 21, 251); (4, 13, 63); (5, 3, 31); (6, 1, 16);
(7, 0, 8); (8, 0, 4); (9, 1, 2)]

means that in the tree x there are 501 subtrees of shortest path equal
to 1 and 50 of them are avl trees, etc.


I wrote the following code. It just computes the data incrementally,
checks the boolean function and updates the counters (a bag -
insertion increments the counter if the element is already there).


type evaluation = Shortest | Longest | Size | Average

let extract_statistics = function stats -> DoubleBag.fold_right (fun k
v v' l -> (k, v, v') :: l) stats []

(* Core function strict version *)

let make_distribution_function = fun f statistic_mode ->
  let rec subtree bag = function
    | E -> (0, 0, 0, 0, bag)
    | N (l, v, r, _) ->
      let (l_shortest, l_longest, l_size, l_total, bag') =
          subtree bag l in
      let (r_shortest, r_longest, r_size, r_total, bag'') =
          subtree bag' r in
      let
          shortest = 1 + min l_shortest r_shortest and
          longest  = 1 + max l_longest r_longest and
          size     = 1 + l_size + r_size and
          total    = 1 + l_size + r_size + l_total + r_total
      in
      let parameter = match statistic_mode with
        | Shortest -> shortest
        | Longest -> longest
        | Size -> size
        | Average -> total / (size + 1)
      in
        (shortest, longest, size, total,
         if f l_shortest r_shortest
              l_longest  r_longest
              l_size     r_size
         then DoubleBag.insert_both parameter bag''
         else DoubleBag.insert_two parameter bag'')
      in
        subtree

let make_subtrees = fun f mode tree ->
  let (_, _, _, _, stats) =
    (make_distribution_function f mode) DoubleBag.empty tree
  in extract_statistics stats


(* boolean function examples *)

let avl_subtrees = fun mode tree ->
  make_subtrees (fun _ _ ll lr _ _ -> abs (ll - lr) <= 1) mode tree

let red_black_subtrees = fun mode tree ->
  make_subtrees (ls rs ll rl _ _ -> ls <= 2 * ll && rs <= 2 * rl) mode
tree

etc.

One can see that only part of the data will be really used
  fun _ _ ll lr _ _ -> abs (ll - lr) <= 1

therefor I made my function lazy applying a classical transformation

let
   shortest = lazy (1 + min (force l_shortest) (force r_shortest))
   longest = lazy (1 + max (force l_longest) (force l_longest))
   ...
in

but computing these lazily is not faster than their strict
counterpart. What I would need is to make lazy the complete
incremental computation "data by data" and not "level by level".

Does anyone have an idea of how I could achieve this ?

    Cordialement,

        Diego Olivier

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners