| Attached Files | 0001-Fix-a-count-concat-bug-for-Stream-i-l-cons-app-and-s.patch [^] (9,870 bytes) 2012-06-10 10:15 [Show Content] [Hide Content]From 2eec0b990bbf846c4db0540aecbe4ec4e404952b Mon Sep 17 00:00:00 2001
From: Gabriel Scherer <gabriel.scherer@inria.fr>
Date: Fri, 8 Jun 2012 19:58:15 +0200
Subject: [PATCH 1/2] Fix a "count/concat" bug for Stream ({i,l}{cons,app} and
slazy)
There is a bug in the way concatenating operations work when combined
with `Sgen`-defined stream (Stream.from, Stream.of_string): the
concatenation functions reset the `count` field to 0, which disturbs
the Sgen producer.
While the fix in the Scons case is easy (instead of 0, set
the count to `original_count - 1`), fixing the Sapp case is more
delicate (we can't predict the size of the prepended stream). Our
technique is to change the stored left-hand-side to not the stream
data only, but the whole stream, count included.
Once we detect the prepended stream was completely consumed, we can
then restore the count to its previous value, so that Sgen's function
can be provided correct count information. This required a change in
the internal `get_data` implementation.
Slazy-constructed streams have the exact same issue: we don't know
their count before forcin them. Again, `get_data` is changed to
dynamically update the count at forcing time.
---
stdlib/stream.ml | 91 ++++++++++++-------
testsuite/tests/lib-stream/Makefile | 4 +
testsuite/tests/lib-stream/count_concat_bug.ml | 57 ++++++++++++
.../tests/lib-stream/count_concat_bug.reference | 2 +
4 files changed, 120 insertions(+), 34 deletions(-)
create mode 100644 testsuite/tests/lib-stream/Makefile
create mode 100644 testsuite/tests/lib-stream/count_concat_bug.ml
create mode 100644 testsuite/tests/lib-stream/count_concat_bug.reference
diff --git a/stdlib/stream.ml b/stdlib/stream.ml
index fc66acb..55bf31d 100644
--- a/stdlib/stream.ml
+++ b/stdlib/stream.ml
@@ -21,8 +21,8 @@ type 'a t = { count : int; data : 'a data }
and 'a data =
Sempty
| Scons of 'a * 'a data
- | Sapp of 'a data * 'a data
- | Slazy of 'a data Lazy.t
+ | Sapp of 'a data * 'a t
+ | Slazy of 'a t Lazy.t
| Sgen of 'a gen
| Sbuffio of buffio
and 'a gen = { mutable curr : 'a option option; func : int -> 'a option }
@@ -42,26 +42,37 @@ let fill_buff b =
b.len <- input b.ic b.buff 0 (String.length b.buff); b.ind <- 0
;;
-let rec get_data count d = match d with
- (* Returns either Sempty or Scons(a, _) even when d is a generator
- or a buffer. In those cases, the item a is seen as extracted from
- the generator/buffer.
- The count parameter is used for calling `Sgen-functions'. *)
+let rec get_data s d = match d with
+ (* Only return a "forced stream", that is either Sempty or
+ Scons(a,_). If d is a generator or a buffer, the item a is seen as
+ extracted from the generator/buffer.
+
+ Forcing also updates the "count" field of the delayed stream,
+ in the Sapp and Slazy cases (see slazy/lapp implementation below). *)
Sempty | Scons (_, _) -> d
- | Sapp (d1, d2) ->
- begin match get_data count d1 with
- Scons (a, d11) -> Scons (a, Sapp (d11, d2))
- | Sempty -> get_data count d2
+ | Sapp (d1, s2) ->
+ begin match get_data s d1 with
+ Scons (a, d11) -> Scons (a, Sapp (d11, s2))
+ | Sempty ->
+ set_count s s2.count;
+ get_data s s2.data
| _ -> assert false
end
- | Sgen {curr = Some None; func = _ } -> Sempty
- | Sgen ({curr = Some(Some a); func = f} as g) ->
+ | Sgen {curr = Some None; _ } -> Sempty
+ | Sgen ({curr = Some(Some a); _ } as g) ->
g.curr <- None; Scons(a, d)
- | Sgen g ->
- begin match g.func count with
+ | Sgen ({curr = None; _} as g) ->
+ (* Warning: anyone using g thinks that an item has been read *)
+ begin match g.func s.count with
None -> g.curr <- Some(None); Sempty
- | Some a -> Scons(a, d)
- (* Warning: anyone using g thinks that an item has been read *)
+ | Some a ->
+ (* One must not update g.curr here, because there Scons(a,d)
+ result of get_data, if the outer stream s was a Sapp, will
+ be used to update the outer stream to Scons(a,s): there is
+ already a memoization process at the outer layer. If g.curr
+ was updated here, the saved element would be produced twice,
+ once by the outer layer, once by Sgen/g.curr. *)
+ Scons(a, d)
end
| Sbuffio b ->
if b.ind >= b.len then fill_buff b;
@@ -69,7 +80,10 @@ let rec get_data count d = match d with
let r = Obj.magic (String.unsafe_get b.buff b.ind) in
(* Warning: anyone using g thinks that an item has been read *)
b.ind <- succ b.ind; Scons(r, d)
- | Slazy f -> get_data count (Lazy.force f)
+ | Slazy f ->
+ let s2 = Lazy.force f in
+ set_count s s2.count;
+ get_data s s2.data
;;
let rec peek s =
@@ -78,14 +92,20 @@ let rec peek s =
Sempty -> None
| Scons (a, _) -> Some a
| Sapp (_, _) ->
- begin match get_data s.count s.data with
- Scons(a, _) as d -> set_data s d; Some a
+ begin match get_data s s.data with
+ | Scons(a, _) as d -> set_data s d; Some a
| Sempty -> None
| _ -> assert false
end
- | Slazy f -> set_data s (Lazy.force f); peek s
- | Sgen {curr = Some a} -> a
- | Sgen g -> let x = g.func s.count in g.curr <- Some x; x
+ | Slazy f ->
+ let s2 = Lazy.force f in
+ set_count s s2.count;
+ set_data s s2.data;
+ peek s
+ | Sgen {curr = Some a; _ } -> a
+ | Sgen ({curr = None; _ } as g) ->
+ let x = g.func s.count in
+ g.curr <- Some x; x
| Sbuffio b ->
if b.ind >= b.len then fill_buff b;
if b.len == 0 then begin set_data s Sempty; None end
@@ -157,18 +177,21 @@ let of_channel ic =
(* Stream expressions builders *)
-let iapp i s = {count = 0; data = Sapp (i.data, s.data)};;
-let icons i s = {count = 0; data = Scons (i, s.data)};;
-let ising i = {count = 0; data = Scons (i, Sempty)};;
+(* In the slazy and lapp case, we can't statically predict the value
+ of the "count" field. We put a dummy 0 value, which will be updated
+ when the parameter stream is forced (see update code in [get_data]
+ and [peek]). *)
-let lapp f s =
- {count = 0; data = Slazy (lazy(Sapp ((f ()).data, s.data)))}
-;;
-let lcons f s = {count = 0; data = Slazy (lazy(Scons (f (), s.data)))};;
-let lsing f = {count = 0; data = Slazy (lazy(Scons (f (), Sempty)))};;
+let ising i = {count = 0; data = Scons (i, Sempty)};;
+let icons i s = {count = s.count - 1; data = Scons (i, s.data)};;
+let iapp i s = {count = i.count; data = Sapp (i.data, s)};;
let sempty = {count = 0; data = Sempty};;
-let slazy f = {count = 0; data = Slazy (lazy(f ()).data)};;
+let slazy f = {count = 0; data = Slazy (lazy (f()))};;
+
+let lsing f = {count = 0; data = Slazy (lazy (ising (f())))};;
+let lcons f s = {count = 0; data = Slazy (lazy (icons (f()) s))};;
+let lapp f s = {count = 0; data = Slazy (lazy(iapp (f()) s))};;
(* For debugging use *)
@@ -188,11 +211,11 @@ and dump_data f =
print_string ", ";
dump_data f d;
print_string ")"
- | Sapp (d1, d2) ->
+ | Sapp (d1, s2) ->
print_string "Sapp (";
dump_data f d1;
print_string ", ";
- dump_data f d2;
+ dump f s2;
print_string ")"
| Slazy _ -> print_string "Slazy"
| Sgen _ -> print_string "Sgen"
diff --git a/testsuite/tests/lib-stream/Makefile b/testsuite/tests/lib-stream/Makefile
new file mode 100644
index 0000000..65ecf12
--- /dev/null
+++ b/testsuite/tests/lib-stream/Makefile
@@ -0,0 +1,4 @@
+BASEDIR=../..
+MODULES=testing
+include $(BASEDIR)/makefiles/Makefile.several
+include $(BASEDIR)/makefiles/Makefile.common
diff --git a/testsuite/tests/lib-stream/count_concat_bug.ml b/testsuite/tests/lib-stream/count_concat_bug.ml
new file mode 100644
index 0000000..97ec6bc
--- /dev/null
+++ b/testsuite/tests/lib-stream/count_concat_bug.ml
@@ -0,0 +1,57 @@
+let is_empty s =
+ try Stream.empty s; true with Stream.Failure -> false
+
+let test_icons =
+ let s = Stream.of_string "ab" in
+ let s = Stream.icons 'c' s in
+ Testing.test (Stream.next s = 'c');
+ Testing.test (Stream.next s = 'a');
+ Testing.test (Stream.next s = 'b');
+ Testing.test (is_empty s);
+ ()
+
+let test_lcons =
+ let s = Stream.of_string "ab" in
+ let s = Stream.lcons (fun () -> 'c') s in
+ Testing.test (Stream.next s = 'c');
+ Testing.test (Stream.next s = 'a');
+ Testing.test (Stream.next s = 'b');
+ Testing.test (is_empty s);
+ ()
+
+let test_iapp =
+ let s = Stream.of_string "ab" in
+ let s = Stream.iapp (Stream.of_list ['c']) s in
+ Testing.test (Stream.next s = 'c');
+ Testing.test (Stream.next s = 'a');
+ Testing.test (Stream.next s = 'b');
+ Testing.test (is_empty s);
+ ()
+
+let test_lapp_right =
+ let s1 = Stream.of_list ['c'] in
+ let s2 = Stream.of_string "ab" in
+ let s = Stream.lapp (fun () -> s1) s2 in
+ Testing.test (Stream.next s = 'c');
+ Testing.test (Stream.next s = 'a');
+ Testing.test (Stream.next s = 'b');
+ Testing.test (is_empty s);
+ ()
+
+let test_lapp_left =
+ let s1 = Stream.of_string "bc" in
+ let s2 = Stream.of_list ['a'] in
+ Testing.test (Stream.next s1 = 'b');
+ let s = Stream.lapp (fun () -> s1) s2 in
+ Testing.test (Stream.next s = 'c');
+ Testing.test (Stream.next s = 'a');
+ Testing.test (is_empty s);
+ ()
+
+let test_slazy =
+ let s = Stream.of_string "ab" in
+ Testing.test (Stream.next s = 'a');
+ let s = Stream.slazy (fun () -> s) in
+ Testing.test (Stream.next s = 'b');
+ Testing.test (is_empty s);
+ ()
diff --git a/testsuite/tests/lib-stream/count_concat_bug.reference b/testsuite/tests/lib-stream/count_concat_bug.reference
new file mode 100644
index 0000000..acdc75c
--- /dev/null
+++ b/testsuite/tests/lib-stream/count_concat_bug.reference
@@ -0,0 +1,2 @@
+0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
+All tests succeeded.
--
1.7.7.3
0002-simplify-stdlib-stream.ml-by-collapsing-two-memoizat.patch [^] (3,594 bytes) 2012-06-10 10:16 [Show Content] [Hide Content]From d7b98e57839ba8f8e3c6f0d1978f494503bac766 Mon Sep 17 00:00:00 2001
From: Gabriel Scherer <gabriel.scherer@inria.fr>
Date: Sat, 9 Jun 2012 12:45:48 +0200
Subject: [PATCH 2/2] simplify stdlib/stream.ml by collapsing two memoization
layers
There currently are two different memoization logics present in
stdlib/stream.ml:
- get_data/set_data memoizes the result of forcing Sapp nodes
- Sgen nodes have a specific "curr" mutable field
This commit simplifies the implemantation by removing the
Sgen-specific memoization and reusing the get_data/set_data logic
instead.
---
stdlib/stream.ml | 39 +++++++++++----------------------------
1 files changed, 11 insertions(+), 28 deletions(-)
diff --git a/stdlib/stream.ml b/stdlib/stream.ml
index 55bf31d..9e01b02 100644
--- a/stdlib/stream.ml
+++ b/stdlib/stream.ml
@@ -23,9 +23,8 @@ and 'a data =
| Scons of 'a * 'a data
| Sapp of 'a data * 'a t
| Slazy of 'a t Lazy.t
- | Sgen of 'a gen
+ | Sgen of (int -> 'a option)
| Sbuffio of buffio
-and 'a gen = { mutable curr : 'a option option; func : int -> 'a option }
and buffio =
{ ic : in_channel; buff : string; mutable len : int; mutable ind : int }
;;
@@ -58,21 +57,11 @@ let rec get_data s d = match d with
get_data s s2.data
| _ -> assert false
end
- | Sgen {curr = Some None; _ } -> Sempty
- | Sgen ({curr = Some(Some a); _ } as g) ->
- g.curr <- None; Scons(a, d)
- | Sgen ({curr = None; _} as g) ->
+ | Sgen gen ->
(* Warning: anyone using g thinks that an item has been read *)
- begin match g.func s.count with
- None -> g.curr <- Some(None); Sempty
- | Some a ->
- (* One must not update g.curr here, because there Scons(a,d)
- result of get_data, if the outer stream s was a Sapp, will
- be used to update the outer stream to Scons(a,s): there is
- already a memoization process at the outer layer. If g.curr
- was updated here, the saved element would be produced twice,
- once by the outer layer, once by Sgen/g.curr. *)
- Scons(a, d)
+ begin match gen s.count with
+ None -> Sempty
+ | Some a -> Scons(a, d)
end
| Sbuffio b ->
if b.ind >= b.len then fill_buff b;
@@ -91,21 +80,15 @@ let rec peek s =
match s.data with
Sempty -> None
| Scons (a, _) -> Some a
- | Sapp (_, _) ->
- begin match get_data s s.data with
- | Scons(a, _) as d -> set_data s d; Some a
- | Sempty -> None
- | _ -> assert false
- end
+ | Sapp (_, _) | Sgen _ ->
+ let d = get_data s s.data in
+ set_data s d;
+ peek s
| Slazy f ->
let s2 = Lazy.force f in
set_count s s2.count;
set_data s s2.data;
peek s
- | Sgen {curr = Some a; _ } -> a
- | Sgen ({curr = None; _ } as g) ->
- let x = g.func s.count in
- g.curr <- Some x; x
| Sbuffio b ->
if b.ind >= b.len then fill_buff b;
if b.len == 0 then begin set_data s Sempty; None end
@@ -115,7 +98,7 @@ let rec peek s =
let rec junk s =
match s.data with
Scons (_, d) -> set_count s (succ s.count); set_data s d
- | Sgen ({curr = Some _} as g) -> set_count s (succ s.count); g.curr <- None
+ | Sgen _ -> set_count s (succ s.count)
| Sbuffio b -> set_count s (succ s.count); b.ind <- succ b.ind
| _ ->
match peek s with
@@ -160,7 +143,7 @@ let iter f strm =
(* Stream building functions *)
-let from f = {count = 0; data = Sgen {curr = None; func = f}};;
+let from f = {count = 0; data = Sgen f};;
let of_list l =
{count = 0; data = List.fold_right (fun x l -> Scons (x, l)) l Sempty}
--
1.7.7.3
|