Browse thread
[Caml-list] How to read three integers from a text-file... ?
[
Home
]
[ Index:
by date
|
by threads
]
[ Message by date: previous | next ] [ Message in thread: previous | next ] [ Thread: previous | next ]
[ Message by date: previous | next ] [ Message in thread: previous | next ] [ Thread: previous | next ]
| Date: | -- (:) |
| From: | Lauri Alanko <la@i...> |
| Subject: | [Caml-list] input_line (Re: pervasives) |
On Sat, Apr 27, 2002 at 02:43:26PM +1000, John Max Skaller wrote:
> I have a philosophy .. a bit extreme perhaps .. I NEVER read anything
> other than lines (or whole files).
Vaguely related to this, I have some minor gripes about input_line.
Here's the implementation in 3.04:
let rec input_line chan =
let n = input_scan_line chan in
if n = 0 then (* n = 0: we are at EOF *)
raise End_of_file
else if n > 0 then begin (* n > 0: newline found in buffer *)
let res = string_create (n-1) in
ignore (unsafe_input chan res 0 (n-1));
ignore (input_char chan); (* skip the newline *)
res
end else begin (* n < 0: newline not found *)
let beg = string_create (-n) in
ignore(unsafe_input chan beg 0 (-n));
try
beg ^ input_line chan
with End_of_file ->
beg
end
It's obvious that this doesn't handle obnoxiously large newlineless
inputs very gracefully. Its complexity is quadratic and it's not tail
recursive. And worst of all, there are no limits on the size of string
to be created. So a maliciously designed huge input could blow either
the stack or the heap. I wouldn't want to use input_line in a network
application.
(All right, on 32-bit architectures input_line will terminate at ~16M
when string_create fails, but I wouldn't call that a solution.)
So here's an alternative implementation. It's tail recursive, its
amortized cpu usage is linear, and the space usage can be bounded:
exception Buffer_overflow of string
let expand_buf buf old_size expand =
if old_size == 0 then
string_create expand
else
let new_buf = string_create (old_size + expand) in
string_blit buf 0 new_buf 0 old_size;
new_buf
let rec input_bounded_line_to_buf chan buf offset maxlen =
let n = input_scan_line chan in
if n > maxlen + 1 || n < (-maxlen) then
let err_buf = expand_buf buf offset maxlen in
ignore (unsafe_input chan err_buf offset maxlen);
raise (Buffer_overflow err_buf)
else if n > 0 then
let ret_buf = expand_buf buf offset (n - 1) in
ignore (unsafe_input chan ret_buf offset (n - 1));
ignore (input_char chan);
ret_buf
else if n < 0 then
let m = (-n) in
let new_offset = offset + m in
let old_len = string_length buf in
let new_buf =
if new_offset > old_len then
expand_buf buf offset (new_offset + old_len * 2)
else
buf
in
ignore (unsafe_input chan new_buf offset m);
input_bounded_line_to_buf chan new_buf new_offset (maxlen - m)
else if offset = 0 then
raise End_of_file
else
expand_buf buf offset 0
let input_bounded_line chan maxlen =
input_bounded_line_to_buf chan "" 0 maxlen
let input_line chan = input_bounded_line chan max_int
I hope that something similar to this could be included in Pervasives in
the future.
Lauri Alanko
la@iki.fi
-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners