<?xml version="1.0" encoding="ISO-8859-1"?>

<!DOCTYPE message PUBLIC
  "-//MLarc//DTD MLarc output files//EN"
  "../../mlarc.dtd"[
  <!ATTLIST message
    listname CDATA #REQUIRED
    title CDATA #REQUIRED
  >
]>

  <?xml-stylesheet href="../../mlarc.xsl" type="text/xsl"?>


<message 
  url="2009/10/e28672ee7670d5ac0dd2226f1258f67f"
  from="Daniel_Bünzli &lt;daniel.buenzli@e...&gt;"
  author="Daniel_Bünzli"
  date="2009-10-28T02:22:23"
  subject="Re: [Caml-list] xpath or alternatives"
  prev="2009/10/a542f981ec63f1fb137050f0370eebab"
  next="2009/10/0dfed61557d5dd73dce96defba674f66"
  prev-in-thread="2009/09/9772fb8dda9a0ff9a4e709af8a74f01b"
  next-in-thread="2009/09/ee8d71ba703fb8804da576c3c1c5b441"
  prev-thread="2009/09/2cba67e23ddae5dfa0bfa57ddcd55d62"
  next-thread="2009/09/8454d13a99d9e759bda031e09bc9573c"
  root="../../"
  period="month"
  listname="caml-list"
  title="Archives of the Caml mailing list">

<thread subject="xpath or alternatives">
<msg 
  url="2009/09/37d47812ea59cfd681ac9d890e2f8370"
  from="Richard Jones &lt;rich@a...&gt;"
  author="Richard Jones"
  date="2009-09-28T12:17:46"
  subject="xpath or alternatives">
<msg 
  url="2009/09/94f6e44c494293473efdb95b288c071e"
  from="Yaron Minsky &lt;yminsky@g...&gt;"
  author="Yaron Minsky"
  date="2009-09-28T12:49:05"
  subject="Re: [Caml-list] xpath or alternatives">
<msg 
  url="2009/09/2cf490c413ae3290b3a90106269fbf79"
  from="Till Varoquaux &lt;till@p...&gt;"
  author="Till Varoquaux"
  date="2009-09-28T15:06:26"
  subject="Re: [Caml-list] xpath or alternatives">
<msg 
  url="2009/09/1db07fab684fbe08ff4bc8cfab159aa4"
  from="Mikkel_Fahnøe_Jørgensen &lt;mikkel@d...&gt;"
  author="Mikkel_Fahnøe_Jørgensen"
  date="2009-09-29T23:00:18"
  subject="Re: [Caml-list] xpath or alternatives">
<msg 
  url="2009/09/9772fb8dda9a0ff9a4e709af8a74f01b"
  from="Richard Jones &lt;rich@a...&gt;"
  author="Richard Jones"
  date="2009-09-30T10:16:48"
  subject="Re: [Caml-list] xpath or alternatives">
<msg 
  url="2009/10/e28672ee7670d5ac0dd2226f1258f67f"
  from="Daniel_Bünzli &lt;daniel.buenzli@e...&gt;"
  author="Daniel_Bünzli"
  date="2009-10-28T02:22:23"
  subject="Re: [Caml-list] xpath or alternatives">
</msg>
<msg 
  url="2009/09/ee8d71ba703fb8804da576c3c1c5b441"
  from="Sebastien Mondet &lt;sebastien.mondet@g...&gt;"
  author="Sebastien Mondet"
  date="2009-09-30T10:36:42"
  subject="Re: [Caml-list] xpath or alternatives">
</msg>
<msg 
  url="2009/09/347537cf3a21778735a8cbdb96ffb273"
  from="Mikkel_Fahnøe_Jørgensen &lt;mikkel@d...&gt;"
  author="Mikkel_Fahnøe_Jørgensen"
  date="2009-09-30T10:49:13"
  subject="Re: [Caml-list] xpath or alternatives">
</msg>
<msg 
  url="2009/09/b794fda73aacec8db6b84b61cbe9403a"
  from="Dario Teixeira &lt;darioteixeira@y...&gt;"
  author="Dario Teixeira"
  date="2009-09-30T11:05:04"
  subject="Re: [Caml-list] xpath or alternatives">
<msg 
  url="2009/09/2064f5fb7d49b2456fb854a19772c542"
  from="Richard Jones &lt;rich@a...&gt;"
  author="Richard Jones"
  date="2009-09-30T11:57:24"
  subject="Re: [Caml-list] xpath or alternatives">
<msg 
  url="2009/09/dba7d78f6d41e1d47511efe862e139ff"
  from="Richard Jones &lt;rich@a...&gt;"
  author="Richard Jones"
  date="2009-09-30T12:59:05"
  subject="Re: [Caml-list] xpath or alternatives">
<msg 
  url="2009/09/470c2d910911dd60e56656c91bcba0f4"
  from="Till Varoquaux &lt;till@p...&gt;"
  author="Till Varoquaux"
  date="2009-09-30T13:33:09"
  subject="Re: [Caml-list] xpath or alternatives">
<msg 
  url="2009/09/e3543bc02c57573a391aadeea7233899"
  from="Richard Jones &lt;rich@a...&gt;"
  author="Richard Jones"
  date="2009-09-30T14:01:08"
  subject="Re: [Caml-list] xpath or alternatives">
<msg 
  url="2009/09/064c50be12238fa4ea6cd1f584eb9796"
  from="Till Varoquaux &lt;till@p...&gt;"
  author="Till Varoquaux"
  date="2009-09-30T14:28:16"
  subject="Re: [Caml-list] xpath or alternatives">
</msg>
<msg 
  url="2009/09/90f67ecd940b310c22566f58329532eb"
  from="Alain Frisch &lt;alain@f...&gt;"
  author="Alain Frisch"
  date="2009-09-30T14:51:07"
  subject="Re: [Caml-list] xpath or alternatives">
<msg 
  url="2009/09/ff9965ae8b2c9d3bb79f9c2833cbe7e3"
  from="Richard Jones &lt;rich@a...&gt;"
  author="Richard Jones"
  date="2009-09-30T15:09:19"
  subject="Re: [Caml-list] xpath or alternatives">
<msg 
  url="2009/09/d6c38e04f32593321e6a9be81d204f02"
  from="Alain Frisch &lt;alain@f...&gt;"
  author="Alain Frisch"
  date="2009-09-30T15:19:00"
  subject="Re: [Caml-list] xpath or alternatives">
</msg>
</msg>
</msg>
</msg>
</msg>
</msg>
</msg>
</msg>
</msg>
</msg>
</msg>
</msg>
<msg 
  url="2009/09/838fb8e82e9153a36e636eca411026bd"
  from="Stefano Zacchiroli &lt;zack@u...&gt;"
  author="Stefano Zacchiroli"
  date="2009-09-30T13:40:13"
  subject="Re: [Caml-list] xpath or alternatives">
<msg 
  url="2009/09/b4b6cac7297abdd7c98cda9cd8b4e12e"
  from="Gerd Stolpmann &lt;gerd@g...&gt;"
  author="Gerd Stolpmann"
  date="2009-09-30T14:45:08"
  subject="Re: [Caml-list] xpath or alternatives">
<msg 
  url="2009/09/7dd2bfca16401cbb22d3bff621ae6fb7"
  from="Stefano Zacchiroli &lt;zack@u...&gt;"
  author="Stefano Zacchiroli"
  date="2009-09-30T15:13:16"
  subject="Re: [Caml-list] xpath or alternatives">
<msg 
  url="2009/09/5ce9b504d879c5ff02ba328c5427e2ea"
  from="Jordan Schatz &lt;white.armor@g...&gt;"
  author="Jordan Schatz"
  date="2009-09-30T15:22:39"
  subject="Re: [Caml-list] xpath or alternatives">
</msg>
</msg>
</msg>
</msg>
</msg>
</thread>

<contents>
Sorry for the late reply.

On Wed, Sep 30, 2009 at 01:00:15AM +0200, Mikkel Fahnøe Jørgensen wrote:

&gt; Otherwise there is xmlm which is self-contained in single xml file,
&gt; and as I recall, has some sort of zipper navigator. (I initially
&gt; intended to use it before deciding on the json format):

The cursor api was removed from the library in 1.0.0.


On Wed, Sep 30, 2009 at 6:16 PM, Richard Jones &lt;rich@annexia.org&gt; wrote:

&gt; It's interesting you mention xmlm, because I couldn't write
&gt; the code using xmlm at all.

Why? That doesn't feel like an insurmountable task.

Below is a function that extracts from a (sub)tree's sequence of
signals the attributes' data of an absolute path (i.e. the particular
xpath pattern you're after if I understand correctly). Each
attribute's data is stored in a separate list. The function is simpler
than it looks, in essence it's just a recursive case analysis on
signals. In the function [aux], [pos] maintains the current path in
the parse tree.  [mismatch] counts the level of mismatch w.r.t. the
[path] we are looking for.

(* [absolute_path_atts i path atts] reads signals from the Xmlm input [i]
   up to and including the end of the element that starts at the current
   position. [path] is a list of element names beginning with that
   element itself; [atts] is a list of attribute names. The result holds
   one list per name in [atts], in the same order as [atts], with the
   values found on the elements located at [path], in document order.
   Calls [invalid_arg] if the next signal is not an `El_start. *)
let absolute_path_atts i path atts =
  (* [pos]      names of the currently open elements, innermost first.
     [mismatch] 0 while the open elements follow [path]; otherwise the
                number of open elements at or below the first one that
                left the path.
     [path]     the components that remain to be matched.
     [accs]     (attribute name, values collected so far, newest first). *)
  let rec aux i pos mismatch path accs = match Xmlm.input i with
  (* Here [atts] is the attribute list of the element just opened; it
     shadows the [atts] parameter of the enclosing function. *)
  | `El_start (tag, atts) -&gt;
      (* Already off the path: just track the nesting depth. *)
      if mismatch &gt; 0 then aux i (tag :: pos) (mismatch + 1) path accs else
      begin match path with
      | n :: path' when n = tag -&gt;
	  (* Matched an intermediate component: keep descending. *)
	  if path' &lt;&gt; [] then aux i (tag :: pos) 0 path' accs else
	  (* Matched the last component: record each requested attribute
	     that this element carries, leave the others untouched. *)
	  let update_acc ((att, acc) as v) =
	    try att, (List.assoc att atts) :: acc with Not_found -&gt; v
	  in
	  aux i (tag :: pos) 0 [] (List.map update_acc accs)
      (* Wrong name, or [path] is already exhausted: leave the path. *)
      | _ -&gt; aux i (tag :: pos) (mismatch + 1) path accs
      end
  | `El_end -&gt;
      begin match pos with
      (* Closing the element the walk started on: done. Each value list
         is reversed back into document order, and [List.rev_map] undoes
         the reversal applied when [accs] was built. *)
      | _ :: [] -&gt; List.rev_map (fun (att, acc) -&gt; List.rev acc) accs
      | tag :: pos' -&gt;
	  if mismatch &gt; 0 then aux i pos' (mismatch - 1) path accs else
	  (* Closing an element that was on the path: put its name back so
	     that a following sibling can match it again. *)
	  aux i pos' 0 (tag :: path) accs
      (* An `El_end with no open element; the walk always begins on an
         `El_start (checked below with [Xmlm.peek]). *)
      | [] -&gt; assert false
      end
  | `Data _ -&gt; aux i pos mismatch path accs
  (* No `Dtd signal is expected once the walk has started. *)
  | `Dtd _ -&gt; assert false
  in
  (* One empty accumulator per requested attribute, in reverse order. *)
  let accs = List.rev_map (fun att -&gt; att, []) atts in
  (* Peek rather than input so that [aux] consumes the `El_start itself. *)
  begin match Xmlm.peek i with
  | `El_start _ -&gt; aux i [] 0 path accs
  | `Dtd _ | `El_end | `Data _ -&gt; invalid_arg "no subtree here"
  end

Now your function becomes something like this :

(* [get_devices_from_xml xml] parses the string [xml] and returns the
   values of the dev attributes followed by the values of the file
   attributes found on the domain/devices/disk/source elements.
   Raises [Failure] unless exactly two value lists come back and the
   input is exhausted; an Xmlm parse error is turned into a [Failure]
   whose message has the form line:column: description. *)
let get_devices_from_xml xml =
  let source_path = ["", "domain"; "", "devices"; "", "disk"; "", "source"] in
  let wanted_atts = ["", "dev"; "", "file"] in
  try
    let input = Xmlm.make_input (`String (0, xml)) in
    (* Drop the leading `Dtd signal. *)
    let _dtd = Xmlm.input input in
    let collected = absolute_path_atts input source_path wanted_atts in
    begin match collected with
    | [devs; files] when Xmlm.eoi input -&gt; List.append devs files
    | _ -&gt; failwith "xml document not well-formed"
    end
  with
  | Xmlm.Error ((line, column), err) -&gt;
      let description = Xmlm.error_message err in
      failwith (Printf.sprintf "%d:%d: %s" line column description)

I know this is still more effort than you'd like, but
Xmlm is purposely low-level and will remain so. It provides only a
robust xml parser convenient (I believe) to develop higher-level
abstractions to process the insane uses of this standard. It would be
nice to develop a module using xmlm to provide a (non-camlp4) dsl for
xml queries. Unfortunately I do not have the time for that at the
moment (unless someone wants to fund me to do that...).

Best,

Daniel

</contents>

</message>

