+(* Read the input file and parse it into internal structures. This is
+ * by no means a complete RELAX NG parser, but is just enough to be
+ * able to parse the specific input file.
+ *)
+type rng =
+ | Element of string * rng list (* <element name=name/> *)
+ | Attribute of string * rng list (* <attribute name=name/> *)
+ | Interleave of rng list (* <interleave/> *)
+ | ZeroOrMore of rng (* <zeroOrMore/> *)
+ | OneOrMore of rng (* <oneOrMore/> *)
+ | Optional of rng (* <optional/> *)
+ | Choice of string list (* <choice><value/>*</choice> *)
+ | Value of string (* <value>str</value> *)
+ | Text (* <text/> *)
+
+let rec string_of_rng = function
+ | Element (name, xs) ->
+ "Element (\"" ^ name ^ "\", (" ^ string_of_rng_list xs ^ "))"
+ | Attribute (name, xs) ->
+ "Attribute (\"" ^ name ^ "\", (" ^ string_of_rng_list xs ^ "))"
+ | Interleave xs -> "Interleave (" ^ string_of_rng_list xs ^ ")"
+ | ZeroOrMore rng -> "ZeroOrMore (" ^ string_of_rng rng ^ ")"
+ | OneOrMore rng -> "OneOrMore (" ^ string_of_rng rng ^ ")"
+ | Optional rng -> "Optional (" ^ string_of_rng rng ^ ")"
+ | Choice values -> "Choice [" ^ String.concat ", " values ^ "]"
+ | Value value -> "Value \"" ^ value ^ "\""
+ | Text -> "Text"
+
+and string_of_rng_list xs =
+ String.concat ", " (List.map string_of_rng xs)
+
+let rec parse_rng ?defines context = function
+ | [] -> []
+ | Xml.Element ("element", ["name", name], children) :: rest ->
+ Element (name, parse_rng ?defines context children)
+ :: parse_rng ?defines context rest
+ | Xml.Element ("attribute", ["name", name], children) :: rest ->
+ Attribute (name, parse_rng ?defines context children)
+ :: parse_rng ?defines context rest
+ | Xml.Element ("interleave", [], children) :: rest ->
+ Interleave (parse_rng ?defines context children)
+ :: parse_rng ?defines context rest
+ | Xml.Element ("zeroOrMore", [], [child]) :: rest ->
+ let rng = parse_rng ?defines context [child] in
+ (match rng with
+ | [child] -> ZeroOrMore child :: parse_rng ?defines context rest
+ | _ ->
+ failwithf "%s: <zeroOrMore> contains more than one child element"
+ context
+ )
+ | Xml.Element ("oneOrMore", [], [child]) :: rest ->
+ let rng = parse_rng ?defines context [child] in
+ (match rng with
+ | [child] -> OneOrMore child :: parse_rng ?defines context rest
+ | _ ->
+ failwithf "%s: <oneOrMore> contains more than one child element"
+ context
+ )
+ | Xml.Element ("optional", [], [child]) :: rest ->
+ let rng = parse_rng ?defines context [child] in
+ (match rng with
+ | [child] -> Optional child :: parse_rng ?defines context rest
+ | _ ->
+ failwithf "%s: <optional> contains more than one child element"
+ context
+ )
+ | Xml.Element ("choice", [], children) :: rest ->
+ let values = List.map (
+ function Xml.Element ("value", [], [Xml.PCData value]) -> value
+ | _ ->
+ failwithf "%s: can't handle anything except <value> in <choice>"
+ context
+ ) children in
+ Choice values
+ :: parse_rng ?defines context rest
+ | Xml.Element ("value", [], [Xml.PCData value]) :: rest ->
+ Value value :: parse_rng ?defines context rest
+ | Xml.Element ("text", [], []) :: rest ->
+ Text :: parse_rng ?defines context rest
+ | Xml.Element ("ref", ["name", name], []) :: rest ->
+ (* Look up the reference. Because of limitations in this parser,
+ * we can't handle arbitrarily nested <ref> yet. You can only
+ * use <ref> from inside <start>.
+ *)
+ (match defines with
+ | None ->
+ failwithf "%s: contains <ref>, but no refs are defined yet" context
+ | Some map ->
+ let rng = StringMap.find name map in
+ rng @ parse_rng ?defines context rest
+ )
+ | x :: _ ->
+ failwithf "%s: can't handle '%s' in schema" context (Xml.to_string x)
+
+let grammar =
+ let xml = Xml.parse_file rng_input in
+ match xml with
+ | Xml.Element ("grammar", _,
+ Xml.Element ("start", _, gram) :: defines) ->
+ (* The <define/> elements are referenced in the <start> section,
+ * so build a map of those first.
+ *)
+ let defines = List.fold_left (
+ fun map ->
+ function Xml.Element ("define", ["name", name], defn) ->
+ StringMap.add name defn map
+ | _ ->
+ failwithf "%s: expected <define name=name/>" rng_input
+ ) StringMap.empty defines in
+ let defines = StringMap.mapi parse_rng defines in
+
+ (* Parse the <start> clause, passing the defines. *)
+ parse_rng ~defines "<start>" gram
+ | _ ->
+ failwithf "%s: input is not <grammar><start/><define>*</grammar>"
+ rng_input
+
+let name_of_field = function
+ | Element (name, _) | Attribute (name, _)
+ | ZeroOrMore (Element (name, _))
+ | OneOrMore (Element (name, _))
+ | Optional (Element (name, _)) -> name
+ | Optional (Attribute (name, _)) -> name
+ | Text -> (* an unnamed field in an element *)
+ "data"
+ | rng ->
+ failwithf "name_of_field failed at: %s" (string_of_rng rng)
+
+(* At the moment this function only generates OCaml types. However we
+ * should parameterize it later so it can generate types/structs in a
+ * variety of languages.
+ *)
+let generate_types xs =
+ (* A simple type is one that can be printed out directly, eg.
+ * "string option". A complex type is one which has a name and has
+ * to be defined via another toplevel definition, eg. a struct.
+ *
+ * generate_type generates code for either simple or complex types.
+ * In the simple case, it returns the string ("string option"). In
+ * the complex case, it returns the name ("mountpoint"). In the
+ * complex case it has to print out the definition before returning,
+ * so it should only be called when we are at the beginning of a
+ * new line (BOL context).
+ *)
+ let rec generate_type = function
+ | Text -> (* string *)
+ "string", true
+ | Choice values -> (* [`val1|`val2|...] *)
+ "[" ^ String.concat "|" (List.map ((^)"`") values) ^ "]", true
+ | ZeroOrMore rng -> (* <rng> list *)
+ let t, is_simple = generate_type rng in
+ t ^ " list (* 0 or more *)", is_simple
+ | OneOrMore rng -> (* <rng> list *)
+ let t, is_simple = generate_type rng in
+ t ^ " list (* 1 or more *)", is_simple
+ (* virt-inspector hack: bool *)
+ | Optional (Attribute (name, [Value "1"])) ->
+ "bool", true
+ | Optional rng -> (* <rng> list *)
+ let t, is_simple = generate_type rng in
+ t ^ " option", is_simple
+ (* type name = { fields ... } *)
+ | Element (name, fields) when is_attrs_interleave fields ->
+ generate_type_struct name (get_attrs_interleave fields)
+ | Element (name, [field]) (* type name = field *)
+ | Attribute (name, [field]) ->
+ let t, is_simple = generate_type field in
+ if is_simple then (t, true)
+ else (
+ pr "type %s = %s\n" name t;
+ name, false
+ )
+ | Element (name, fields) -> (* type name = { fields ... } *)
+ generate_type_struct name fields
+ | rng ->
+ failwithf "generate_type failed at: %s" (string_of_rng rng)
+
+ and is_attrs_interleave = function
+ | [Interleave _] -> true
+ | Attribute _ :: fields -> is_attrs_interleave fields
+ | Optional (Attribute _) :: fields -> is_attrs_interleave fields
+ | _ -> false
+
+ and get_attrs_interleave = function
+ | [Interleave fields] -> fields
+ | ((Attribute _) as field) :: fields
+ | ((Optional (Attribute _)) as field) :: fields ->
+ field :: get_attrs_interleave fields
+ | _ -> assert false
+
+ and generate_types xs =
+ List.iter (fun x -> ignore (generate_type x)) xs
+
+ and generate_type_struct name fields =
+ (* Calculate the types of the fields first. We have to do this
+ * before printing anything so we are still in BOL context.
+ *)
+ let types = List.map fst (List.map generate_type fields) in