all: bitmatch.cma bitmatch_persistent.cma \
bitmatch.cmxa bitmatch_persistent.cmxa \
- pa_bitmatch.cmo
+ pa_bitmatch.cmo \
+ bitmatch-objinfo
@for d in $(SUBDIRS); do $(MAKE) -C $$d $@; done
bitmatch.cma: bitmatch_types.cmo bitmatch_config.cmo bitmatch.cmo
$(OCAMLFIND) ocamlc bitmatch.cma -I +camlp4 camlp4lib.cma \
-pp camlp4of -c $< -o $@
+bitmatch-objinfo: bitmatch_objinfo.cmo bitmatch.cma bitmatch_persistent.cma
+ $(OCAMLFIND) ocamlc -I +camlp4 unix.cma camlp4lib.cma \
+ bitmatch.cma bitmatch_persistent.cma \
+ $< -o $@
+
# Tests and examples.
PP = -pp "camlp4o bitmatch.cma bitmatch_persistent.cma pa_bitmatch.cmo"
-test: pa_bitmatch.cmo bitmatch.cma bitmatch_persistent.cma
+test: pa_bitmatch.cmo bitmatch.cma bitmatch_persistent.cma tests/test.bmpp
@for f in $(TESTS); do \
echo Building $$f; \
$(OCAMLFIND) ocamlc $(OCAMLCFLAGS) $(PP) \
done
@for d in $(SUBDIRS); do $(MAKE) -C $$d $@; done
+tests/test.bmpp: create_test_pattern
+ ./$< $@.new
+ mv $@.new $@
+
+create_test_pattern: create_test_pattern.cmo
+ $(OCAMLFIND) ocamlc $(OCAMLCFLAGS) -pp camlp4of \
+ unix.cma -I +camlp4 camlp4lib.cma \
+ -I . bitmatch.cma bitmatch_persistent.cma $< -o $@
+
+create_test_pattern.cmo: create_test_pattern.ml
+ $(OCAMLFIND) ocamlc $(OCAMLCFLAGS) -pp camlp4of \
+ unix.cma -I +camlp4 \
+ -I . -c $< -o $@
+
examples: pa_bitmatch.cmo bitmatch.cma bitmatch_persistent.cma
@for f in $(EXAMPLES); do \
echo Building $$f; \
- $(OCAMLFIND) ocamlc $(OCAMLFLAGS) $(PP) \
+ $(OCAMLFIND) ocamlc $(OCAMLCFLAGS) $(PP) \
-package unix -linkpkg -I . bitmatch.cma $$f.ml -o $$f; \
if [ $$? -ne 0 ]; then exit 1; fi; \
done
doc:
rm -rf html
mkdir html
- $(OCAMLDOC) $(OCAMLDOCFLAGS) -d html bitmatch.mli bitmatch.ml \
- bitmatch_persistent.mli bitmatch_persistent.ml
+ $(OCAMLDOC) $(OCAMLDOCFLAGS) -d html -I +camlp4 \
+ bitmatch.mli bitmatch_persistent.mli
endif
# Install.
still need to be a runtime check to enforce the
size).
+ {2 Named patterns and persistent patterns}
+
+ Please see {!Bitmatch_persistent} for documentation on this subject.
+
{2 Compiling}
Using the compiler directly you can do:
let package = "@PACKAGE_NAME@"
let version = "@PACKAGE_VERSION@"
+let ocamllibdir = "@OCAMLLIB@"
--- /dev/null
+(* Bitmatch syntax extension.
+ * Copyright (C) 2008 Red Hat Inc., Richard W.M. Jones
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * $Id$
+ *)
+
+open Printf
+
+open Bitmatch
+module P = Bitmatch_persistent
+
+let () =
+ if Array.length Sys.argv <= 1 then
+ failwith "bitmatch_objinfo filename.bmpp";
+ let filename = Sys.argv.(1) in
+ let chan = open_in filename in
+ let names = ref [] in
+ (try
+ let rec loop () =
+ let name = P.named_from_channel chan in
+ names := name :: !names
+ in
+ loop ()
+ with End_of_file -> ()
+ );
+ close_in chan;
+ let names = List.rev !names in
+ List.iter (
+ function
+ | name, P.Pattern patt ->
+ printf "let bitmatch %s =\n%s\n"
+ name (P.string_of_pattern patt)
+ | name, P.Constructor cons ->
+ printf "let BITSTRING %s =\n%s\n"
+ name (P.string_of_constructor cons)
+ ) names
signed : bool; (* true if signed, false if unsigned *)
t : field_type; (* type *)
_loc : Loc.t; (* location in source code *)
- printer : 'a -> string; (* turn the field into a string *)
+
+ (* Turn the field into a string. This used to be a function,
+ * but that would prevent this structure from being marshalled.
+ * This is unsatisfactory at the moment because it means we
+ * can't print out the 'a field.
+ *)
+ printer : printer_t;
}
and field_type = Int | String | Bitstring (* field type *)
and endian_expr =
| ConstantEndian of Bitmatch.endian (* a constant little/big/nativeendian *)
| EndianExpr of expr (* an endian expression *)
+and printer_t = PattPrinter | ExprPrinter | NoPrinter
type pattern = patt field list
type constructor = expr field list
+type named = string * alt
+and alt =
+ | Pattern of pattern
+ | Constructor of constructor
+
(* Work out if an expression is an integer constant.
*
* Returns [Some i] if so (where i is the integer value), else [None].
let loc_line = Loc.start_line _loc in
let loc_char = Loc.start_off _loc - Loc.start_bol _loc in
- sprintf "%s : %s : %s, %s, %s @ (%S, %d, %d)"
- (printer field) flen t endian signed loc_fname loc_line loc_char
+ sprintf "[field] : %s : %s, %s, %s @ (%S, %d, %d)"
+ (*printer field*) flen t endian signed loc_fname loc_line loc_char
let string_of_pattern pattern =
- "{ " ^ String.concat "; " (List.map string_of_field pattern) ^ " }"
+ "{ " ^ String.concat ";\n " (List.map string_of_field pattern) ^ " }\n"
let string_of_constructor constructor =
- "{ " ^ String.concat "; " (List.map string_of_field constructor) ^ " }"
+ "{ " ^ String.concat ";\n " (List.map string_of_field constructor) ^ " }\n"
-let pattern_to_channel chan patt = Marshal.to_channel chan patt []
-let constructor_to_channel chan cons = Marshal.to_channel chan cons []
+let named_to_channel chan n = Marshal.to_channel chan n []
-let pattern_to_string patt = Marshal.to_string patt []
-let constructor_to_string cons = Marshal.to_string cons []
+let named_to_string n = Marshal.to_string n []
-let pattern_to_buffer str ofs len patt =
- Marshal.to_buffer str ofs len patt []
-let constructor_to_buffer str ofs len cons =
- Marshal.to_buffer str ofs len cons []
+let named_to_buffer str ofs len n = Marshal.to_buffer str ofs len n []
-let pattern_from_channel = Marshal.from_channel
-let constructor_from_channel = Marshal.from_channel
+let named_from_channel = Marshal.from_channel
-let pattern_from_string = Marshal.from_string
-let constructor_from_string = Marshal.from_string
+let named_from_string = Marshal.from_string
let create_pattern_field _loc =
{
signed = false;
t = Int;
_loc = _loc;
- printer = patt_printer;
+ printer = PattPrinter;
}
let set_lident_patt field id =
signed = false;
t = Int;
_loc = _loc;
- printer = expr_printer;
+ printer = ExprPrinter;
}
let set_lident_expr field id =
*)
(**
+ {b Warning:} This documentation is for ADVANCED USERS ONLY.
+ If you are not an advanced user, you are probably looking
+ for {{:Bitmatch.html}the Bitmatch documentation}.
+
{{:#reference}Jump straight to the reference section for
documentation on types and functions}.
Bitmatch allows you to name sets of fields and reuse them
elsewhere. For example if you frequently need to parse
- Pascal-style strings in the form [length byte + string], then you
- could name the [{ strlen : 8 : int; str : strlen*8 : string}]
+ Pascal-style strings in the form length byte + string, then you
+ could name the [{ strlen : 8 : int; str : strlen*8 : string }]
pattern and reuse it everywhere by name.
These are called {b persistent patterns}.
The basic usage is:
{v
-(* Create a persistent pattern called 'pascal' which
+(* Create a persistent pattern called 'pascal_string' which
* matches Pascal-style strings (length byte + string).
*)
-bitmatch pascal =
- { strlen : 8 : int;
+let bitmatch pascal_string =
+ \{ strlen : 8 : int;
str : strlen*8 : string }
let is_pascal_string bits =
bitmatch bits with
- | { pascal } ->
+ | \{ :pascal_string } ->
printf "matches a Pascal string %s, len %d bytes\n"
str strlen
v}
patterns (not unless your program runs [ocamlc] to make a [*.cmo]
file then dynamically links to the [*.cmo] file).
+ {2 Named patterns}
+
+ A named pattern is a way to name a pattern and use it later
+ in the same source file. To name a pattern, use:
+
+ [let bitmatch name = { fields ... } ;;]
+
+ and you can then use the name later on inside another pattern,
+ by prefixing the name with a colon.
+ For example:
+
+ [bitmatch bits with { :name } -> ...]
+
+ You can use named patterns within named patterns.
+
+ Currently the use of named patterns is somewhat limited.
+ The restrictions are:
+
+ Named patterns can only be used within the same source file, and
+ the names occupy a completely separate namespace from anything
+ else in the source file.
+
+ The [let bitmatch] syntax only works at the top level. We may
+ add a [let bitmatch ... in] for inner levels later.
+
+ Because you cannot rename the bound identifiers in named
+ patterns, you can effectively only use them once in a
+ pattern. For example, [{ :name; :name }] is legal, but
+ any bindings in the first name would be overridden by
+ the second name.
+
+ There are no "named constructors" yet, but the machinery
+ is in place to do this, and we may add them later.
+
+ {2 Persistent patterns in files}
+ More useful than just naming patterns, you can load
+ persistent patterns from external files. The patterns
+ in these external files can come from a variety of sources:
+ for example, in the [cil-tools] subdirectory are some
+ {{:http://cil.sf.net/}Cil-based} tools for importing C
+ structures from header files. You can also generate
+ your own files or write your own tools, as described below.
+ To use the persistent pattern(s) from a file do:
+ [open bitmatch "filename.bmpp" ;;]
+ A list of zero or more {!named} patterns are read from the file
+ and each is bound to a name (as contained in the file),
+ and then the patterns can be used with the usual [:name]
+ syntax described above.
+ {3 Extension}
+ The standard extension is [.bmpp]. This is just a convention
+ and you can use any extension you want.
+ {3 Directory search order}
+ If the filename is an absolute or explicit path, then we try to
+ load it from that path and stop if it fails. See the [Filename]
+ module in the standard OCaml library for the definitions of
+ "absolute path" and "explicit path". Otherwise we use the
+ following directory search order:
+
+ - Relative to the current directory
+ - Relative to the OCaml library directory
+
+ {3 bitmatch-objinfo}
+
+ The [bitmatch-objinfo] command can be run on a file in order
+ to print out the patterns in the file.
+
+ {3 Constructors}
+
+ We haven't implemented persistent constructors yet, although
+ the machinery is in place to make this happen. Any constructors
+ found in the file are ignored.
+
+ {2 Creating your own persistent patterns}
+
+ If you want to write a tool to import bitstrings from an
+ exotic location or markup language, you will need
+ to use the functions found in the {{:#reference}reference section}.
+
+ I will describe using an example here of how you would
+ programmatically create a persistent pattern which
+ matches Pascal-style "length byte + data" strings.
+ Firstly note that there are two fields, so our pattern
+ will be a list of length 2 and type {!pattern}.
+
+ You will need to create a camlp4 location object ([Loc.t])
+ describing the source file. This source file is used
+ to generate useful error messages for the user, so
+ you may want to set it to be the name and location in
+ the file that your tool reads for input. By convention,
+ locations are bound to name [_loc]:
+
+{v
+ let _loc = Loc.move_line 42 (Loc.mk "input.xml")
+v}
+
+ Create a pattern field representing a length field which is 8 bits wide,
+ bound to the identifier [len]:
+
+{v
+ let len_field = create_pattern_field _loc
+ let len_field = set_length_int len_field 8
+ let len_field = set_lident_patt len_field "len"
+v}
+
+ Create a pattern field representing a string of [len*8] bits.
+ Note that the use of [<:expr< >>] quotation requires
+ you to preprocess your source with [camlp4of]
+ (see {{:http://brion.inria.fr/gallium/index.php/Reflective_OCaml}this
+ page on Reflective OCaml}).
+
+{v
+ let str_field = create_pattern_field _loc
+ let str_field = set_length str_field <:expr< len*8 >>
+ let str_field = set_lident_patt str_field "str"
+ let str_field = set_type_string str_field
+v}
+
+ Join the two fields together and name it:
+
+{v
+ let named_pattern = "pascal_string", Pattern [len_field; str_field]
+v}
+
+ Save it to a file:
+
+{v
+ let chan = open_out "output.bmpp" in
+ named_to_channel chan named_pattern;
+ close_out chan
+v}
+
+ You can now use this pattern in another program like this:
+
+{v
+ open bitmatch "output.bmpp" ;;
+ let parse_pascal_string bits =
+ bitmatch bits with
+ | \{ :pascal_string } -> str, len
+ | \{ _ } -> invalid_arg "not a Pascal string"
+v}
{2:reference Reference}
- {3 Internal}
+ {3 Types}
*)
type patt = Camlp4.PreCast.Syntax.Ast.patt
type expr = Camlp4.PreCast.Syntax.Ast.expr
type loc_t = Camlp4.PreCast.Syntax.Ast.Loc.t
-
-(** {3 Types} *)
+(** Just short names for the camlp4 types. *)
type 'a field
(** A field in a persistent pattern or persistent constructor. *)
type pattern = patt field list
-(** A persistent pattern (used in [bitmatch] operator), is just a list
- of pattern fields. *)
+(** A persistent pattern (used in [bitmatch] operator), is just a
+ list of pattern fields. *)
type constructor = expr field list
(** A persistent constructor (used in [BITSTRING] operator), is just a
list of constructor fields. *)
+type named = string * alt
+and alt =
+ | Pattern of pattern (** Pattern *)
+ | Constructor of constructor (** Constructor *)
+(** A named pattern or constructor.
+
+ The name is used when binding a pattern from a file, but
+ is otherwise ignored. *)
+
(** {3 Printers} *)
val string_of_pattern : pattern -> string
val string_of_constructor : constructor -> string
val string_of_field : 'a field -> string
-(** Convert patterns, constructors, or individual fields
+(** Convert patterns, constructors or individual fields
into printable strings for debugging purposes.
The strings look similar to the syntax used by bitmatch, but
(** {3 Persistence} *)
-val pattern_to_channel : out_channel -> pattern -> unit
-val constructor_to_channel : out_channel -> constructor -> unit
+val named_to_channel : out_channel -> named -> unit
(** Save a pattern/constructor to an output channel. *)
-val pattern_to_string : pattern -> string
-val constructor_to_string : constructor -> string
+val named_to_string : named -> string
(** Serialize a pattern/constructor to a string. *)
-val pattern_to_buffer : string -> int -> int -> pattern -> int
-val constructor_to_buffer : string -> int -> int -> constructor -> int
+val named_to_buffer : string -> int -> int -> named -> int
(** Serialize a pattern/constructor to part of a string, return the length. *)
-val pattern_from_channel : in_channel -> pattern
-val constructor_from_channel : in_channel -> constructor
+val named_from_channel : in_channel -> named
(** Load a pattern/constructor from an output channel.
Note: This is not type safe. The pattern/constructor must
have been written out under the same version of OCaml and
the same version of bitmatch. *)
-val pattern_from_string : string -> int -> pattern
-val constructor_from_string : string -> int -> constructor
+val named_from_string : string -> int -> named
(** Load a pattern/constructor from a string at offset within the string.
Note: This is not type safe. The pattern/constructor must
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
-# $Id: Makefile.in 81 2008-05-21 09:59:21Z richard.wm.jones $
+# $Id$
PACKAGE = @PACKAGE_NAME@
VERSION = @PACKAGE_VERSION@
OCAMLCFLAGS = -g
OCAMLCPACKAGES = -package unix,str,extlib,cil -I ..
-OCAMLCLIBS = $(OCAMLCPACKAGES) -linkpkg ../bitmatch.cma
+OCAMLCLIBS = $(OCAMLCPACKAGES) -linkpkg ../bitmatch.cma ../bitmatch_persistent.cma
OCAMLOPTFLAGS =
OCAMLOPTPACKAGES = $(OCAMLCPACKAGES)
-OCAMLOPTLIBS = $(OCAMLOPTPACKAGES) -linkpkg ../bitmatch.cmxa
+OCAMLOPTLIBS = $(OCAMLOPTPACKAGES) -linkpkg ../bitmatch.cmxa ../bitmatch_persistent.cmxa
OCAMLDOCFLAGS = -html -sort
--- /dev/null
+(* Create persistent pattern
+ * $Id$
+ *)
+
+open Bitmatch_persistent
+
+open Camlp4.PreCast
+open Syntax
+open Ast
+
+let () =
+ let _loc = Loc.ghost in
+
+ let len_field = create_pattern_field _loc in
+ let len_field = set_length_int len_field 8 in
+ let len_field = set_lident_patt len_field "len" in
+
+ let str_field = create_pattern_field _loc in
+ let str_field = set_length str_field <:expr< len*8 >> in
+ let str_field = set_lident_patt str_field "str" in
+ let str_field = set_type_string str_field in
+
+ let named_pattern = "pascal_string", Pattern [len_field; str_field] in
+
+ let chan = open_out Sys.argv.(1) in
+ named_to_channel chan named_pattern;
+ close_out chan
*)
let debug = false
+(* Hashtable storing named persistent patterns. *)
+let pattern_hash : (string, P.pattern) Hashtbl.t = Hashtbl.create 13
+
(* Work out if an expression is an integer constant.
*
* Returns [Some i] if so (where i is the integer value), else [None].
$int:loc_line$, $int:loc_char$))
>>
+(* Add a named pattern. *)
+let add_named_pattern _loc name pattern =
+ Hashtbl.add pattern_hash name pattern
+
+(* Expand a named pattern from the pattern_hash. *)
+let expand_named_pattern _loc name =
+ try Hashtbl.find pattern_hash name
+ with Not_found ->
+ Loc.raise _loc (Failure (sprintf "named pattern not found: %s" name))
+
+(* Add named patterns from a file. See the documentation on the
+ * directory search path in bitmatch_persistent.mli
+ *)
+let load_patterns_from_file _loc filename =
+ let chan =
+ if Filename.is_relative filename && Filename.is_implicit filename then (
+ (* Try current directory. *)
+ try open_in filename
+ with _ ->
+ (* Try OCaml library directory. *)
+ try open_in (Filename.concat Bitmatch_config.ocamllibdir filename)
+ with exn -> Loc.raise _loc exn
+ ) else (
+ try open_in filename
+ with exn -> Loc.raise _loc exn
+ ) in
+ let names = ref [] in
+ (try
+ let rec loop () =
+ let name = P.named_from_channel chan in
+ names := name :: !names
+ in
+ loop ()
+ with End_of_file -> ()
+ );
+ close_in chan;
+ let names = List.rev !names in
+ List.iter (
+ function
+ | name, P.Pattern patt -> add_named_pattern _loc name patt
+ | _, P.Constructor _ -> () (* just ignore these for now *)
+ ) names
+
EXTEND Gram
- GLOBAL: expr;
+ GLOBAL: expr str_item;
(* Qualifiers are a list of identifiers ("string", "bigendian", etc.)
* followed by an optional expression (used in certain cases). Note
SEP "," ]
];
- (* Field used in the bitmatch operator (a pattern). *)
+ (* Field used in the bitmatch operator (a pattern). This can actually
+ * return multiple fields, in the case where the 'field' is a named
+ * persitent pattern.
+ *)
patt_field: [
[ fpatt = patt; ":"; len = expr LEVEL "top";
qs = OPT [ ":"; qs = qualifiers -> qs ] ->
let field = P.create_pattern_field _loc in
let field = P.set_patt field fpatt in
let field = P.set_length field len in
- parse_field _loc field qs
+ [parse_field _loc field qs] (* Normal, single field. *)
+ | ":"; name = LIDENT ->
+ expand_named_pattern _loc name (* Named -> list of fields. *)
]
];
(* Case inside bitmatch operator. *)
- match_case: [
+ patt_fields: [
[ "{";
fields = LIST0 patt_field SEP ";";
- "}";
+ "}" ->
+ List.concat fields
+ ]
+ ];
+
+ patt_case: [
+ [ fields = patt_fields;
bind = OPT [ "as"; name = LIDENT -> name ];
whenclause = OPT [ "when"; e = expr -> e ]; "->";
code = expr ->
]
];
+ constr_fields: [
+ [ "{";
+ fields = LIST0 constr_field SEP ";";
+ "}" ->
+ fields
+ ]
+ ];
+
(* 'bitmatch' expressions. *)
expr: LEVEL ";" [
[ "bitmatch";
bs = expr; "with"; OPT "|";
- cases = LIST1 match_case SEP "|" ->
+ cases = LIST1 patt_case SEP "|" ->
output_bitmatch _loc bs cases
]
(* Constructor. *)
- | [ "BITSTRING"; "{";
- fields = LIST0 constr_field SEP ";";
- "}" ->
+ | [ "BITSTRING";
+ fields = constr_fields ->
output_constructor _loc fields
]
];
+ (* Named persistent patterns.
+ *
+ * NB: Currently only allowed at the top level. We can probably lift
+ * this restriction later if necessary. We only deal with patterns
+ * at the moment, not constructors, but the infrastructure to do
+ * constructors is in place.
+ *)
+ str_item: LEVEL "top" [
+ [ "let"; "bitmatch";
+ name = LIDENT; "="; fields = patt_fields ->
+ add_named_pattern _loc name fields;
+ (* The statement disappears, but we still need a str_item so ... *)
+ <:str_item< >>
+ | "open"; "bitmatch"; filename = STRING ->
+ load_patterns_from_file _loc filename;
+ <:str_item< >>
+ ]
+ ];
+
END
--- /dev/null
+(* Named pattern
+ * $Id$
+ *)
+
+open Printf
+open Bitmatch
+
+(* A byte+length Pascal string. *)
+let bitmatch pascal_string =
+ { len : 8;
+ str : len*8 : string }
+
+let () =
+ let bits = bitstring_of_string "\022Mary had a little lamb" in
+ bitmatch bits with
+ | { :pascal_string } ->
+ () (*printf "it's a Pascal string, len = %d, string = %S\n" len str*)
+ | { _ } ->
+ eprintf "not matching error\n";
+ exit 1
--- /dev/null
+(* Open a persistent pattern
+ * $Id$
+ *)
+
+open Printf
+open Bitmatch
+
+open bitmatch "tests/test.bmpp"
+
+let () =
+ let bits = bitstring_of_string "\022Mary had a little lamb" in
+ bitmatch bits with
+ | { :pascal_string } ->
+ () (*printf "it's a Pascal string, len = %d, string = %S\n" len str*)
+ | { _ } ->
+ eprintf "not matching error\n";
+ exit 1