From: Richard W.M. Jones Date: Thu, 12 Jun 2008 15:46:00 +0000 (+0000) Subject: Split out field handling from pa_bitmatch into a common library, in preparation for... X-Git-Url: http://git.annexia.org/?a=commitdiff_plain;h=091a76c07b4ddd25c3459b7e6a9fd3e1245a57a7;hp=c4206b0394401252481d028c25b0b877edce2128;p=ocaml-bitstring.git Split out field handling from pa_bitmatch into a common library, in preparation for pattern persistence. --- diff --git a/.depend b/.depend index 4756662..ce2229d 100644 --- a/.depend +++ b/.depend @@ -1,2 +1,5 @@ bitmatch.cmo: bitmatch_types.cmo bitmatch_config.cmo bitmatch.cmi bitmatch.cmx: bitmatch_types.cmx bitmatch_config.cmx bitmatch.cmi +bitmatch_persistent.cmo: bitmatch.cmi bitmatch_persistent.cmi +bitmatch_persistent.cmx: bitmatch.cmx bitmatch_persistent.cmi +bitmatch_persistent.cmi: bitmatch.cmi diff --git a/Makefile.in b/Makefile.in index b535b7a..a28fb92 100644 --- a/Makefile.in +++ b/Makefile.in @@ -41,24 +41,44 @@ EXAMPLES = $(patsubst %.ml,%,$(wildcard examples/*.ml)) TESTS = $(patsubst %.ml,%,$(wildcard tests/*.ml)) -all: bitmatch.cma bitmatch.cmxa pa_bitmatch.cmo +all: bitmatch.cma bitmatch_persistent.cma \ + bitmatch.cmxa bitmatch_persistent.cmxa \ + pa_bitmatch.cmo @for d in $(SUBDIRS); do $(MAKE) -C $$d $@; done bitmatch.cma: bitmatch_types.cmo bitmatch_config.cmo bitmatch.cmo $(OCAMLFIND) ocamlc -a -o $@ $^ +bitmatch_persistent.cma: bitmatch_persistent.cmo + $(OCAMLFIND) ocamlc -a -o $@ $^ + +bitmatch_persistent.cmo: bitmatch_persistent.ml + $(OCAMLFIND) ocamlc $(OCAMLCFLAGS) $(OCAMLCPACKAGES) \ + -I +camlp4 -pp camlp4of -c $< + bitmatch.cmxa: bitmatch_types.cmx bitmatch_config.cmx bitmatch.cmx $(OCAMLFIND) ocamlopt -a -o $@ $^ -pa_bitmatch.cmo: pa_bitmatch.ml bitmatch.cma +bitmatch_persistent.cmxa: bitmatch_persistent.cmx + $(OCAMLFIND) ocamlopt -a -o $@ $^ + +bitmatch_persistent.cmx: bitmatch_persistent.ml + $(OCAMLFIND) ocamlopt $(OCAMLOPTFLAGS) $(OCAMLOPTPACKAGES) \ + -I +camlp4 -pp camlp4of -c $< + +bitmatch_persistent.cmi: bitmatch_persistent.mli + $(OCAMLFIND) ocamlc $(OCAMLCFLAGS) $(OCAMLCPACKAGES) \ + -I +camlp4 -pp camlp4of -c $< + +pa_bitmatch.cmo: pa_bitmatch.ml bitmatch.cma bitmatch_persistent.cma $(OCAMLFIND) ocamlc bitmatch.cma -I +camlp4 camlp4lib.cma \ - -pp camlp4of.opt -c $< -o $@ + -pp camlp4of -c $< -o $@ # Tests and examples. -PP = -pp "camlp4o bitmatch.cma pa_bitmatch.cmo" +PP = -pp "camlp4o bitmatch.cma bitmatch_persistent.cma pa_bitmatch.cmo" -test: pa_bitmatch.cmo bitmatch.cma +test: pa_bitmatch.cmo bitmatch.cma bitmatch_persistent.cma @for f in $(TESTS); do \ echo Building $$f; \ $(OCAMLFIND) ocamlc $(OCAMLCFLAGS) $(PP) \ @@ -70,7 +90,7 @@ test: pa_bitmatch.cmo bitmatch.cma done @for d in $(SUBDIRS); do $(MAKE) -C $$d $@; done -examples: pa_bitmatch.cmo bitmatch.cma +examples: pa_bitmatch.cmo bitmatch.cma bitmatch_persistent.cma @for f in $(EXAMPLES); do \ echo Building $$f; \ $(OCAMLFIND) ocamlc $(OCAMLFLAGS) $(PP) \ @@ -79,10 +99,11 @@ examples: pa_bitmatch.cmo bitmatch.cma done @for d in $(SUBDIRS); do $(MAKE) -C $$d $@; done -print-tests: pa_bitmatch.cmo +print-tests: pa_bitmatch.cmo bitmatch_persistent.cma @for f in $(TESTS); do \ echo Test: $$f.ml; \ - cmd="camlp4o bitmatch.cma pa_bitmatch.cmo -printer pr_o.cmo $$f.ml"; \ + cmd="camlp4o bitmatch.cma bitmatch_persistent.cma pa_bitmatch.cmo \ + -printer pr_o.cmo $$f.ml"; \ echo $$cmd; \ $$cmd; \ done @@ -90,7 +111,8 @@ print-tests: pa_bitmatch.cmo print-examples: pa_bitmatch.cmo @for f in $(EXAMPLES); do \ echo Example: $$f.ml; \ - camlp4o bitmatch.cma pa_bitmatch.cmo -printer pr_o.cmo $$f.ml; \ + camlp4o bitmatch.cma bitmatch_persistent.cma pa_bitmatch.cmo \ + -printer pr_o.cmo $$f.ml; \ if [ $$? -ne 0 ]; then exit 1; fi; \ done @@ -114,7 +136,8 @@ ifneq ($(OCAMLDOC),) doc: rm -rf html mkdir html - $(OCAMLDOC) $(OCAMLDOCFLAGS) -d html bitmatch.mli bitmatch.ml + $(OCAMLDOC) $(OCAMLDOCFLAGS) -d html bitmatch.mli bitmatch.ml \ + bitmatch_persistent.mli bitmatch_persistent.ml endif # Install. @@ -122,6 +145,7 @@ endif install: ocamlfind install bitmatch META *.mli *.cmx *.cma *.cmxa *.a \ bitmatch.cmi \ + bitmatch_persistent.cmi \ pa_bitmatch.cmo @for d in $(SUBDIRS); do $(MAKE) -C $$d $@; done @@ -137,9 +161,11 @@ install: depend: .depend @for d in $(SUBDIRS); do $(MAKE) -C $$d $@; done -.depend: bitmatch.ml bitmatch.mli +.depend: bitmatch.ml bitmatch.mli bitmatch_persistent.ml bitmatch_persistent.mli rm -f .depend - $(OCAMLFIND) ocamldep $(OCAMLCPACKAGES) $^ > $@ + $(OCAMLFIND) ocamldep $(OCAMLCPACKAGES) bitmatch.ml bitmatch.mli >> $@ + $(OCAMLFIND) ocamldep $(OCAMLCPACKAGES) -pp camlp4of \ + bitmatch_persistent.ml bitmatch_persistent.mli >> $@ ifeq ($(wildcard .depend),.depend) include .depend diff --git a/bitmatch_persistent.ml b/bitmatch_persistent.ml new file mode 100644 index 0000000..8ad07d9 --- /dev/null +++ b/bitmatch_persistent.ml @@ -0,0 +1,213 @@ +(* Bitmatch persistent patterns. + * Copyright (C) 2008 Red Hat Inc., Richard W.M. Jones + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * $Id$ + *) + +open Printf + +open Camlp4.PreCast +open Syntax +open Ast + +type patt = Camlp4.PreCast.Syntax.Ast.patt +type expr = Camlp4.PreCast.Syntax.Ast.expr +type loc_t = Camlp4.PreCast.Syntax.Ast.Loc.t + +(* Field. In bitmatch (patterns) the type is [patt field]. In + * BITSTRING (constructor) the type is [expr field]. + *) +type 'a field = { + field : 'a; (* field ('a is either patt or expr) *) + flen : expr; (* length in bits, may be non-const *) + endian : endian_expr; (* endianness *) + signed : bool; (* true if signed, false if unsigned *) + t : field_type; (* type *) + _loc : Loc.t; (* location in source code *) + printer : 'a -> string; (* turn the field into a string *) +} +and field_type = Int | String | Bitstring (* field type *) +and endian_expr = + | ConstantEndian of Bitmatch.endian (* a constant little/big/nativeendian *) + | EndianExpr of expr (* an endian expression *) + +type pattern = patt field list + +type constructor = expr field list + +(* Work out if an expression is an integer constant. + * + * Returns [Some i] if so (where i is the integer value), else [None]. + * + * Fairly simplistic algorithm: we can only detect simple constant + * expressions such as [k], [k+c], [k-c] etc. + *) +let rec expr_is_constant = function + | <:expr< $int:i$ >> -> (* Literal integer constant. *) + Some (int_of_string i) + | <:expr< $a$ + $b$ >> -> (* Addition of constants. *) + (match expr_is_constant a, expr_is_constant b with + | Some a, Some b -> Some (a+b) + | _ -> None) + | <:expr< $a$ - $b$ >> -> (* Subtraction. *) + (match expr_is_constant a, expr_is_constant b with + | Some a, Some b -> Some (a-b) + | _ -> None) + | <:expr< $a$ * $b$ >> -> (* Multiplication. *) + (match expr_is_constant a, expr_is_constant b with + | Some a, Some b -> Some (a*b) + | _ -> None) + | <:expr< $a$ / $b$ >> -> (* Division. *) + (match expr_is_constant a, expr_is_constant b with + | Some a, Some b -> Some (a/b) + | _ -> None) + | <:expr< $a$ lsl $b$ >> -> (* Shift left. *) + (match expr_is_constant a, expr_is_constant b with + | Some a, Some b -> Some (a lsl b) + | _ -> None) + | <:expr< $a$ lsr $b$ >> -> (* Shift right. *) + (match expr_is_constant a, expr_is_constant b with + | Some a, Some b -> Some (a lsr b) + | _ -> None) + | _ -> None (* Anything else is not constant. *) + +let string_of_field_type = function + | Int -> "int" + | String -> "string" + | Bitstring -> "bitstring" + +let patt_printer = function + | <:patt< $lid:id$ >> -> id + | <:patt< _ >> -> "_" + | _ -> "[pattern]" + +let expr_printer = function + | <:expr< $lid:id$ >> -> id + | <:expr< $int:i$ >> -> i + | _ -> "[expression]" + +let string_of_field { field = field; flen = flen; + endian = endian; signed = signed; t = t; + _loc = _loc; + printer = printer} = + let flen = + match expr_is_constant flen with + | Some i -> string_of_int i + | None -> "[non-const-len]" in + let endian = + match endian with + | ConstantEndian endian -> Bitmatch.string_of_endian endian + | EndianExpr _ -> "endian [expr]" in + let signed = if signed then "signed" else "unsigned" in + let t = string_of_field_type t in + let loc_fname = Loc.file_name _loc in + let loc_line = Loc.start_line _loc in + let loc_char = Loc.start_off _loc - Loc.start_bol _loc in + + sprintf "%s : %s : %s, %s, %s @ (%S, %d, %d)" + (printer field) flen t endian signed loc_fname loc_line loc_char + +let string_of_pattern pattern = + "{ " ^ String.concat "; " (List.map string_of_field pattern) ^ " }" + +let string_of_constructor constructor = + "{ " ^ String.concat "; " (List.map string_of_field constructor) ^ " }" + +let pattern_to_channel chan patt = Marshal.to_channel chan patt [] +let constructor_to_channel chan cons = Marshal.to_channel chan cons [] + +let pattern_to_string patt = Marshal.to_string patt [] +let constructor_to_string cons = Marshal.to_string cons [] + +let pattern_to_buffer str ofs len patt = + Marshal.to_buffer str ofs len patt [] +let constructor_to_buffer str ofs len cons = + Marshal.to_buffer str ofs len cons [] + +let pattern_from_channel = Marshal.from_channel +let constructor_from_channel = Marshal.from_channel + +let pattern_from_string = Marshal.from_string +let constructor_from_string = Marshal.from_string + +let create_pattern_field _loc = + { + field = <:patt< _ >>; + flen = <:expr< 32 >>; + endian = ConstantEndian Bitmatch.BigEndian; + signed = false; + t = Int; + _loc = _loc; + printer = patt_printer; + } + +let set_lident_patt field id = + let _loc = field._loc in + { field with field = <:patt< $lid:id$ >> } +let set_int_patt field i = + let _loc = field._loc in + { field with field = <:patt< $`int:i$ >> } +let set_string_patt field str = + let _loc = field._loc in + { field with field = <:patt< $str:str$ >> } +let set_unbound_patt field = + let _loc = field._loc in + { field with field = <:patt< _ >> } +let set_patt field patt = { field with field = patt } +let set_length_int field flen = + let _loc = field._loc in + { field with flen = <:expr< $`int:flen$ >> } +let set_length field flen = { field with flen = flen } +let set_endian field endian = { field with endian = ConstantEndian endian } +let set_endian_expr field expr = { field with endian = EndianExpr expr } +let set_signed field signed = { field with signed = signed } +let set_type_int field = { field with t = Int } +let set_type_string field = { field with t = String } +let set_type_bitstring field = { field with t = Bitstring } +let set_location field loc = { field with _loc = loc } + +let create_constructor_field _loc = + { + field = <:expr< 0 >>; + flen = <:expr< 32 >>; + endian = ConstantEndian Bitmatch.BigEndian; + signed = false; + t = Int; + _loc = _loc; + printer = expr_printer; + } + +let set_lident_expr field id = + let _loc = field._loc in + { field with field = <:expr< $lid:id$ >> } +let set_int_expr field i = + let _loc = field._loc in + { field with field = <:expr< $`int:i$ >> } +let set_string_expr field str = + let _loc = field._loc in + { field with field = <:expr< $str:str$ >> } +let set_expr field expr = + let _loc = field._loc in + { field with field = expr } + +let get_patt field = field.field +let get_expr field = field.field +let get_length field = field.flen +let get_endian field = field.endian +let get_signed field = field.signed +let get_type field = field.t +let get_location field = field._loc diff --git a/bitmatch_persistent.mli b/bitmatch_persistent.mli new file mode 100644 index 0000000..7e3f37b --- /dev/null +++ b/bitmatch_persistent.mli @@ -0,0 +1,321 @@ +(** Bitmatch persistent patterns. *) +(* Copyright (C) 2008 Red Hat Inc., Richard W.M. Jones + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * $Id$ + *) + +(** + {{:#reference}Jump straight to the reference section for + documentation on types and functions}. + + {2 Introduction} + + Bitmatch allows you to name sets of fields and reuse them + elsewhere. For example if you frequently need to parse + Pascal-style strings in the form [length byte + string], then you + could name the [{ strlen : 8 : int; str : strlen*8 : string}] + pattern and reuse it everywhere by name. + + These are called {b persistent patterns}. + + The basic usage is: + +{v +(* Create a persistent pattern called 'pascal' which + * matches Pascal-style strings (length byte + string). + *) +bitmatch pascal = + { strlen : 8 : int; + str : strlen*8 : string } + +let is_pascal_string bits = + bitmatch bits with + | { pascal } -> + printf "matches a Pascal string %s, len %d bytes\n" + str strlen +v} + + {3 Important notes} + + There are some important things you should know about + persistent patterns before you decide to use them: + + 'Persistent' refers to the fact that they can be saved into binary + files. However these binary files use OCaml [Marshal] module and + depend (sometimes) on the version of OCaml used to generate them + and (sometimes) the version of bitmatch used. So your build system + should rebuild these files from source when your code is rebuilt. + + Persistent patterns are syntactic. They work in the same way + as cutting and pasting (or [#include]-ing) code. For example + if a persistent pattern binds a field named [len], then any + uses of [len] following in the surrounding pattern could + be affected. + + Programs which generate and manipulate persistent patterns have to + link to camlp4. Since camlp4 in OCaml >= 3.10 is rather large, we + have placed this code into this separate submodule, so that + programs which just use bitmatch don't need to pull in the whole of + camlp4. This restriction does not apply to generated code which + only uses persistent patterns. If the distinction isn't clear, + use [ocamlobjinfo] to look at the dependencies of your [*.cmo] + files. + + Persistent patterns can be generated in several ways, but they + can only be {i used} by the [pa_bitmatch] syntax extension. + This means they are purely compile-time constructs. You + cannot use them to make arbitrary patterns and run those + patterns (not unless your program runs [ocamlc] to make a [*.cmo] + file then dynamically links to the [*.cmo] file). + + + + + + + + + + + {2:reference Reference} + + {3 Internal} +*) + +type patt = Camlp4.PreCast.Syntax.Ast.patt +type expr = Camlp4.PreCast.Syntax.Ast.expr +type loc_t = Camlp4.PreCast.Syntax.Ast.Loc.t + +(** {3 Types} *) + +type 'a field +(** A field in a persistent pattern or persistent constructor. *) + +type pattern = patt field list +(** A persistent pattern (used in [bitmatch] operator), is just a list + of pattern fields. *) + +type constructor = expr field list +(** A persistent constructor (used in [BITSTRING] operator), is just a + list of constructor fields. *) + +(** {3 Printers} *) + +val string_of_pattern : pattern -> string +val string_of_constructor : constructor -> string +val string_of_field : 'a field -> string +(** Convert patterns, constructors, or individual fields + into printable strings for debugging purposes. + + The strings look similar to the syntax used by bitmatch, but + some things cannot be printed fully, eg. length expressions. *) + +(** {3 Persistence} *) + +val pattern_to_channel : out_channel -> pattern -> unit +val constructor_to_channel : out_channel -> constructor -> unit +(** Save a pattern/constructor to an output channel. *) + +val pattern_to_string : pattern -> string +val constructor_to_string : constructor -> string +(** Serialize a pattern/constructor to a string. *) + +val pattern_to_buffer : string -> int -> int -> pattern -> int +val constructor_to_buffer : string -> int -> int -> constructor -> int +(** Serialize a pattern/constructor to part of a string, return the length. *) + +val pattern_from_channel : in_channel -> pattern +val constructor_from_channel : in_channel -> constructor +(** Load a pattern/constructor from an output channel. + + Note: This is not type safe. The pattern/constructor must + have been written out under the same version of OCaml and + the same version of bitmatch. *) + +val pattern_from_string : string -> int -> pattern +val constructor_from_string : string -> int -> constructor +(** Load a pattern/constructor from a string at offset within the string. + + Note: This is not type safe. The pattern/constructor must + have been written out under the same version of OCaml and + the same version of bitmatch. *) + +(** {3 Create pattern fields} + + These fields are used in pattern matches ([bitmatch]). *) + +val create_pattern_field : loc_t -> patt field +(** Create a pattern field. + + The pattern is unbound, the type is set to [int], bit length to [32], + endianness to [BigEndian], signedness to unsigned ([false]), + and source code location to the [_loc] parameter. + + To create a complete field you need to call the [set_*] + functions. For example, to create [{ len : 8 : int }] + you would do: + +{v + let field = create_pattern_field _loc in + let field = set_lident_patt field "len" in + let field = set_length_int field 8 in +v} +*) + +val set_lident_patt : patt field -> string -> patt field +(** Sets the pattern to the pattern binding an identifier + given in the string. + + The effect is that the field [{ len : 8 : int }] could + be created by calling [set_lident_patt field "len"]. *) + +val set_int_patt : patt field -> int -> patt field +(** Sets the pattern field to the pattern which matches an integer. + + The effect is that the field [{ 2 : 8 : int }] could + be created by calling [set_int_patt field 2]. *) + +val set_string_patt : patt field -> string -> patt field +(** Sets the pattern field to the pattern which matches a string. + + The effect is that the field [{ "MAGIC" : 8*5 : string }] could + be created by calling [set_int_patt field "MAGIC"]. *) + +val set_unbound_patt : patt field -> patt field +(** Sets the pattern field to the unbound pattern (usually written [_]). + + The effect is that the field [{ _ : 8 : int }] could + be created by calling [set_unbound_patt field]. *) + +val set_patt : patt field -> patt -> patt field +(** Sets the pattern field to an arbitrary OCaml pattern match. *) + +val set_length_int : 'a field -> int -> 'a field +(** Sets the length in bits of a field to a constant integer. + + The effect is that the field [{ len : 8 : string }] could + be created by calling [set_length field 8]. *) + +val set_length : 'a field -> expr -> 'a field +(** Sets the length in bits of a field to an OCaml expression. + + The effect is that the field [{ len : 2*i : string }] could + be created by calling [set_length field <:expr< 2*i >>]. *) + +val set_endian : 'a field -> Bitmatch.endian -> 'a field +(** Sets the endianness of a field to the constant endianness. + + The effect is that the field [{ _ : 16 : bigendian }] could + be created by calling [set_endian field Bitmatch.BigEndian]. *) + +val set_endian_expr : 'a field -> expr -> 'a field +(** Sets the endianness of a field to an endianness expression. + + The effect is that the field [{ _ : 16 : endian(e) }] could + be created by calling [set_endian_expr field e]. *) + +val set_signed : 'a field -> bool -> 'a field +(** Sets the signedness of a field to a constant signedness. + + The effect is that the field [{ _ : 16 : signed }] could + be created by calling [set_signed field true]. *) + +val set_type_int : 'a field -> 'a field +(** Sets the type of a field to [int]. + + The effect is that the field [{ _ : 16 : int }] could + be created by calling [set_type_int field]. *) + +val set_type_string : 'a field -> 'a field +(** Sets the type of a field to [string]. + + The effect is that the field [{ str : 16 : string }] could + be created by calling [set_type_string field]. *) + +val set_type_bitstring : 'a field -> 'a field +(** Sets the type of a field to [bitstring]. + + The effect is that the field [{ _ : 768 : bitstring }] could + be created by calling [set_type_bitstring field]. *) + +val set_location : 'a field -> loc_t -> 'a field +(** Sets the source code location of a field. This is used when + pa_bitmatch displays error messages. *) + +(** {3 Create constructor fields} + + These fields are used in constructors ([BITSTRING]). *) + +val create_constructor_field : loc_t -> expr field +(** Create a constructor field. + + The defaults are the same as for {!create_pattern_field} + except that the expression is initialized to [0]. +*) + +val set_lident_expr : expr field -> string -> expr field +(** Sets the expression in a constructor field to an expression + which uses the identifier. + + The effect is that the field [{ len : 8 : int }] could + be created by calling [set_lident_expr field "len"]. *) + +val set_int_expr : expr field -> int -> expr field +(** Sets the expression to the value of the integer. + + The effect is that the field [{ 2 : 8 : int }] could + be created by calling [set_int_expr field 2]. *) + +val set_string_expr : expr field -> string -> expr field +(** Sets the expression to the value of the string. + + The effect is that the field [{ "MAGIC" : 8*5 : string }] could + be created by calling [set_int_expr field "MAGIC"]. *) + +val set_expr : expr field -> expr -> expr field +(** Sets the expression field to an arbitrary OCaml expression. *) + +(** {3 Accessors} *) + +val get_patt : patt field -> patt +(** Get the pattern from a pattern field. *) + +val get_expr : expr field -> expr +(** Get the expression from an expression field. *) + +val get_length : 'a field -> expr +(** Get the length in bits from a field. Note that what is returned + is an OCaml expression, since lengths can be non-constant. *) + +type endian_expr = + | ConstantEndian of Bitmatch.endian + | EndianExpr of expr + +val get_endian : 'a field -> endian_expr +(** Get the endianness of a field. This is an {!endian_expr} which + could be a constant or an OCaml expression. *) + +val get_signed : 'a field -> bool +(** Get the signedness of a field. *) + +type field_type = Int | String | Bitstring + +val get_type : 'a field -> field_type +(** Get the type of a field, [Int], [String] or [Bitstring]. *) + +val get_location : 'a field -> loc_t +(** Get the source code location of a field. *) diff --git a/pa_bitmatch.ml b/pa_bitmatch.ml index fdd6082..82818f6 100644 --- a/pa_bitmatch.ml +++ b/pa_bitmatch.ml @@ -25,6 +25,7 @@ open Syntax open Ast open Bitmatch +module P = Bitmatch_persistent (* If this is true then we emit some debugging code which can * be useful to tell what is happening during matches. You @@ -70,23 +71,6 @@ let rec expr_is_constant = function | _ -> None) | _ -> None (* Anything else is not constant. *) -(* Field. In bitmatch (patterns) the type is [patt field]. In - * BITSTRING (constructor) the type is [expr field]. - *) -type 'a field = { - field : 'a; (* field ('a is either patt or expr) *) - flen : expr; (* length in bits, may be non-const *) - endian : endian_expr; (* endianness *) - signed : bool; (* true if signed, false if unsigned *) - t : t; (* type *) - _loc : Loc.t; (* location in source code *) - printer : 'a -> string; (* turn the field into a string *) -} -and t = Int | String | Bitstring (* field type *) -and endian_expr = - | ConstantEndian of endian (* a constant little/big/nativeendian *) - | EndianExpr of expr (* an endian expression *) - (* Generate a fresh, unique symbol each time called. *) let gensym = let i = ref 1000 in @@ -95,141 +79,99 @@ let gensym = sprintf "__pabitmatch_%s_%d" name i (* Deal with the qualifiers which appear for a field of both types. *) -let parse_field _loc field flen qs printer = - let endian, signed, t = +let parse_field _loc field qs = + let endian_set, signed_set, type_set, field = match qs with - | None -> (None, None, None) + | None -> (false, false, false, field) | Some qs -> List.fold_left ( - fun (endian, signed, t) qual_expr -> + fun (endian_set, signed_set, type_set, field) qual_expr -> match qual_expr with | "bigendian", None -> - if endian <> None then + if endian_set then Loc.raise _loc (Failure "an endian flag has been set already") else ( - let endian = Some (ConstantEndian BigEndian) in - (endian, signed, t) + let field = P.set_endian field BigEndian in + (true, signed_set, type_set, field) ) | "littleendian", None -> - if endian <> None then + if endian_set then Loc.raise _loc (Failure "an endian flag has been set already") else ( - let endian = Some (ConstantEndian LittleEndian) in - (endian, signed, t) + let field = P.set_endian field LittleEndian in + (true, signed_set, type_set, field) ) | "nativeendian", None -> - if endian <> None then + if endian_set then Loc.raise _loc (Failure "an endian flag has been set already") else ( - let endian = Some (ConstantEndian NativeEndian) in - (endian, signed, t) + let field = P.set_endian field NativeEndian in + (true, signed_set, type_set, field) ) | "endian", Some expr -> - if endian <> None then + if endian_set then Loc.raise _loc (Failure "an endian flag has been set already") else ( - let endian = Some (EndianExpr expr) in - (endian, signed, t) + let field = P.set_endian_expr field expr in + (true, signed_set, type_set, field) ) | "signed", None -> - if signed <> None then + if signed_set then Loc.raise _loc (Failure "a signed flag has been set already") else ( - let signed = Some true in - (endian, signed, t) + let field = P.set_signed field true in + (endian_set, true, type_set, field) ) | "unsigned", None -> - if signed <> None then + if signed_set then Loc.raise _loc (Failure "a signed flag has been set already") else ( - let signed = Some false in - (endian, signed, t) + let field = P.set_signed field false in + (endian_set, true, type_set, field) ) | "int", None -> - if t <> None then + if type_set then Loc.raise _loc (Failure "a type flag has been set already") else ( - let t = Some Int in - (endian, signed, t) + let field = P.set_type_int field in + (endian_set, signed_set, true, field) ) | "string", None -> - if t <> None then + if type_set then Loc.raise _loc (Failure "a type flag has been set already") else ( - let t = Some String in - (endian, signed, t) + let field = P.set_type_string field in + (endian_set, signed_set, true, field) ) | "bitstring", None -> - if t <> None then + if type_set then Loc.raise _loc (Failure "a type flag has been set already") else ( - let t = Some Bitstring in - (endian, signed, t) + let field = P.set_type_bitstring field in + (endian_set, signed_set, true, field) ) | s, Some _ -> Loc.raise _loc (Failure (s ^ ": unknown qualifier, or qualifier should not be followed by an expression")) | s, None -> Loc.raise _loc (Failure (s ^ ": unknown qualifier, or qualifier should be followed by an expression")) - ) (None, None, None) qs in + ) (false, false, false, field) qs in (* If type is set to string or bitstring then endianness and * signedness qualifiers are meaningless and must not be set. *) - if (t = Some Bitstring || t = Some String) - && (endian <> None || signed <> None) then + let () = + let t = P.get_type field in + if (t = P.Bitstring || t = P.String) && (endian_set || signed_set) then Loc.raise _loc ( Failure "string types and endian or signed qualifiers cannot be mixed" - ); - - (* Default endianness, signedness, type. *) - let endian = - match endian with None -> ConstantEndian BigEndian | Some e -> e in - let signed = match signed with None -> false | Some s -> s in - let t = match t with None -> Int | Some t -> t in - - { - field = field; - flen = flen; - endian = endian; - signed = signed; - t = t; - _loc = _loc; - printer = printer; - } - -let string_of_t = function - | Int -> "int" - | String -> "string" - | Bitstring -> "bitstring" - -let patt_printer = function - | <:patt< $lid:id$ >> -> id - | _ -> "[pattern]" - -let expr_printer = function - | <:expr< $lid:id$ >> -> id - | _ -> "[expression]" - -let string_of_field { field = field; flen = flen; - endian = endian; signed = signed; t = t; - _loc = _loc; - printer = printer} = - let flen = - match expr_is_constant flen with - | Some i -> string_of_int i - | None -> "[non-const-len]" in - let endian = - match endian with - | ConstantEndian endian -> string_of_endian endian - | EndianExpr _ -> "endian [expr]" in - let signed = if signed then "signed" else "unsigned" in - let t = string_of_t t in - let loc_fname = Loc.file_name _loc in - let loc_line = Loc.start_line _loc in - let loc_char = Loc.start_off _loc - Loc.start_bol _loc in + ) in - sprintf "%s : %s : %s, %s, %s @ (%S, %d, %d)" - (printer field) flen t endian signed loc_fname loc_line loc_char + (* Default endianness, signedness, type if not set already. *) + let field = if endian_set then field else P.set_endian field BigEndian in + let field = if signed_set then field else P.set_signed field false in + let field = if type_set then field else P.set_type_int field in + + field (* Generate the code for a constructor, ie. 'BITSTRING ...'. *) let output_constructor _loc fields = @@ -251,8 +193,14 @@ let output_constructor _loc fields = (* Convert each field to a simple bitstring-generating expression. *) let fields = List.map ( - fun {field=fexpr; flen=flen; endian=endian; signed=signed; - t=t; _loc=_loc} -> + fun field -> + let fexpr = P.get_expr field in + let flen = P.get_length field in + let endian = P.get_endian field in + let signed = P.get_signed field in + let t = P.get_type field in + let _loc = P.get_location field in + (* Is flen an integer constant? If so, what is it? This * is very simple-minded and only detects simple constants. *) @@ -269,71 +217,71 @@ let output_constructor _loc fields = <:expr> | ((2|3|4|5|6|7|8), _, true) -> <:expr> - | (i, ConstantEndian BigEndian, false) when i <= 31 -> + | (i, P.ConstantEndian BigEndian, false) when i <= 31 -> <:expr> - | (i, ConstantEndian BigEndian, true) when i <= 31 -> + | (i, P.ConstantEndian BigEndian, true) when i <= 31 -> <:expr> - | (i, ConstantEndian LittleEndian, false) when i <= 31 -> + | (i, P.ConstantEndian LittleEndian, false) when i <= 31 -> <:expr> - | (i, ConstantEndian LittleEndian, true) when i <= 31 -> + | (i, P.ConstantEndian LittleEndian, true) when i <= 31 -> <:expr> - | (i, ConstantEndian NativeEndian, false) when i <= 31 -> + | (i, P.ConstantEndian NativeEndian, false) when i <= 31 -> <:expr> - | (i, ConstantEndian NativeEndian, true) when i <= 31 -> + | (i, P.ConstantEndian NativeEndian, true) when i <= 31 -> <:expr> - | (i, EndianExpr expr, false) when i <= 31 -> + | (i, P.EndianExpr expr, false) when i <= 31 -> <:expr> - | (i, EndianExpr expr, true) when i <= 31 -> + | (i, P.EndianExpr expr, true) when i <= 31 -> <:expr> - | (32, ConstantEndian BigEndian, false) -> + | (32, P.ConstantEndian BigEndian, false) -> <:expr> - | (32, ConstantEndian BigEndian, true) -> + | (32, P.ConstantEndian BigEndian, true) -> <:expr> - | (32, ConstantEndian LittleEndian, false) -> + | (32, P.ConstantEndian LittleEndian, false) -> <:expr> - | (32, ConstantEndian LittleEndian, true) -> + | (32, P.ConstantEndian LittleEndian, true) -> <:expr> - | (32, ConstantEndian NativeEndian, false) -> + | (32, P.ConstantEndian NativeEndian, false) -> <:expr> - | (32, ConstantEndian NativeEndian, true) -> + | (32, P.ConstantEndian NativeEndian, true) -> <:expr> - | (32, EndianExpr expr, false) -> + | (32, P.EndianExpr expr, false) -> <:expr> - | (32, EndianExpr expr, true) -> + | (32, P.EndianExpr expr, true) -> <:expr> - | (_, ConstantEndian BigEndian, false) -> + | (_, P.ConstantEndian BigEndian, false) -> <:expr> - | (_, ConstantEndian BigEndian, true) -> + | (_, P.ConstantEndian BigEndian, true) -> <:expr> - | (_, ConstantEndian LittleEndian, false) -> + | (_, P.ConstantEndian LittleEndian, false) -> <:expr> - | (_, ConstantEndian LittleEndian, true) -> + | (_, P.ConstantEndian LittleEndian, true) -> <:expr> - | (_, ConstantEndian NativeEndian, false) -> + | (_, P.ConstantEndian NativeEndian, false) -> <:expr> - | (_, ConstantEndian NativeEndian, true) -> + | (_, P.ConstantEndian NativeEndian, true) -> <:expr> - | (_, EndianExpr expr, false) -> + | (_, P.EndianExpr expr, false) -> <:expr> - | (_, EndianExpr expr, true) -> + | (_, P.EndianExpr expr, true) -> <:expr> in let int_construct = function - | (ConstantEndian BigEndian, false) -> + | (P.ConstantEndian BigEndian, false) -> <:expr> - | (ConstantEndian BigEndian, true) -> + | (P.ConstantEndian BigEndian, true) -> <:expr> - | (ConstantEndian LittleEndian, false) -> + | (P.ConstantEndian LittleEndian, false) -> <:expr> - | (ConstantEndian LittleEndian, true) -> + | (P.ConstantEndian LittleEndian, true) -> <:expr> - | (ConstantEndian NativeEndian, false) -> + | (P.ConstantEndian NativeEndian, false) -> <:expr> - | (ConstantEndian NativeEndian, true) -> + | (P.ConstantEndian NativeEndian, true) -> <:expr> - | (EndianExpr expr, false) -> + | (P.EndianExpr expr, false) -> <:expr> - | (EndianExpr expr, true) -> + | (P.EndianExpr expr, true) -> <:expr> in @@ -345,7 +293,7 @@ let output_constructor _loc fields = * because that's a lot simpler w.r.t. types. It might * be better to move them here. XXX *) - | Int, Some i when i > 0 && i <= 64 -> + | P.Int, Some i when i > 0 && i <= 64 -> let construct_fn = int_construct_const (i,endian,signed) in exn_used := true; @@ -353,7 +301,7 @@ let output_constructor _loc fields = $construct_fn$ $lid:buffer$ $fexpr$ $`int:i$ $lid:exn$ >> - | Int, Some _ -> + | P.Int, Some _ -> Loc.raise _loc (Failure "length of int field must be [1..64]") (* Int field, non-constant length. We need to perform a runtime @@ -363,7 +311,7 @@ let output_constructor _loc fields = * because that's a lot simpler w.r.t. types. It might * be better to move them here. XXX *) - | Int, None -> + | P.Int, None -> let construct_fn = int_construct (endian,signed) in exn_used := true; @@ -378,7 +326,7 @@ let output_constructor _loc fields = >> (* String, constant length > 0, must be a multiple of 8. *) - | String, Some i when i > 0 && i land 7 = 0 -> + | P.String, Some i when i > 0 && i land 7 = 0 -> let bs = gensym "bs" in let j = i lsr 3 in <:expr< @@ -395,20 +343,20 @@ let output_constructor _loc fields = (* String, constant length -1, means variable length string * with no checks. *) - | String, Some (-1) -> + | P.String, Some (-1) -> <:expr< Bitmatch.construct_string $lid:buffer$ $fexpr$ >> (* String, constant length = 0 is probably an error, and so is * any other value. *) - | String, Some _ -> + | P.String, Some _ -> Loc.raise _loc (Failure "length of string must be > 0 and a multiple of 8, or the special value -1") (* String, non-constant length. * We check at runtime that the length is > 0, a multiple of 8, * and matches the declared length. *) - | String, None -> + | P.String, None -> let bslen = gensym "bslen" in let bs = gensym "bs" in <:expr< @@ -436,7 +384,7 @@ let output_constructor _loc fields = >> (* Bitstring, constant length > 0. *) - | Bitstring, Some i when i > 0 -> + | P.Bitstring, Some i when i > 0 -> let bs = gensym "bs" in <:expr< let $lid:bs$ = $fexpr$ in @@ -452,13 +400,13 @@ let output_constructor _loc fields = (* Bitstring, constant length -1, means variable length bitstring * with no checks. *) - | Bitstring, Some (-1) -> + | P.Bitstring, Some (-1) -> <:expr< Bitmatch.construct_bitstring $lid:buffer$ $fexpr$ >> (* Bitstring, constant length = 0 is probably an error, and so is * any other value. *) - | Bitstring, Some _ -> + | P.Bitstring, Some _ -> Loc.raise _loc (Failure "length of bitstring must be > 0 or the special value -1") @@ -467,7 +415,7 @@ let output_constructor _loc fields = * We check at runtime that the length is > 0 and matches * the declared length. *) - | Bitstring, None -> + | P.Bitstring, None -> let bslen = gensym "bslen" in let bs = gensym "bs" in <:expr< @@ -542,9 +490,12 @@ let output_bitmatch _loc bs cases = let rec output_field_extraction inner = function | [] -> inner | field :: fields -> - let {field=fpatt; flen=flen; endian=endian; signed=signed; - t=t; _loc=_loc} - = field in + let fpatt = P.get_patt field in + let flen = P.get_length field in + let endian = P.get_endian field in + let signed = P.get_signed field in + let t = P.get_type field in + let _loc = P.get_location field in (* Is flen an integer constant? If so, what is it? This * is very simple-minded and only detects simple constants. @@ -561,78 +512,78 @@ let output_bitmatch _loc bs cases = <:expr> | ((2|3|4|5|6|7|8), _, true) -> <:expr> - | (i, ConstantEndian BigEndian, false) when i <= 31 -> + | (i, P.ConstantEndian BigEndian, false) when i <= 31 -> <:expr> - | (i, ConstantEndian BigEndian, true) when i <= 31 -> + | (i, P.ConstantEndian BigEndian, true) when i <= 31 -> <:expr> - | (i, ConstantEndian LittleEndian, false) when i <= 31 -> + | (i, P.ConstantEndian LittleEndian, false) when i <= 31 -> <:expr> - | (i, ConstantEndian LittleEndian, true) when i <= 31 -> + | (i, P.ConstantEndian LittleEndian, true) when i <= 31 -> <:expr> - | (i, ConstantEndian NativeEndian, false) when i <= 31 -> + | (i, P.ConstantEndian NativeEndian, false) when i <= 31 -> <:expr> - | (i, ConstantEndian NativeEndian, true) when i <= 31 -> + | (i, P.ConstantEndian NativeEndian, true) when i <= 31 -> <:expr> - | (i, EndianExpr expr, false) when i <= 31 -> + | (i, P.EndianExpr expr, false) when i <= 31 -> <:expr> - | (i, EndianExpr expr, true) when i <= 31 -> + | (i, P.EndianExpr expr, true) when i <= 31 -> <:expr> - | (32, ConstantEndian BigEndian, false) -> + | (32, P.ConstantEndian BigEndian, false) -> <:expr> - | (32, ConstantEndian BigEndian, true) -> + | (32, P.ConstantEndian BigEndian, true) -> <:expr> - | (32, ConstantEndian LittleEndian, false) -> + | (32, P.ConstantEndian LittleEndian, false) -> <:expr> - | (32, ConstantEndian LittleEndian, true) -> + | (32, P.ConstantEndian LittleEndian, true) -> <:expr> - | (32, ConstantEndian NativeEndian, false) -> + | (32, P.ConstantEndian NativeEndian, false) -> <:expr> - | (32, ConstantEndian NativeEndian, true) -> + | (32, P.ConstantEndian NativeEndian, true) -> <:expr> - | (32, EndianExpr expr, false) -> + | (32, P.EndianExpr expr, false) -> <:expr> - | (32, EndianExpr expr, true) -> + | (32, P.EndianExpr expr, true) -> <:expr> - | (_, ConstantEndian BigEndian, false) -> + | (_, P.ConstantEndian BigEndian, false) -> <:expr> - | (_, ConstantEndian BigEndian, true) -> + | (_, P.ConstantEndian BigEndian, true) -> <:expr> - | (_, ConstantEndian LittleEndian, false) -> + | (_, P.ConstantEndian LittleEndian, false) -> <:expr> - | (_, ConstantEndian LittleEndian, true) -> + | (_, P.ConstantEndian LittleEndian, true) -> <:expr> - | (_, ConstantEndian NativeEndian, false) -> + | (_, P.ConstantEndian NativeEndian, false) -> <:expr> - | (_, ConstantEndian NativeEndian, true) -> + | (_, P.ConstantEndian NativeEndian, true) -> <:expr> - | (_, EndianExpr expr, false) -> + | (_, P.EndianExpr expr, false) -> <:expr> - | (_, EndianExpr expr, true) -> + | (_, P.EndianExpr expr, true) -> <:expr> in let int_extract = function - | (ConstantEndian BigEndian, false) -> + | (P.ConstantEndian BigEndian, false) -> <:expr> - | (ConstantEndian BigEndian, true) -> + | (P.ConstantEndian BigEndian, true) -> <:expr> - | (ConstantEndian LittleEndian, false) -> + | (P.ConstantEndian LittleEndian, false) -> <:expr> - | (ConstantEndian LittleEndian, true) -> + | (P.ConstantEndian LittleEndian, true) -> <:expr> - | (ConstantEndian NativeEndian, false) -> + | (P.ConstantEndian NativeEndian, false) -> <:expr> - | (ConstantEndian NativeEndian, true) -> + | (P.ConstantEndian NativeEndian, true) -> <:expr> - | (EndianExpr expr, false) -> + | (P.EndianExpr expr, false) -> <:expr> - | (EndianExpr expr, true) -> + | (P.EndianExpr expr, true) -> <:expr> in let expr = match t, flen_is_const with (* Common case: int field, constant flen *) - | Int, Some i when i > 0 && i <= 64 -> + | P.Int, Some i when i > 0 && i <= 64 -> let extract_fn = int_extract_const (i,endian,signed) in let v = gensym "val" in <:expr< @@ -643,14 +594,14 @@ let output_bitmatch _loc bs cases = ) >> - | Int, Some _ -> + | P.Int, Some _ -> Loc.raise _loc (Failure "length of int field must be [1..64]") (* Int field, non-const flen. We have to test the range of * the field at runtime. If outside the range it's a no-match * (not an error). *) - | Int, None -> + | P.Int, None -> let extract_fn = int_extract (endian,signed) in let v = gensym "val" in <:expr< @@ -662,7 +613,7 @@ let output_bitmatch _loc bs cases = >> (* String, constant flen > 0. *) - | String, Some i when i > 0 && i land 7 = 0 -> + | P.String, Some i when i > 0 && i land 7 = 0 -> let bs = gensym "bs" in <:expr< if $lid:len$ >= $`int:i$ then ( @@ -678,7 +629,7 @@ let output_bitmatch _loc bs cases = (* String, constant flen = -1, means consume all the * rest of the input. *) - | String, Some i when i = -1 -> + | P.String, Some i when i = -1 -> let bs = gensym "bs" in <:expr< let $lid:bs$, $lid:off$, $lid:len$ = @@ -688,13 +639,13 @@ let output_bitmatch _loc bs cases = | _ -> () >> - | String, Some _ -> + | P.String, Some _ -> Loc.raise _loc (Failure "length of string must be > 0 and a multiple of 8, or the special value -1") (* String field, non-const flen. We check the flen is > 0 * and a multiple of 8 (-1 is not allowed here), at runtime. *) - | String, None -> + | P.String, None -> let bs = gensym "bs" in <:expr< if $flen$ >= 0 && $flen$ <= $lid:len$ @@ -712,7 +663,7 @@ let output_bitmatch _loc bs cases = * At the moment all we can do is assign the bitstring to an * identifier. *) - | Bitstring, Some i when i >= 0 -> + | P.Bitstring, Some i when i >= 0 -> let ident = match fpatt with | <:patt< $lid:ident$ >> -> ident @@ -732,7 +683,7 @@ let output_bitmatch _loc bs cases = (* Bitstring, constant flen = -1, means consume all the * rest of the input. *) - | Bitstring, Some i when i = -1 -> + | P.Bitstring, Some i when i = -1 -> let ident = match fpatt with | <:patt< $lid:ident$ >> -> ident @@ -746,13 +697,13 @@ let output_bitmatch _loc bs cases = $inner$ >> - | Bitstring, Some _ -> + | P.Bitstring, Some _ -> Loc.raise _loc (Failure "length of bitstring must be >= 0 or the special value -1") (* Bitstring field, non-const flen. We check the flen is >= 0 * (-1 is not allowed here) at runtime. *) - | Bitstring, None -> + | P.Bitstring, None -> let ident = match fpatt with | <:patt< $lid:ident$ >> -> ident @@ -773,7 +724,7 @@ let output_bitmatch _loc bs cases = (* Emit extra debugging code. *) let expr = if not debug then expr else ( - let field = string_of_field field in + let field = P.string_of_field field in <:expr< if !Bitmatch.debug then ( @@ -865,7 +816,10 @@ EXTEND Gram patt_field: [ [ fpatt = patt; ":"; len = expr LEVEL "top"; qs = OPT [ ":"; qs = qualifiers -> qs ] -> - parse_field _loc fpatt len qs patt_printer + let field = P.create_pattern_field _loc in + let field = P.set_patt field fpatt in + let field = P.set_length field len in + parse_field _loc field qs ] ]; @@ -885,7 +839,10 @@ EXTEND Gram constr_field: [ [ fexpr = expr LEVEL "top"; ":"; len = expr LEVEL "top"; qs = OPT [ ":"; qs = qualifiers -> qs ] -> - parse_field _loc fexpr len qs expr_printer + let field = P.create_constructor_field _loc in + let field = P.set_expr field fexpr in + let field = P.set_length field len in + parse_field _loc field qs ] ];