(* Bitmatch syntax extension.
- * $Id: pa_bitmatch.ml,v 1.2 2008-04-01 08:56:43 rjones Exp $
+ * Copyright (C) 2008 Red Hat Inc., Richard W.M. Jones
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version,
+ * with the OCaml linking exception described in COPYING.LIB.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * $Id$
*)
open Printf
open Syntax
open Ast
-type m = Fields of f list (* field ; field -> ... *)
- | Bind of string option (* _ -> ... *)
-and f = {
- ident : string; (* field name *)
- flen : expr; (* length in bits, may be non-const *)
- endian : endian; (* endianness *)
- signed : bool; (* true if signed, false if unsigned *)
- t : t; (* type *)
-}
-and endian = BigEndian | LittleEndian | NativeEndian
-and t = Int | Bitstring
+open Bitmatch
+module P = Bitmatch_persistent
+
+(* If this is true then we emit some debugging code which can
+ * be useful to tell what is happening during matches. You
+ * also need to do 'Bitmatch.debug := true' in your main program.
+ *
+ * If this is false then no extra debugging code is emitted.
+ *)
+let debug = false
+
+(* Hashtable storing named persistent patterns. *)
+let pattern_hash : (string, P.pattern) Hashtbl.t = Hashtbl.create 13
+
+let locfail _loc msg = Loc.raise _loc (Failure msg)
+
+ (* Work out if an expression is an integer constant.
+  *
+  * Returns [Some i] if so (where i is the integer value), else [None].
+  *
+  * Fairly simplistic algorithm: we can only detect integer literals and
+  * applications of a known binary integer operator to two constant
+  * sub-expressions, such as [k], [k+c], [k-c] etc.
+  *
+  * Folding happens while the syntax extension runs, so it must never
+  * raise: a literal that int_of_string rejects, or a division / modulus
+  * by a constant zero, is reported as "not a constant" ([None]) and the
+  * expression is left for the generated code to evaluate at runtime.
+  *)
+ let rec expr_is_constant = function
+   | <:expr< $int:i$ >> ->                (* Literal integer constant. *)
+     (try Some (int_of_string i) with Failure _ -> None)
+   | <:expr< $lid:op$ $a$ $b$ >> ->
+     (match expr_is_constant a, expr_is_constant b with
+      | Some a, Some b ->                 (* Integer binary operations. *)
+        let ops = ["+", (+); "-", (-); "*", ( * ); "/", (/);
+                   "land", (land); "lor", (lor); "lxor", (lxor);
+                   "lsl", (lsl); "lsr", (lsr); "asr", (asr);
+                   "mod", (mod)] in
+        (* Not_found: unknown operator.  Division_by_zero: [/] or [mod]
+         * with a zero right-hand side; do not crash the preprocessor.
+         *)
+        (try Some ((List.assoc op ops) a b)
+         with Not_found | Division_by_zero -> None)
+      | _ -> None)
+   | _ -> None
(* Generate a fresh, unique symbol each time called. *)
let gensym =
incr i; let i = !i in
sprintf "__pabitmatch_%s_%d" name i
-(* Deal with the qualifiers which appear for a field. *)
-let output_field _loc name flen qs =
- let endian, signed, t =
+(* Deal with the qualifiers which appear for a field of both types. *)
+let parse_field _loc field qs =
+ let fail = locfail _loc in
+
+ let endian_set, signed_set, type_set, offset_set, field =
match qs with
- | None -> (None, None, None)
+ | None -> (false, false, false, false, field)
| Some qs ->
- List.fold_left (
- fun (endian, signed, t) q ->
- match q with
- | "bigendian" ->
- if endian <> None then
- Loc.raise _loc (Failure "an endian flag has been set already")
- else (
- let endian = Some BigEndian in
- (endian, signed, t)
- )
- | "littleendian" ->
- if endian <> None then
- Loc.raise _loc (Failure "an endian flag has been set already")
- else (
- let endian = Some LittleEndian in
- (endian, signed, t)
- )
- | "nativeendian" ->
- if endian <> None then
- Loc.raise _loc (Failure "an endian flag has been set already")
- else (
- let endian = Some NativeEndian in
- (endian, signed, t)
- )
- | "signed" ->
- if signed <> None then
- Loc.raise _loc (Failure "a signed flag has been set already")
- else (
- let signed = Some true in
- (endian, signed, t)
- )
- | "unsigned" ->
- if signed <> None then
- Loc.raise _loc (Failure "a signed flag has been set already")
- else (
- let signed = Some false in
- (endian, signed, t)
- )
- | "int" ->
- if t <> None then
- Loc.raise _loc (Failure "a type flag has been set already")
- else (
- let t = Some Int in
- (endian, signed, t)
- )
- | "bitstring" ->
- if t <> None then
- Loc.raise _loc (Failure "a type flag has been set already")
- else (
- let t = Some Bitstring in
- (endian, signed, t)
- )
- | s ->
- Loc.raise _loc (Failure (s ^ ": unknown qualifier"))
- ) (None, None, None) qs in
+ let check already_set msg = if already_set then fail msg in
+ let apply_qualifier
+ (endian_set, signed_set, type_set, offset_set, field) =
+ function
+ | "endian", Some expr ->
+ check endian_set "an endian flag has been set already";
+ let field = P.set_endian_expr field expr in
+ (true, signed_set, type_set, offset_set, field)
+ | "endian", None ->
+ fail "qualifier 'endian' should be followed by an expression"
+ | "offset", Some expr ->
+ check offset_set "an offset has been set already";
+ let field = P.set_offset field expr in
+ (endian_set, signed_set, type_set, true, field)
+ | "offset", None ->
+ fail "qualifier 'offset' should be followed by an expression"
+ | s, Some _ ->
+ fail (s ^ ": unknown qualifier, or qualifier should not be followed by an expression")
+ | qual, None ->
+ let endian_quals = ["bigendian", BigEndian;
+ "littleendian", LittleEndian;
+ "nativeendian", NativeEndian] in
+ let sign_quals = ["signed", true; "unsigned", false] in
+ let type_quals = ["int", P.set_type_int;
+ "string", P.set_type_string;
+ "bitstring", P.set_type_bitstring] in
+ if List.mem_assoc qual endian_quals then (
+ check endian_set "an endian flag has been set already";
+ let field = P.set_endian field (List.assoc qual endian_quals) in
+ (true, signed_set, type_set, offset_set, field)
+ ) else if List.mem_assoc qual sign_quals then (
+ check signed_set "a signed flag has been set already";
+ let field = P.set_signed field (List.assoc qual sign_quals) in
+ (endian_set, true, type_set, offset_set, field)
+ ) else if List.mem_assoc qual type_quals then (
+ check type_set "a type flag has been set already";
+ let field = List.assoc qual type_quals field in
+ (endian_set, signed_set, true, offset_set, field)
+ ) else
+ fail (qual ^ ": unknown qualifier, or qualifier should be followed by an expression") in
+ List.fold_left apply_qualifier (false, false, false, false, field) qs in
+
+ (* If type is set to string or bitstring then endianness and
+ * signedness qualifiers are meaningless and must not be set.
+ *)
+ let () =
+ let t = P.get_type field in
+ if (t = P.Bitstring || t = P.String) && (endian_set || signed_set) then
+ fail "string types and endian or signed qualifiers cannot be mixed" in
+
+ (* Default endianness, signedness, type if not set already. *)
+ let field = if endian_set then field else P.set_endian field BigEndian in
+ let field = if signed_set then field else P.set_signed field false in
+ let field = if type_set then field else P.set_type_int field in
+
+ field
+
+ (* Pick the Bitmatch runtime function for an integer field.
+  *
+  * [funcname] is the family prefix (the callers in this file pass
+  * "construct" or "extract"), [length] is [Some bits] when the field
+  * width is known at compile time and [None] otherwise, [endian] is
+  * either a constant endianness or an expression computing it, and
+  * [signed] selects the signed or unsigned variant.  The result is an
+  * expression naming the function; for an endianness expression the
+  * expression is already applied as the first argument.
+  *
+  * XXX The meaning of signed/unsigned breaks down at
+  * 31, 32, 63 and 64 bits.
+  *)
+ let build_bitmatch_call _loc funcname length endian signed =
+   let sign_suffix = if signed then "signed" else "unsigned" in
+   match length with
+   (* A single bit has its own function, whatever the other qualifiers. *)
+   | Some 1 -> <:expr<Bitmatch.$lid:funcname ^ "_bit"$ >>
+   (* 2..8 bits fit in a char; endianness is irrelevant. *)
+   | Some n when 2 <= n && n <= 8 ->
+     let char_fn = Printf.sprintf "%s_char_%s" funcname sign_suffix in
+     <:expr< Bitmatch.$lid:char_fn$ >>
+   (* Everything else: choose the narrowest integer type that fits, and
+    * fall back to int64 when the width is unknown.
+    *)
+   | Some _ | None ->
+     let width =
+       match length with
+       | Some n when n <= 31 -> "int"
+       | Some 32 -> "int32"
+       | Some _ | None -> "int64" in
+     let int_fn tag =
+       Printf.sprintf "%s_%s_%s_%s" funcname width tag sign_suffix in
+     match endian with
+     | P.EndianExpr expr ->
+       let call = int_fn "ee" in
+       <:expr< Bitmatch.$lid:call$ $expr$ >>
+     | P.ConstantEndian constant ->
+       let tag =
+         match constant with
+         | BigEndian -> "be"
+         | LittleEndian -> "le"
+         | NativeEndian -> "ne" in
+       let call = int_fn tag in
+       <:expr< Bitmatch.$lid:call$ >>
+
+(* Generate the code for a constructor, ie. 'BITSTRING ...'. *)
+let output_constructor _loc fields =
+ (* This function makes code to raise a Bitmatch.Construct_failure exception
+ * containing a message and the current _loc context.
+ * (Thanks to Bluestorm for suggesting this).
+ *)
+ let construct_failure _loc msg =
+ <:expr<
+ Bitmatch.Construct_failure
+ ($`str:msg$,
+ $`str:Loc.file_name _loc$,
+ $`int:Loc.start_line _loc$,
+ $`int:Loc.start_off _loc - Loc.start_bol _loc$)
+ >>
+ in
+ let raise_construct_failure _loc msg =
+ <:expr< raise $construct_failure _loc msg$ >>
+ in
+
+ (* Bitstrings are created like the 'Buffer' module (in fact, using
+ * the Buffer module), by appending snippets to a growing buffer.
+ * This is reasonably efficient and avoids a lot of garbage.
+ *)
+ let buffer = gensym "buffer" in
+
+ (* General exception which is raised inside the constructor functions
+ * when an int expression is out of range at runtime.
+ *)
+ let exn = gensym "exn" in
+ let exn_used = ref false in
+
+ (* Convert each field to a simple bitstring-generating expression. *)
+ let fields = List.map (
+ fun field ->
+ let fexpr = P.get_expr field in
+ let flen = P.get_length field in
+ let endian = P.get_endian field in
+ let signed = P.get_signed field in
+ let t = P.get_type field in
+ let _loc = P.get_location field in
+ let offset = P.get_offset field in
+
+ let fail = locfail _loc in
+
+ (* offset() not supported in constructors. Implementation of
+ * forward-only offsets is fairly straightforward: we would
+ * need to just calculate the length of padding here and add
+ * it to what has been constructed. For general offsets,
+ * including going backwards, that would require a rethink in
+ * how we construct bitstrings.
+ *)
+ if offset <> None then
+ fail "offset expressions are not supported in BITSTRING constructors";
+
+ (* Is flen an integer constant? If so, what is it? This
+ * is very simple-minded and only detects simple constants.
+ *)
+ let flen_is_const = expr_is_constant flen in
+
+ let int_construct_const (i, endian, signed) =
+ build_bitmatch_call _loc "construct" (Some i) endian signed in
+ let int_construct (endian, signed) =
+ build_bitmatch_call _loc "construct" None endian signed in
+
+ let expr =
+ match t, flen_is_const with
+ (* Common case: int field, constant flen.
+ *
+ * Range checks are done inside the construction function
+ * because that's a lot simpler w.r.t. types. It might
+ * be better to move them here. XXX
+ *)
+ | P.Int, Some i when i > 0 && i <= 64 ->
+ let construct_fn = int_construct_const (i,endian,signed) in
+ exn_used := true;
+
+ <:expr<
+ $construct_fn$ $lid:buffer$ $fexpr$ $`int:i$ $lid:exn$
+ >>
+
+ | P.Int, Some _ ->
+ fail "length of int field must be [1..64]"
+
+ (* Int field, non-constant length. We need to perform a runtime
+ * test to ensure the length is [1..64].
+ *
+ * Range checks are done inside the construction function
+ * because that's a lot simpler w.r.t. types. It might
+ * be better to move them here. XXX
+ *)
+ | P.Int, None ->
+ let construct_fn = int_construct (endian,signed) in
+ exn_used := true;
+
+ <:expr<
+ if $flen$ >= 1 && $flen$ <= 64 then
+ $construct_fn$ $lid:buffer$ $fexpr$ $flen$ $lid:exn$
+ else
+ $raise_construct_failure _loc "length of int field must be [1..64]"$
+ >>
+
+ (* String, constant length > 0, must be a multiple of 8. *)
+ | P.String, Some i when i > 0 && i land 7 = 0 ->
+ let bs = gensym "bs" in
+ let j = i lsr 3 in
+ <:expr<
+ let $lid:bs$ = $fexpr$ in
+ if String.length $lid:bs$ = $`int:j$ then
+ Bitmatch.construct_string $lid:buffer$ $lid:bs$
+ else
+ $raise_construct_failure _loc "length of string does not match declaration"$
+ >>
+
+ (* String, constant length -1, means variable length string
+ * with no checks.
+ *)
+ | P.String, Some (-1) ->
+ <:expr< Bitmatch.construct_string $lid:buffer$ $fexpr$ >>
- (* If type is set to bitstring then endianness and signedness
- * qualifiers are meaningless and must not be set.
+ (* String, constant length = 0 is probably an error, and so is
+ * any other value.
+ *)
+ | P.String, Some _ ->
+ fail "length of string must be > 0 and a multiple of 8, or the special value -1"
+
+ (* String, non-constant length.
+ * We check at runtime that the length is > 0, a multiple of 8,
+ * and matches the declared length.
+ *)
+ | P.String, None ->
+ let bslen = gensym "bslen" in
+ let bs = gensym "bs" in
+ <:expr<
+ let $lid:bslen$ = $flen$ in
+ if $lid:bslen$ > 0 then (
+ if $lid:bslen$ land 7 = 0 then (
+ let $lid:bs$ = $fexpr$ in
+ if String.length $lid:bs$ = ($lid:bslen$ lsr 3) then
+ Bitmatch.construct_string $lid:buffer$ $lid:bs$
+ else
+ $raise_construct_failure _loc "length of string does not match declaration"$
+ ) else
+ $raise_construct_failure _loc "length of string must be a multiple of 8"$
+ ) else
+ $raise_construct_failure _loc "length of string must be > 0"$
+ >>
+
+ (* Bitstring, constant length >= 0. *)
+ | P.Bitstring, Some i when i >= 0 ->
+ let bs = gensym "bs" in
+ <:expr<
+ let $lid:bs$ = $fexpr$ in
+ if Bitmatch.bitstring_length $lid:bs$ = $`int:i$ then
+ Bitmatch.construct_bitstring $lid:buffer$ $lid:bs$
+ else
+ $raise_construct_failure _loc "length of bitstring does not match declaration"$
+ >>
+
+ (* Bitstring, constant length -1, means variable length bitstring
+ * with no checks.
+ *)
+ | P.Bitstring, Some (-1) ->
+ <:expr< Bitmatch.construct_bitstring $lid:buffer$ $fexpr$ >>
+
+ (* Bitstring, constant length < -1 is an error. *)
+ | P.Bitstring, Some _ ->
+ fail "length of bitstring must be >= 0 or the special value -1"
+
+ (* Bitstring, non-constant length.
+  * We check at runtime that the length is >= 0 and matches
+  * the declared length.  The length expression is evaluated once
+  * (bound to bslen) and the failure message for a negative length
+  * states the same bound the test uses (>= 0, so zero is allowed).
+  *)
+ | P.Bitstring, None ->
+   let bslen = gensym "bslen" in
+   let bs = gensym "bs" in
+   <:expr<
+     let $lid:bslen$ = $flen$ in
+     if $lid:bslen$ >= 0 then (
+       let $lid:bs$ = $fexpr$ in
+       if Bitmatch.bitstring_length $lid:bs$ = $lid:bslen$ then
+         Bitmatch.construct_bitstring $lid:buffer$ $lid:bs$
+       else
+         $raise_construct_failure _loc "length of bitstring does not match declaration"$
+     ) else
+       $raise_construct_failure _loc "length of bitstring must be >= 0"$
+   >> in
+ expr
+ ) fields in
+
+ (* Create the final bitstring. Start by creating an empty buffer
+ * and then evaluate each expression above in turn which will
+ * append some more to the bitstring buffer. Finally extract
+ * the bitstring.
+ *
+ * XXX We almost have enough information to be able to guess
+ * a good initial size for the buffer.
*)
- if t = Some Bitstring && (endian <> None || signed <> None) then
- Loc.raise _loc (
- Failure "bitstring type and endian or signed qualifiers cannot be mixed"
- );
-
- (* Default endianness, signedness, type. *)
- let endian = match endian with None -> BigEndian | Some e -> e in
- let signed = match signed with None -> false | Some s -> s in
- let t = match t with None -> Int | Some t -> t in
-
- {
- ident = name;
- flen = flen;
- endian = endian;
- signed = signed;
- t = t;
- }
+ let fields =
+ match fields with
+ | [] -> <:expr< [] >>
+ | h::t -> List.fold_left (fun h t -> <:expr< $h$; $t$ >>) h t in
+
+ let expr =
+ <:expr<
+ let $lid:buffer$ = Bitmatch.Buffer.create () in
+ $fields$;
+ Bitmatch.Buffer.contents $lid:buffer$
+ >> in
+
+ if !exn_used then
+ <:expr<
+ let $lid:exn$ = $construct_failure _loc "value out of range"$ in
+ $expr$
+ >>
+ else
+ expr
(* Generate the code for a bitmatch statement. '_loc' is the
* location, 'bs' is the bitstring parameter, 'cases' are
* the list of cases to test against.
*)
let output_bitmatch _loc bs cases =
- let data = gensym "data" and off = gensym "off" and len = gensym "len" in
- let result = gensym "result" in
+ (* These symbols are used through the generated code to record our
+ * current position within the bitstring:
+ *
+ * data - original bitstring data (string, never changes)
+ *
+ * off - current offset within data (int, increments as we move through
+ * the bitstring)
+ * len - current remaining length within data (int, decrements as
+ * we move through the bitstring)
+ *
+ * original_off - saved offset at the start of the match (never changes)
+ * original_len - saved length at the start of the match (never changes)
+ *)
+ let data = gensym "data"
+ and off = gensym "off"
+ and len = gensym "len"
+ and original_off = gensym "original_off"
+ and original_len = gensym "original_len"
+ (* This is where the result will be stored (a reference). *)
+ and result = gensym "result" in
(* This generates the field extraction code for each
- * field a single case. Each field must be wider than
- * the minimum permitted for the type and there must be
- * enough remaining data in the bitstring to satisfy it.
+ * field in a single case. There must be enough remaining data
+ * in the bitstring to satisfy the field.
+ *
* As we go through the fields, symbols 'data', 'off' and 'len'
* track our position and remaining length in the bitstring.
*
- * The whole thing is a lot of nested 'if' statements. Code
- * is generated from the inner-most (last) field outwards.
+ * The whole thing is a lot of nested 'if'/'match' statements.
+ * Code is generated from the inner-most (last) field outwards.
*)
let rec output_field_extraction inner = function
| [] -> inner
- | {ident=ident; flen=flen; endian=endian; signed=signed; t=t} :: fields ->
- (* If length an integer constant? If so, what is it? This
- * is very simple-minded and only detects simple constants.
+ | field :: fields ->
+ let fpatt = P.get_patt field in
+ let flen = P.get_length field in
+ let endian = P.get_endian field in
+ let signed = P.get_signed field in
+ let t = P.get_type field in
+ let _loc = P.get_location field in
+ let offset = P.get_offset field in
+
+ let fail = locfail _loc in
+
+ (* Is flen (field len) an integer constant? If so, what is it?
+ * This will be [Some i] if it's a constant or [None] if it's
+ * non-constant or we couldn't determine.
*)
- let flen_is_const =
- match flen with
- | <:expr< $int:i$ >> -> Some (int_of_string i)
- | _ -> None in
-
- let name_of_int_extract_const = function
- (* XXX As an enhancement we should allow a 64-bit-only
- * mode which lets us use 'int' up to 63 bits and won't
- * compile on 32-bit platforms.
- *)
- (* XXX The meaning of signed/unsigned breaks down at
- * 31, 32, 63 and 64 bits.
- *)
- | (1, _, _) -> "extract_bit"
- | ((2|3|4|5|6|7|8), _, false) -> "extract_char_unsigned"
- | ((2|3|4|5|6|7|8), _, true) -> "extract_char_signed"
- | (i, BigEndian, false) when i <= 31 -> "extract_int_be_unsigned"
- | (i, BigEndian, true) when i <= 31 -> "extract_int_be_signed"
- | (i, LittleEndian, false) when i <= 31 -> "extract_int_le_unsigned"
- | (i, LittleEndian, true) when i <= 31 -> "extract_int_le_signed"
- | (i, NativeEndian, false) when i <= 31 -> "extract_int_ne_unsigned"
- | (i, NativeEndian, true) when i <= 31 -> "extract_int_ne_signed"
- | (32, BigEndian, false) -> "extract_int32_be_unsigned"
- | (32, BigEndian, true) -> "extract_int32_be_signed"
- | (32, LittleEndian, false) -> "extract_int32_le_unsigned"
- | (32, LittleEndian, true) -> "extract_int32_le_signed"
- | (32, NativeEndian, false) -> "extract_int32_ne_unsigned"
- | (32, NativeEndian, true) -> "extract_int32_ne_signed"
- | (_, BigEndian, false) -> "extract_int64_be_unsigned"
- | (_, BigEndian, true) -> "extract_int64_be_signed"
- | (_, LittleEndian, false) -> "extract_int64_le_unsigned"
- | (_, LittleEndian, true) -> "extract_int64_le_signed"
- | (_, NativeEndian, false) -> "extract_int64_ne_unsigned"
- | (_, NativeEndian, true) -> "extract_int64_ne_signed"
- in
- let name_of_int_extract = function
- (* XXX As an enhancement we should allow users to
- * specify that a field length can fit into a char/int/int32
- * (of course, this would have to be checked at runtime).
- *)
- | (BigEndian, false) -> "extract_int64_be_unsigned"
- | (BigEndian, true) -> "extract_int64_be_signed"
- | (LittleEndian, false) -> "extract_int64_le_unsigned"
- | (LittleEndian, true) -> "extract_int64_le_signed"
- | (NativeEndian, false) -> "extract_int64_ne_unsigned"
- | (NativeEndian, true) -> "extract_int64_ne_signed"
- in
+ let flen_is_const = expr_is_constant flen in
+
+ let int_extract_const (i, endian, signed) =
+ build_bitmatch_call _loc "extract" (Some i) endian signed in
+ let int_extract (endian, signed) =
+ build_bitmatch_call _loc "extract" None endian signed in
let expr =
match t, flen_is_const with
(* Common case: int field, constant flen *)
- | Int, Some i when i > 0 && i <= 64 ->
- let extract_func = name_of_int_extract_const (i,endian,signed) in
+ | P.Int, Some i when i > 0 && i <= 64 ->
+ let extract_fn = int_extract_const (i,endian,signed) in
+ let v = gensym "val" in
<:expr<
- if $lid:len$ >= $flen$ then (
- let $lid:ident$, $lid:off$, $lid:len$ =
- Bitmatch.$lid:extract_func$ $lid:data$ $lid:off$ $lid:len$
- $flen$ in
- $inner$
+ if $lid:len$ >= $`int:i$ then (
+ let $lid:v$, $lid:off$, $lid:len$ =
+ $extract_fn$ $lid:data$ $lid:off$ $lid:len$ $`int:i$ in
+ match $lid:v$ with $fpatt$ when true -> $inner$ | _ -> ()
)
>>
- | Int, Some _ ->
- Loc.raise _loc (Failure "length of int field must be [1..64]")
+ | P.Int, Some _ ->
+ fail "length of int field must be [1..64]"
(* Int field, non-const flen. We have to test the range of
* the field at runtime. If outside the range it's a no-match
* (not an error).
*)
- | Int, None ->
- let extract_func = name_of_int_extract (endian,signed) in
+ | P.Int, None ->
+ let extract_fn = int_extract (endian,signed) in
+ let v = gensym "val" in
<:expr<
- if $flen$ >= 1 && $flen$ <= 64 && $flen$ >= $lid:len$ then (
- let $lid:ident$, $lid:off$, $lid:len$ =
- Bitmatch.$lid:extract_func$ $lid:data$ $lid:off$ $lid:len$
- $flen$ in
- $inner$
+ if $flen$ >= 1 && $flen$ <= 64 && $flen$ <= $lid:len$ then (
+ let $lid:v$, $lid:off$, $lid:len$ =
+ $extract_fn$ $lid:data$ $lid:off$ $lid:len$ $flen$ in
+ match $lid:v$ with $fpatt$ when true -> $inner$ | _ -> ()
)
>>
- (* Bitstring, constant flen >= 0. *)
- | Bitstring, Some i when i >= 0 ->
+ (* String, constant flen > 0. *)
+ | P.String, Some i when i > 0 && i land 7 = 0 ->
+ let bs = gensym "bs" in
<:expr<
- if $lid:len$ >= $flen$ then (
+ if $lid:len$ >= $`int:i$ then (
+ let $lid:bs$, $lid:off$, $lid:len$ =
+ Bitmatch.extract_bitstring $lid:data$ $lid:off$ $lid:len$
+ $`int:i$ in
+ match Bitmatch.string_of_bitstring $lid:bs$ with
+ | $fpatt$ when true -> $inner$
+ | _ -> ()
+ )
+ >>
+
+ (* String, constant flen = -1, means consume all the
+ * rest of the input.
+ *)
+ | P.String, Some i when i = -1 ->
+ let bs = gensym "bs" in
+ <:expr<
+ let $lid:bs$, $lid:off$, $lid:len$ =
+ Bitmatch.extract_remainder $lid:data$ $lid:off$ $lid:len$ in
+ match Bitmatch.string_of_bitstring $lid:bs$ with
+ | $fpatt$ when true -> $inner$
+ | _ -> ()
+ >>
+
+ | P.String, Some _ ->
+ fail "length of string must be > 0 and a multiple of 8, or the special value -1"
+
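+ (* String field, non-const flen. At runtime we check that flen is
+ * >= 0, a multiple of 8 and no larger than the remaining length
+ * (-1 is not allowed here). Note that, unlike the constant case,
+ * a length of 0 is accepted by this test.
+ *)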
+ | P.String, None ->
+ let bs = gensym "bs" in
+ <:expr<
+ if $flen$ >= 0 && $flen$ <= $lid:len$
+ && $flen$ land 7 = 0 then (
+ let $lid:bs$, $lid:off$, $lid:len$ =
+ Bitmatch.extract_bitstring
+ $lid:data$ $lid:off$ $lid:len$ $flen$ in
+ match Bitmatch.string_of_bitstring $lid:bs$ with
+ | $fpatt$ when true -> $inner$
+ | _ -> ()
+ )
+ >>
+
+ (* Bitstring, constant flen >= 0.
+ * At the moment all we can do is assign the bitstring to an
+ * identifier.
+ *)
+ | P.Bitstring, Some i when i >= 0 ->
+ let ident =
+ match fpatt with
+ | <:patt< $lid:ident$ >> -> ident
+ | <:patt< _ >> -> "_"
+ | _ ->
+ fail "cannot compare a bitstring to a constant" in
+ <:expr<
+ if $lid:len$ >= $`int:i$ then (
let $lid:ident$, $lid:off$, $lid:len$ =
Bitmatch.extract_bitstring $lid:data$ $lid:off$ $lid:len$
- $flen$ in
+ $`int:i$ in
$inner$
)
>>
(* Bitstring, constant flen = -1, means consume all the
* rest of the input.
*)
- | Bitstring, Some i when i = -1 ->
+ | P.Bitstring, Some i when i = -1 ->
+ let ident =
+ match fpatt with
+ | <:patt< $lid:ident$ >> -> ident
+ | <:patt< _ >> -> "_"
+ | _ ->
+ fail "cannot compare a bitstring to a constant" in
<:expr<
let $lid:ident$, $lid:off$, $lid:len$ =
Bitmatch.extract_remainder $lid:data$ $lid:off$ $lid:len$ in
$inner$
>>
- | Bitstring, Some _ ->
- Loc.raise _loc (Failure "length of bitstring must be >= 0 or the special value -1")
+ | P.Bitstring, Some _ ->
+ fail "length of bitstring must be >= 0 or the special value -1"
(* Bitstring field, non-const flen. We check the flen is >= 0
* (-1 is not allowed here) at runtime.
*)
- | Bitstring, None ->
+ | P.Bitstring, None ->
+ let ident =
+ match fpatt with
+ | <:patt< $lid:ident$ >> -> ident
+ | <:patt< _ >> -> "_"
+ | _ ->
+ fail "cannot compare a bitstring to a constant" in
<:expr<
- if $flen$ >= 0 && $lid:len$ >= $flen$ then (
+ if $flen$ >= 0 && $flen$ <= $lid:len$ then (
let $lid:ident$, $lid:off$, $lid:len$ =
Bitmatch.extract_bitstring $lid:data$ $lid:off$ $lid:len$
$flen$ in
>>
in
+ (* Computed offset: only offsets forward are supported.
+ *
+ * We try hard to optimize this based on what we know. Are
+ * we at a predictable offset now? (Look at the outer 'fields'
+ * list and see if they all have constant field length starting
+ * at some constant offset). Is this offset constant?
+ *
+ * Based on this we can do a lot of the computation at
+ * compile time, or defer it to runtime only if necessary.
+ *
+ * In all cases, the off and len fields get updated.
+ *)
+ let expr =
+ match offset with
+ | None -> expr (* common case: there was no offset expression *)
+ | Some offset_expr ->
+ (* This will be [Some i] if offset is a constant expression
+ * or [None] if it's a non-constant.
+ *)
+ let requested_offset = expr_is_constant offset_expr in
+
+ (* This will be [Some i] if our current offset is known
+ * at compile time, or [None] if we can't determine it.
+ *)
+ let current_offset =
+ let has_constant_offset field =
+ match P.get_offset field with
+ | None -> false
+ | Some expr ->
+ match expr_is_constant expr with
+ | None -> false
+ | Some i -> true
+ in
+ let get_constant_offset field =
+ match P.get_offset field with
+ | None -> assert false
+ | Some expr ->
+ match expr_is_constant expr with
+ | None -> assert false
+ | Some i -> i
+ in
+
+ let has_constant_len field =
+ match expr_is_constant (P.get_length field) with
+ | None -> false
+ | Some i when i > 0 -> true
+ | Some _ -> false
+ in
+ let get_constant_len field =
+ match expr_is_constant (P.get_length field) with
+ | None -> assert false
+ | Some i when i > 0 -> i
+ | Some _ -> assert false
+ in
+
+ let rec loop = function
+ (* first field has constant offset 0 *)
+ | [] -> Some 0
+ (* field with constant offset & length *)
+ | field :: _
+ when has_constant_offset field &&
+ has_constant_len field ->
+ Some (get_constant_offset field + get_constant_len field)
+ (* field with no offset & constant length *)
+ | field :: fields
+ when P.get_offset field = None &&
+ has_constant_len field ->
+ (match loop fields with
+ | None -> None
+ | Some offset -> Some (offset + get_constant_len field))
+ (* else, can't work out the offset *)
+ | _ -> None
+ in
+ loop fields in
+
+ (* Look at the current offset and requested offset cases and
+ * determine what code to generate.
+ *)
+ match current_offset, requested_offset with
+ (* This is the good case: both the current offset and
+ * the requested offset are constant, so we can remove
+ * almost all the runtime checks.
+ *)
+ | Some current_offset, Some requested_offset ->
+ let move = requested_offset - current_offset in
+ if move < 0 then
+ fail (sprintf "requested offset is less than the current offset (%d < %d)" requested_offset current_offset);
+ (* Add some code to move the offset and length by a
+ * constant amount, and a runtime test that len >= 0
+ * (XXX possibly the runtime test is unnecessary?)
+ *)
+ <:expr<
+ let $lid:off$ = $lid:off$ + $`int:move$ in
+ let $lid:len$ = $lid:len$ - $`int:move$ in
+ if $lid:len$ >= 0 then $expr$
+ >>
+ (* In any other case, we need to use runtime checks.
+ *
+ * XXX It's not clear if a backwards move detected at runtime
+ * is merely a match failure, or a runtime error. At the
+ * moment it's just a match failure since bitmatch generally
+ * doesn't raise runtime errors.
+ *)
+ | _ ->
+ let move = gensym "move" in
+ <:expr<
+ let $lid:move$ =
+ $offset_expr$ - ($lid:off$ - $lid:original_off$) in
+ if $lid:move$ >= 0 then (
+ let $lid:off$ = $lid:off$ + $lid:move$ in
+ let $lid:len$ = $lid:len$ - $lid:move$ in
+ if $lid:len$ >= 0 then $expr$
+ )
+ >> in (* end of computed offset code *)
+
+ (* Emit extra debugging code. *)
+ let expr =
+ if not debug then expr else (
+ let field = P.string_of_pattern_field field in
+
+ <:expr<
+ if !Bitmatch.debug then (
+ Printf.eprintf "PA_BITMATCH: TEST:\n";
+ Printf.eprintf " %s\n" $str:field$;
+ Printf.eprintf " off %d len %d\n%!" $lid:off$ $lid:len$;
+ (*Bitmatch.hexdump_bitstring stderr
+ ($lid:data$,$lid:off$,$lid:len$);*)
+ );
+ $expr$
+ >>
+ ) in
+
output_field_extraction expr fields
in
(* Convert each case in the match. *)
let cases = List.map (
- function
- (* field : len ; field : len when .. -> ..*)
- | (Fields fields, Some whenclause, code) ->
- let inner =
- <:expr<
- if $whenclause$ then (
- $lid:result$ := Some ($code$);
- raise Exit
- )
- >> in
- output_field_extraction inner (List.rev fields)
-
- (* field : len ; field : len -> ... *)
- | (Fields fields, None, code) ->
- let inner =
- <:expr<
- $lid:result$ := Some ($code$);
- raise Exit
- >> in
- output_field_extraction inner (List.rev fields)
-
- (* _ as name when ... -> ... *)
- | (Bind (Some name), Some whenclause, code) ->
- <:expr<
- let $lid:name$ = ($lid:data$, $lid:off$, $lid:len$) in
- if $whenclause$ then (
- $lid:result$ := Some ($code$);
- raise Exit
- )
- >>
-
- (* _ as name -> ... *)
- | (Bind (Some name), None, code) ->
- <:expr<
- let $lid:name$ = ($lid:data$, $lid:off$, $lid:len$) in
- $lid:result$ := Some ($code$);
- raise Exit
- >>
-
- (* _ when ... -> ... *)
- | (Bind None, Some whenclause, code) ->
- <:expr<
- if $whenclause$ then (
- $lid:result$ := Some ($code$);
- raise Exit
- )
- >>
-
- (* _ -> ... *)
- | (Bind None, None, code) ->
- <:expr<
- $lid:result$ := Some ($code$);
- raise Exit
- >>
-
+ fun (fields, bind, whenclause, code) ->
+ let inner = <:expr< $lid:result$ := Some ($code$); raise Exit >> in
+ let inner =
+ match whenclause with
+ | Some whenclause ->
+ <:expr< if $whenclause$ then $inner$ >>
+ | None -> inner in
+ let inner =
+ match bind with
+ | Some name ->
+ <:expr<
+ let $lid:name$ = ($lid:data$, $lid:off$, $lid:len$) in
+ $inner$
+ >>
+ | None -> inner in
+ output_field_extraction inner (List.rev fields)
) cases in
+ (* Join them into a single expression.
+ *
+ * Don't do it with a normal fold_right because that leaves
+ * 'raise Exit; ()' at the end which causes a compiler warning.
+ * Hence a bit of complexity here.
+ *
+ * Note that the number of cases is always >= 1 so List.hd is safe.
+ *)
+ let cases = List.rev cases in
let cases =
- List.fold_right (fun case base -> <:expr< $case$ ; $base$ >>)
- cases <:expr< () >> in
+ List.fold_left (fun base case -> <:expr< $case$ ; $base$ >>)
+ (List.hd cases) (List.tl cases) in
(* The final code just wraps the list of cases in a
* try/with construct so that each case is tried in
let loc_char = string_of_int (Loc.start_off _loc - Loc.start_bol _loc) in
<:expr<
- let ($lid:data$, $lid:off$, $lid:len$) = $bs$ in
+ (* Note we save the original offset/length at the start of the match
+ * in 'original_off'/'original_len' symbols. 'data' never changes.
+ *)
+ let ($lid:data$, $lid:original_off$, $lid:original_len$) = $bs$ in
+ let $lid:off$ = $lid:original_off$ and $lid:len$ = $lid:original_len$ in
let $lid:result$ = ref None in
(try
$cases$
$int:loc_line$, $int:loc_char$))
>>
+(* Add a named pattern to pattern_hash under [name].
+ *
+ * Hashtbl.replace is used rather than Hashtbl.add so that redefining
+ * a name overwrites the previous pattern instead of stacking a hidden
+ * binding underneath the new one.  Lookups with Hashtbl.find return
+ * the most recent definition either way.
+ *
+ * _loc is not used by this function.
+ *)
+let add_named_pattern _loc name pattern =
+  Hashtbl.replace pattern_hash name pattern
+
+(* Look up the named pattern [name] in pattern_hash and return it.
+ * An unknown name is reported as a located failure at _loc.
+ *)
+let expand_named_pattern _loc name =
+  if Hashtbl.mem pattern_hash name then
+    Hashtbl.find pattern_hash name
+  else
+    locfail _loc (sprintf "named pattern not found: %s" name)
+
+(* Add named patterns from a file.  See the documentation on the
+ * directory search path in bitmatch_persistent.mli
+ *
+ * A relative, implicit filename is tried in the current directory
+ * first and then under Bitmatch_config.ocamllibdir; any other
+ * filename is opened as given.  Failure to open the file is raised
+ * at _loc.
+ *
+ * Every entry in the file is read (until End_of_file).  Pattern
+ * entries are registered with add_named_pattern in file order;
+ * constructor entries are ignored.  A pattern with no fields is
+ * reported as a located failure.
+ *)
+let load_patterns_from_file _loc filename =
+  let chan =
+    if Filename.is_relative filename && Filename.is_implicit filename then (
+      (* Try current directory. *)
+      try open_in filename
+      with Sys_error _ ->
+        (* Try OCaml library directory. *)
+        try open_in (Filename.concat Bitmatch_config.ocamllibdir filename)
+        with exn -> Loc.raise _loc exn
+    ) else (
+      try open_in filename
+      with exn -> Loc.raise _loc exn
+    ) in
+  let names = ref [] in
+  (* Keep reading entries until the channel is exhausted.  If reading
+   * fails for any other reason, close the channel before re-raising
+   * so that the file descriptor is not leaked.
+   *)
+  (try
+     while true do
+       let name = P.named_from_channel chan in
+       names := name :: !names
+     done
+   with
+   | End_of_file -> ()
+   | exn -> close_in_noerr chan; raise exn
+  );
+  close_in chan;
+  (* The entries were accumulated in reverse; restore file order. *)
+  let names = List.rev !names in
+  List.iter (
+    function
+    | name, P.Pattern patt ->
+        if patt = [] then
+          locfail _loc (sprintf "pattern %s: no fields" name);
+        add_named_pattern _loc name patt
+    | _, P.Constructor _ -> () (* just ignore these for now *)
+  ) names
+
EXTEND Gram
- GLOBAL: expr;
+ GLOBAL: expr str_item;
+ (* Qualifiers are a comma-separated list of identifiers ("string",
+ * "bigendian", etc.), each optionally followed by a parenthesised
+ * expression (used in certain cases). Note that we are careful not
+ * to declare any explicit reserved words.
+ *)
qualifiers: [
- [ LIST0 [ q = LIDENT -> q ] SEP "," ]
+ [ LIST0
+ [ q = LIDENT;
+ e = OPT [ "("; e = expr; ")" -> e ] -> (q, e) ]
+ SEP "," ]
];
- field: [
- [ name = LIDENT; ":"; len = expr LEVEL "top";
+ (* Field used in the bitmatch operator (a pattern). This can actually
+ * return multiple fields, in the case where the 'field' is a named
+ * persistent pattern.
+ *)
+ patt_field: [
+ [ fpatt = patt; ":"; len = expr LEVEL "top";
qs = OPT [ ":"; qs = qualifiers -> qs ] ->
- output_field _loc name len qs
+ let field = P.create_pattern_field _loc in
+ let field = P.set_patt field fpatt in
+ let field = P.set_length field len in
+ [parse_field _loc field qs] (* Normal, single field. *)
+ | ":"; name = LIDENT ->
+ expand_named_pattern _loc name (* Named -> list of fields. *)
]
];
- match_case: [
- [ fields = LIST0 field SEP ";";
- w = OPT [ "when"; e = expr -> e ]; "->";
- code = expr ->
- (Fields fields, w, code)
+ (* Brace-delimited, semicolon-separated list of pattern fields.
+ * Each patt_field yields a list of fields, so the per-field lists
+ * are flattened into one. Used both by patt_case and by the
+ * named pattern definition in str_item.
+ *)
+ patt_fields: [
+ [ "{";
+ fields = LIST0 patt_field SEP ";";
+ "}" ->
+ List.concat fields
]
- | [ "_";
+ ];
+
+ patt_case: [
+ [ fields = patt_fields;
bind = OPT [ "as"; name = LIDENT -> name ];
- w = OPT [ "when"; e = expr -> e ]; "->";
+ whenclause = OPT [ "when"; e = expr -> e ]; "->";
code = expr ->
- (Bind bind, w, code)
+ (fields, bind, whenclause, code)
+ ]
+ ];
+
+ (* Field used in the BITSTRING constructor (an expression). *)
+ constr_field: [
+ [ fexpr = expr LEVEL "top"; ":"; len = expr LEVEL "top";
+ qs = OPT [ ":"; qs = qualifiers -> qs ] ->
+ let field = P.create_constructor_field _loc in
+ let field = P.set_expr field fexpr in
+ let field = P.set_length field len in
+ parse_field _loc field qs
+ ]
+ ];
+
+ constr_fields: [
+ [ "{";
+ fields = LIST0 constr_field SEP ";";
+ "}" ->
+ fields
]
];
+ (* 'bitmatch' expressions and the BITSTRING constructor. *)
expr: LEVEL ";" [
- [ "bitmatch"; bs = expr; "with"; OPT "|";
- cases = LIST1 match_case SEP "|" ->
+ [ "bitmatch";
+ bs = expr; "with"; OPT "|";
+ cases = LIST1 patt_case SEP "|" ->
output_bitmatch _loc bs cases
]
+
+ (* Constructor. *)
+ | [ "BITSTRING";
+ fields = constr_fields ->
+ output_constructor _loc fields
+ ]
+ ];
+
+ (* Named persistent patterns.
+ *
+ * NB: Currently only allowed at the top level. We can probably lift
+ * this restriction later if necessary. We only deal with patterns
+ * at the moment, not constructors, but the infrastructure to do
+ * constructors is in place.
+ *)
+ str_item: LEVEL "top" [
+ [ "let"; "bitmatch";
+ name = LIDENT; "="; fields = patt_fields ->
+ add_named_pattern _loc name fields;
+ (* The statement disappears, but we still need a str_item so ... *)
+ <:str_item< >>
+ | "open"; "bitmatch"; filename = STRING ->
+ load_patterns_from_file _loc filename;
+ <:str_item< >>
+ ]
];
END