*)
let debug = false
+(* Hashtable storing named persistent patterns. *)
+let pattern_hash : (string, P.pattern) Hashtbl.t = Hashtbl.create 13
+
+let locfail _loc msg = Loc.raise _loc (Failure msg)
+
(* Work out if an expression is an integer constant.
*
* Returns [Some i] if so (where i is the integer value), else [None].
(* Deal with the qualifiers which appear for a field of both types. *)
let parse_field _loc field qs =
- let endian_set, signed_set, type_set, field =
+ let fail = locfail _loc in
+
+ let endian_set, signed_set, type_set, offset_set, field =
match qs with
- | None -> (false, false, false, field)
+ | None -> (false, false, false, false, field)
| Some qs ->
List.fold_left (
- fun (endian_set, signed_set, type_set, field) qual_expr ->
+ fun (endian_set, signed_set, type_set, offset_set, field) qual_expr ->
match qual_expr with
| "bigendian", None ->
if endian_set then
- Loc.raise _loc (Failure "an endian flag has been set already")
+ fail "an endian flag has been set already"
else (
let field = P.set_endian field BigEndian in
- (true, signed_set, type_set, field)
+ (true, signed_set, type_set, offset_set, field)
)
| "littleendian", None ->
if endian_set then
- Loc.raise _loc (Failure "an endian flag has been set already")
+ fail "an endian flag has been set already"
else (
let field = P.set_endian field LittleEndian in
- (true, signed_set, type_set, field)
+ (true, signed_set, type_set, offset_set, field)
)
| "nativeendian", None ->
if endian_set then
- Loc.raise _loc (Failure "an endian flag has been set already")
+ fail "an endian flag has been set already"
else (
let field = P.set_endian field NativeEndian in
- (true, signed_set, type_set, field)
+ (true, signed_set, type_set, offset_set, field)
)
| "endian", Some expr ->
if endian_set then
- Loc.raise _loc (Failure "an endian flag has been set already")
+ fail "an endian flag has been set already"
else (
let field = P.set_endian_expr field expr in
- (true, signed_set, type_set, field)
+ (true, signed_set, type_set, offset_set, field)
)
| "signed", None ->
if signed_set then
- Loc.raise _loc (Failure "a signed flag has been set already")
+ fail "a signed flag has been set already"
else (
let field = P.set_signed field true in
- (endian_set, true, type_set, field)
+ (endian_set, true, type_set, offset_set, field)
)
| "unsigned", None ->
if signed_set then
- Loc.raise _loc (Failure "a signed flag has been set already")
+ fail "a signed flag has been set already"
else (
let field = P.set_signed field false in
- (endian_set, true, type_set, field)
+ (endian_set, true, type_set, offset_set, field)
)
| "int", None ->
if type_set then
- Loc.raise _loc (Failure "a type flag has been set already")
+ fail "a type flag has been set already"
else (
let field = P.set_type_int field in
- (endian_set, signed_set, true, field)
+ (endian_set, signed_set, true, offset_set, field)
)
| "string", None ->
if type_set then
- Loc.raise _loc (Failure "a type flag has been set already")
+ fail "a type flag has been set already"
else (
let field = P.set_type_string field in
- (endian_set, signed_set, true, field)
+ (endian_set, signed_set, true, offset_set, field)
)
| "bitstring", None ->
if type_set then
- Loc.raise _loc (Failure "a type flag has been set already")
+ fail "a type flag has been set already"
else (
let field = P.set_type_bitstring field in
- (endian_set, signed_set, true, field)
+ (endian_set, signed_set, true, offset_set, field)
+ )
+ | "offset", Some expr ->
+ if offset_set then
+ fail "an offset has been set already"
+ else (
+ let field = P.set_offset field expr in
+ (endian_set, signed_set, type_set, true, field)
)
| s, Some _ ->
- Loc.raise _loc (Failure (s ^ ": unknown qualifier, or qualifier should not be followed by an expression"))
+ fail (s ^ ": unknown qualifier, or qualifier should not be followed by an expression")
| s, None ->
- Loc.raise _loc (Failure (s ^ ": unknown qualifier, or qualifier should be followed by an expression"))
- ) (false, false, false, field) qs in
+ fail (s ^ ": unknown qualifier, or qualifier should be followed by an expression")
+ ) (false, false, false, false, field) qs in
(* If type is set to string or bitstring then endianness and
* signedness qualifiers are meaningless and must not be set.
let () =
let t = P.get_type field in
if (t = P.Bitstring || t = P.String) && (endian_set || signed_set) then
- Loc.raise _loc (
- Failure "string types and endian or signed qualifiers cannot be mixed"
- ) in
+ fail "string types and endian or signed qualifiers cannot be mixed" in
(* Default endianness, signedness, type if not set already. *)
let field = if endian_set then field else P.set_endian field BigEndian in
(* Generate the code for a constructor, ie. 'BITSTRING ...'. *)
let output_constructor _loc fields =
+ let fail = locfail _loc in
+
let loc_fname = Loc.file_name _loc in
let loc_line = string_of_int (Loc.start_line _loc) in
let loc_char = string_of_int (Loc.start_off _loc - Loc.start_bol _loc) in
let signed = P.get_signed field in
let t = P.get_type field in
let _loc = P.get_location field in
+ let offset = P.get_offset field in
+
+ (* offset() not supported in constructors. Implementation of
+ * forward-only offsets is fairly straightforward: we would
+ * need to just calculate the length of padding here and add
+ * it to what has been constructed. For general offsets,
+ * including going backwards, that would require a rethink in
+ * how we construct bitstrings.
+ *)
+ if offset <> None then
+ fail "offset expressions are not supported in BITSTRING constructors";
(* Is flen an integer constant? If so, what is it? This
* is very simple-minded and only detects simple constants.
>>
| P.Int, Some _ ->
- Loc.raise _loc (Failure "length of int field must be [1..64]")
+ fail "length of int field must be [1..64]"
(* Int field, non-constant length. We need to perform a runtime
* test to ensure the length is [1..64].
* any other value.
*)
| P.String, Some _ ->
- Loc.raise _loc (Failure "length of string must be > 0 and a multiple of 8, or the special value -1")
+ fail "length of string must be > 0 and a multiple of 8, or the special value -1"
(* String, non-constant length.
* We check at runtime that the length is > 0, a multiple of 8,
$int:loc_line$, $int:loc_char$))
>>
- (* Bitstring, constant length > 0. *)
- | P.Bitstring, Some i when i > 0 ->
+ (* Bitstring, constant length >= 0. *)
+ | P.Bitstring, Some i when i >= 0 ->
let bs = gensym "bs" in
<:expr<
let $lid:bs$ = $fexpr$ in
| P.Bitstring, Some (-1) ->
<:expr< Bitmatch.construct_bitstring $lid:buffer$ $fexpr$ >>
- (* Bitstring, constant length = 0 is probably an error, and so is
- * any other value.
- *)
+ (* Bitstring, constant length < -1 is an error. *)
| P.Bitstring, Some _ ->
- Loc.raise _loc
- (Failure
- "length of bitstring must be > 0 or the special value -1")
+ fail "length of bitstring must be >= 0 or the special value -1"
(* Bitstring, non-constant length.
- * We check at runtime that the length is > 0 and matches
+ * We check at runtime that the length is >= 0 and matches
* the declared length.
*)
| P.Bitstring, None ->
let bs = gensym "bs" in
<:expr<
let $lid:bslen$ = $flen$ in
- if $lid:bslen$ > 0 then (
+ if $lid:bslen$ >= 0 then (
let $lid:bs$ = $fexpr$ in
if Bitmatch.bitstring_length $lid:bs$ = $lid:bslen$ then
Bitmatch.construct_bitstring $lid:buffer$ $lid:bs$
* the list of cases to test against.
*)
let output_bitmatch _loc bs cases =
+ let fail = locfail _loc in
+
let data = gensym "data" and off = gensym "off" and len = gensym "len" in
let result = gensym "result" in
(* This generates the field extraction code for each
- * field a single case. Each field must be wider than
- * the minimum permitted for the type and there must be
- * enough remaining data in the bitstring to satisfy it.
+ * field in a single case. There must be enough remaining data
+ * in the bitstring to satisfy the field.
+ *
* As we go through the fields, symbols 'data', 'off' and 'len'
* track our position and remaining length in the bitstring.
*
let signed = P.get_signed field in
let t = P.get_type field in
let _loc = P.get_location field in
+ let offset = P.get_offset field in
- (* Is flen an integer constant? If so, what is it? This
- * is very simple-minded and only detects simple constants.
+ (* Is flen (field len) an integer constant? If so, what is it?
+ * This will be [Some i] if it's a constant or [None] if it's
+ * non-constant or we couldn't determine.
*)
let flen_is_const = expr_is_constant flen in
>>
| P.Int, Some _ ->
- Loc.raise _loc (Failure "length of int field must be [1..64]")
+ fail "length of int field must be [1..64]"
(* Int field, non-const flen. We have to test the range of
* the field at runtime. If outside the range it's a no-match
>>
| P.String, Some _ ->
- Loc.raise _loc (Failure "length of string must be > 0 and a multiple of 8, or the special value -1")
+ fail "length of string must be > 0 and a multiple of 8, or the special value -1"
(* String field, non-const flen. We check the flen is > 0
* and a multiple of 8 (-1 is not allowed here), at runtime.
| <:patt< $lid:ident$ >> -> ident
| <:patt< _ >> -> "_"
| _ ->
- Loc.raise _loc
- (Failure "cannot compare a bitstring to a constant") in
+ fail "cannot compare a bitstring to a constant" in
<:expr<
if $lid:len$ >= $`int:i$ then (
let $lid:ident$, $lid:off$, $lid:len$ =
| <:patt< $lid:ident$ >> -> ident
| <:patt< _ >> -> "_"
| _ ->
- Loc.raise _loc
- (Failure "cannot compare a bitstring to a constant") in
+ fail "cannot compare a bitstring to a constant" in
<:expr<
let $lid:ident$, $lid:off$, $lid:len$ =
Bitmatch.extract_remainder $lid:data$ $lid:off$ $lid:len$ in
>>
| P.Bitstring, Some _ ->
- Loc.raise _loc (Failure "length of bitstring must be >= 0 or the special value -1")
+ fail "length of bitstring must be >= 0 or the special value -1"
(* Bitstring field, non-const flen. We check the flen is >= 0
* (-1 is not allowed here) at runtime.
| <:patt< $lid:ident$ >> -> ident
| <:patt< _ >> -> "_"
| _ ->
- Loc.raise _loc
- (Failure "cannot compare a bitstring to a constant") in
+ fail "cannot compare a bitstring to a constant" in
<:expr<
if $flen$ >= 0 && $flen$ <= $lid:len$ then (
let $lid:ident$, $lid:off$, $lid:len$ =
>>
in
+ (* Computed offset: only offsets forward are supported.
+ *
+ * We try hard to optimize this based on what we know. Are
+ * we at a predictable offset now? (Look at the outer 'fields'
+ * list and see if they all have constant field length starting
+ * at some constant offset). Is this offset constant?
+ *
+ * Based on this we can do a lot of the computation at
+ * compile time, or defer it to runtime only if necessary.
+ *
+ * In all cases, the off and len fields get updated.
+ *)
+ let expr =
+ match offset with
+ | None -> expr (* common case: there was no offset expression *)
+ | Some offset_expr ->
+ (* This will be [Some i] if offset is a constant expression
+ * or [None] if it's a non-constant.
+ *)
+ let requested_offset = expr_is_constant offset_expr in
+
+ (* This will be [Some i] if our current offset is known
+ * at compile time, or [None] if we can't determine it.
+ *)
+ let current_offset =
+ let has_constant_offset field =
+ match P.get_offset field with
+ | None -> false
+ | Some expr ->
+ match expr_is_constant expr with
+ | None -> false
+ | Some i -> true
+ in
+ let get_constant_offset field =
+ match P.get_offset field with
+ | None -> assert false
+ | Some expr ->
+ match expr_is_constant expr with
+ | None -> assert false
+ | Some i -> i
+ in
+
+ let has_constant_len field =
+ match expr_is_constant (P.get_length field) with
+ | None -> false
+ | Some i when i > 0 -> true
+ | Some _ -> false
+ in
+ let get_constant_len field =
+ match expr_is_constant (P.get_length field) with
+ | None -> assert false
+ | Some i when i > 0 -> i
+ | Some _ -> assert false
+ in
+
+ let rec loop = function
+ (* first field has constant offset 0 *)
+ | [] -> Some 0
+ (* field with constant offset & length *)
+ | field :: _
+ when has_constant_offset field &&
+ has_constant_len field ->
+ Some (get_constant_offset field + get_constant_len field)
+ (* field with no offset & constant length *)
+ | field :: fields
+ when P.get_offset field = None &&
+ has_constant_len field ->
+ (match loop fields with
+ | None -> None
+ | Some offset -> Some (offset + get_constant_len field))
+ (* else, can't work out the offset *)
+ | _ -> None
+ in
+ loop fields in
+
+ (* Look at the current offset and requested offset cases and
+ * determine what code to generate.
+ *)
+ match current_offset, requested_offset with
+ (* This is the good case: both the current offset and
+ * the requested offset are constant, so we can remove
+ * almost all the runtime checks.
+ *)
+ | Some current_offset, Some requested_offset ->
+ let move = requested_offset - current_offset in
+ if move < 0 then
+ fail (sprintf "requested offset is less than the current offset (%d < %d)" requested_offset current_offset);
+ (* Add some code to move the offset and length by a
+ * constant amount, and a runtime test that len >= 0
+ * (XXX possibly the runtime test is unnecessary?)
+ *)
+ <:expr<
+ let $lid:off$ = $lid:off$ + $`int:move$ in
+ let $lid:len$ = $lid:len$ - $`int:move$ in
+ if $lid:len$ >= 0 then $expr$
+ >>
+ (* In any other case, we need to use runtime checks.
+ *
+ * XXX It's not clear if a backwards move detected at runtime
+ * is merely a match failure, or a runtime error. At the
+ * moment it's just a match failure since bitmatch generally
+ * doesn't raise runtime errors.
+ *)
+ | _ ->
+ let move = gensym "move" in
+ <:expr<
+ let $lid:move$ = $offset_expr$ - $lid:off$ in
+ if $lid:move$ >= 0 then (
+ let $lid:off$ = $lid:off$ + $lid:move$ in
+ let $lid:len$ = $lid:len$ - $lid:move$ in
+ if $lid:len$ >= 0 then $expr$
+ )
+ >> in (* end of computed offset code *)
+
(* Emit extra debugging code. *)
let expr =
if not debug then expr else (
- let field = P.string_of_field field in
+ let field = P.string_of_pattern_field field in
<:expr<
if !Bitmatch.debug then (
$int:loc_line$, $int:loc_char$))
>>
+(* Add a named pattern. *)
+let add_named_pattern _loc name pattern =
+ Hashtbl.add pattern_hash name pattern
+
+(* Expand a named pattern from the pattern_hash. *)
+let expand_named_pattern _loc name =
+ try Hashtbl.find pattern_hash name
+ with Not_found ->
+ locfail _loc (sprintf "named pattern not found: %s" name)
+
+(* Add named patterns from a file. See the documentation on the
+ * directory search path in bitmatch_persistent.mli
+ *)
+let load_patterns_from_file _loc filename =
+ let chan =
+ if Filename.is_relative filename && Filename.is_implicit filename then (
+ (* Try current directory. *)
+ try open_in filename
+ with _ ->
+ (* Try OCaml library directory. *)
+ try open_in (Filename.concat Bitmatch_config.ocamllibdir filename)
+ with exn -> Loc.raise _loc exn
+ ) else (
+ try open_in filename
+ with exn -> Loc.raise _loc exn
+ ) in
+ let names = ref [] in
+ (try
+ let rec loop () =
+ let name = P.named_from_channel chan in
+ names := name :: !names
+ in
+ loop ()
+ with End_of_file -> ()
+ );
+ close_in chan;
+ let names = List.rev !names in
+ List.iter (
+ function
+ | name, P.Pattern patt -> add_named_pattern _loc name patt
+ | _, P.Constructor _ -> () (* just ignore these for now *)
+ ) names
+
EXTEND Gram
- GLOBAL: expr;
+ GLOBAL: expr str_item;
(* Qualifiers are a list of identifiers ("string", "bigendian", etc.)
* followed by an optional expression (used in certain cases). Note
SEP "," ]
];
- (* Field used in the bitmatch operator (a pattern). *)
+ (* Field used in the bitmatch operator (a pattern). This can actually
+ * return multiple fields, in the case where the 'field' is a named
+ * persitent pattern.
+ *)
patt_field: [
[ fpatt = patt; ":"; len = expr LEVEL "top";
qs = OPT [ ":"; qs = qualifiers -> qs ] ->
let field = P.create_pattern_field _loc in
let field = P.set_patt field fpatt in
let field = P.set_length field len in
- parse_field _loc field qs
+ [parse_field _loc field qs] (* Normal, single field. *)
+ | ":"; name = LIDENT ->
+ expand_named_pattern _loc name (* Named -> list of fields. *)
]
];
(* Case inside bitmatch operator. *)
- match_case: [
+ patt_fields: [
[ "{";
fields = LIST0 patt_field SEP ";";
- "}";
+ "}" ->
+ List.concat fields
+ ]
+ ];
+
+ patt_case: [
+ [ fields = patt_fields;
bind = OPT [ "as"; name = LIDENT -> name ];
whenclause = OPT [ "when"; e = expr -> e ]; "->";
code = expr ->
]
];
+ constr_fields: [
+ [ "{";
+ fields = LIST0 constr_field SEP ";";
+ "}" ->
+ fields
+ ]
+ ];
+
(* 'bitmatch' expressions. *)
expr: LEVEL ";" [
[ "bitmatch";
bs = expr; "with"; OPT "|";
- cases = LIST1 match_case SEP "|" ->
+ cases = LIST1 patt_case SEP "|" ->
output_bitmatch _loc bs cases
]
(* Constructor. *)
- | [ "BITSTRING"; "{";
- fields = LIST0 constr_field SEP ";";
- "}" ->
+ | [ "BITSTRING";
+ fields = constr_fields ->
output_constructor _loc fields
]
];
+ (* Named persistent patterns.
+ *
+ * NB: Currently only allowed at the top level. We can probably lift
+ * this restriction later if necessary. We only deal with patterns
+ * at the moment, not constructors, but the infrastructure to do
+ * constructors is in place.
+ *)
+ str_item: LEVEL "top" [
+ [ "let"; "bitmatch";
+ name = LIDENT; "="; fields = patt_fields ->
+ add_named_pattern _loc name fields;
+ (* The statement disappears, but we still need a str_item so ... *)
+ <:str_item< >>
+ | "open"; "bitmatch"; filename = STRING ->
+ load_patterns_from_file _loc filename;
+ <:str_item< >>
+ ]
+ ];
+
END