X-Git-Url: http://git.annexia.org/?a=blobdiff_plain;f=pa_bitmatch.ml;h=98151a3746175885d66b6a998440b7b64dd84daf;hb=1be1a65d5fc5727d20e465bebaafcad263b87162;hp=f7695823b7d0462a38c44f0a963c627b82544a4e;hpb=97cd7dd22059a1c5ca72852130ac430aa713e968;p=ocaml-bitstring.git diff --git a/pa_bitmatch.ml b/pa_bitmatch.ml index f769582..98151a3 100644 --- a/pa_bitmatch.ml +++ b/pa_bitmatch.ml @@ -15,7 +15,7 @@ * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * - * $Id: pa_bitmatch.ml,v 1.11 2008-04-25 14:57:11 rjones Exp $ + * $Id$ *) open Printf @@ -24,6 +24,9 @@ open Camlp4.PreCast open Syntax open Ast +open Bitmatch +module P = Bitmatch_persistent + (* If this is true then we emit some debugging code which can * be useful to tell what is happening during matches. You * also need to do 'Bitmatch.debug := true' in your main program. @@ -32,6 +35,11 @@ open Ast *) let debug = false +(* Hashtable storing named persistent patterns. *) +let pattern_hash : (string, P.pattern) Hashtbl.t = Hashtbl.create 13 + +let locfail _loc msg = Loc.raise _loc (Failure msg) + (* Work out if an expression is an integer constant. * * Returns [Some i] if so (where i is the integer value), else [None]. @@ -40,47 +48,18 @@ let debug = false * expressions such as [k], [k+c], [k-c] etc. *) let rec expr_is_constant = function - | <:expr< $int:i$ >> -> (* Literal integer constant. *) + | <:expr< $int:i$ >> -> (* Literal integer constant. *) Some (int_of_string i) - | <:expr< $a$ + $b$ >> -> (* Addition of constants. *) - (match expr_is_constant a, expr_is_constant b with - | Some a, Some b -> Some (a+b) - | _ -> None) - | <:expr< $a$ - $b$ >> -> (* Subtraction. *) - (match expr_is_constant a, expr_is_constant b with - | Some a, Some b -> Some (a-b) - | _ -> None) - | <:expr< $a$ * $b$ >> -> (* Multiplication. 
*) - (match expr_is_constant a, expr_is_constant b with - | Some a, Some b -> Some (a*b) - | _ -> None) - | <:expr< $a$ / $b$ >> -> (* Division. *) - (match expr_is_constant a, expr_is_constant b with - | Some a, Some b -> Some (a/b) - | _ -> None) - | <:expr< $a$ lsl $b$ >> -> (* Shift left. *) - (match expr_is_constant a, expr_is_constant b with - | Some a, Some b -> Some (a lsl b) - | _ -> None) - | <:expr< $a$ lsr $b$ >> -> (* Shift right. *) + | <:expr< $lid:op$ $a$ $b$ >> -> (match expr_is_constant a, expr_is_constant b with - | Some a, Some b -> Some (a lsr b) + | Some a, Some b -> (* Integer binary operations. *) + let ops = ["+", (+); "-", (-); "*", ( * ); "/", (/); + "land", (land); "lor", (lor); "lxor", (lxor); + "lsl", (lsl); "lsr", (lsr); "asr", (asr); + "mod", (mod)] in + (try Some ((List.assoc op ops) a b) with Not_found -> None) | _ -> None) - | _ -> None (* Anything else is not constant. *) - -(* Field. In bitmatch (patterns) the type is [patt field]. In - * BITSTRING (constructor) the type is [expr field]. - *) -type 'a field = { - field : 'a; (* field ('a is either patt or expr) *) - flen : expr; (* length in bits, may be non-const *) - endian : Bitmatch.endian; (* endianness *) - signed : bool; (* true if signed, false if unsigned *) - t : t; (* type *) - _loc : Loc.t; (* location in source code *) - printer : 'a -> string; (* turn the field into a string *) -} -and t = Int | String | Bitstring + | _ -> None (* Generate a fresh, unique symbol each time called. *) let gensym = @@ -90,134 +69,119 @@ let gensym = sprintf "__pabitmatch_%s_%d" name i (* Deal with the qualifiers which appear for a field of both types. 
*) -let parse_field _loc field flen qs printer = - let endian, signed, t = +let parse_field _loc field qs = + let fail = locfail _loc in + + let endian_set, signed_set, type_set, offset_set, field = match qs with - | None -> (None, None, None) + | None -> (false, false, false, false, field) | Some qs -> - List.fold_left ( - fun (endian, signed, t) q -> - match q with - | "bigendian" -> - if endian <> None then - Loc.raise _loc (Failure "an endian flag has been set already") - else ( - let endian = Some Bitmatch.BigEndian in - (endian, signed, t) - ) - | "littleendian" -> - if endian <> None then - Loc.raise _loc (Failure "an endian flag has been set already") - else ( - let endian = Some Bitmatch.LittleEndian in - (endian, signed, t) - ) - | "nativeendian" -> - if endian <> None then - Loc.raise _loc (Failure "an endian flag has been set already") - else ( - let endian = Some Bitmatch.NativeEndian in - (endian, signed, t) - ) - | "signed" -> - if signed <> None then - Loc.raise _loc (Failure "a signed flag has been set already") - else ( - let signed = Some true in - (endian, signed, t) - ) - | "unsigned" -> - if signed <> None then - Loc.raise _loc (Failure "a signed flag has been set already") - else ( - let signed = Some false in - (endian, signed, t) - ) - | "int" -> - if t <> None then - Loc.raise _loc (Failure "a type flag has been set already") - else ( - let t = Some Int in - (endian, signed, t) - ) - | "string" -> - if t <> None then - Loc.raise _loc (Failure "a type flag has been set already") - else ( - let t = Some String in - (endian, signed, t) - ) - | "bitstring" -> - if t <> None then - Loc.raise _loc (Failure "a type flag has been set already") - else ( - let t = Some Bitstring in - (endian, signed, t) - ) - | s -> - Loc.raise _loc (Failure (s ^ ": unknown qualifier")) - ) (None, None, None) qs in + let check already_set msg = if already_set then fail msg in + let apply_qualifier + (endian_set, signed_set, type_set, offset_set, field) = + function 
+ | "endian", Some expr -> + check endian_set "an endian flag has been set already"; + let field = P.set_endian_expr field expr in + (true, signed_set, type_set, offset_set, field) + | "endian", None -> + fail "qualifier 'endian' should be followed by an expression" + | "offset", Some expr -> + check offset_set "an offset has been set already"; + let field = P.set_offset field expr in + (endian_set, signed_set, type_set, true, field) + | "offset", None -> + fail "qualifier 'offset' should be followed by an expression" + | s, Some _ -> + fail (s ^ ": unknown qualifier, or qualifier should not be followed by an expression") + | qual, None -> + let endian_quals = ["bigendian", BigEndian; + "littleendian", LittleEndian; + "nativeendian", NativeEndian] in + let sign_quals = ["signed", true; "unsigned", false] in + let type_quals = ["int", P.set_type_int; + "string", P.set_type_string; + "bitstring", P.set_type_bitstring] in + if List.mem_assoc qual endian_quals then ( + check endian_set "an endian flag has been set already"; + let field = P.set_endian field (List.assoc qual endian_quals) in + (true, signed_set, type_set, offset_set, field) + ) else if List.mem_assoc qual sign_quals then ( + check signed_set "a signed flag has been set already"; + let field = P.set_signed field (List.assoc qual sign_quals) in + (endian_set, true, type_set, offset_set, field) + ) else if List.mem_assoc qual type_quals then ( + check type_set "a type flag has been set already"; + let field = List.assoc qual type_quals field in + (endian_set, signed_set, true, offset_set, field) + ) else + fail (qual ^ ": unknown qualifier, or qualifier should be followed by an expression") in + List.fold_left apply_qualifier (false, false, false, false, field) qs in (* If type is set to string or bitstring then endianness and * signedness qualifiers are meaningless and must not be set. 
*) - if (t = Some Bitstring || t = Some String) - && (endian <> None || signed <> None) then - Loc.raise _loc ( - Failure "string types and endian or signed qualifiers cannot be mixed" - ); - - (* Default endianness, signedness, type. *) - let endian = match endian with None -> Bitmatch.BigEndian | Some e -> e in - let signed = match signed with None -> false | Some s -> s in - let t = match t with None -> Int | Some t -> t in - - { - field = field; - flen = flen; - endian = endian; - signed = signed; - t = t; - _loc = _loc; - printer = printer; - } - -let string_of_t = function - | Int -> "int" - | String -> "string" - | Bitstring -> "bitstring" - -let patt_printer = function - | <:patt< $lid:id$ >> -> id - | _ -> "[pattern]" - -let expr_printer = function - | <:expr< $lid:id$ >> -> id - | _ -> "[expression]" - -let string_of_field { field = field; flen = flen; - endian = endian; signed = signed; t = t; - _loc = _loc; - printer = printer} = - let flen = - match expr_is_constant flen with - | Some i -> string_of_int i - | None -> "[non-const-len]" in - let endian = Bitmatch.string_of_endian endian in - let signed = if signed then "signed" else "unsigned" in - let t = string_of_t t in - let loc_fname = Loc.file_name _loc in - let loc_line = Loc.start_line _loc in - let loc_char = Loc.start_off _loc - Loc.start_bol _loc in - - sprintf "%s : %s : %s, %s, %s @ (%S, %d, %d)" - (printer field) flen t endian signed loc_fname loc_line loc_char + let () = + let t = P.get_type field in + if (t = P.Bitstring || t = P.String) && (endian_set || signed_set) then + fail "string types and endian or signed qualifiers cannot be mixed" in + + (* Default endianness, signedness, type if not set already. *) + let field = if endian_set then field else P.set_endian field BigEndian in + let field = if signed_set then field else P.set_signed field false in + let field = if type_set then field else P.set_type_int field in + + field + +(* Choose the right constructor function. 
*) +let build_bitmatch_call _loc funcname length endian signed = + match length, endian, signed with + (* XXX The meaning of signed/unsigned breaks down at + * 31, 32, 63 and 64 bits. + *) + | (Some 1, _, _) -> <:expr> + | (Some (2|3|4|5|6|7|8), _, sign) -> + let call = Printf.sprintf "%s_char_%s" + funcname (if sign then "signed" else "unsigned") in + <:expr< Bitmatch.$lid:call$ >> + | (len, endian, signed) -> + let t = match len with + | Some i when i <= 31 -> "int" + | Some 32 -> "int32" + | _ -> "int64" in + let sign = if signed then "signed" else "unsigned" in + match endian with + | P.ConstantEndian constant -> + let endianness = match constant with + | BigEndian -> "be" + | LittleEndian -> "le" + | NativeEndian -> "ne" in + let call = Printf.sprintf "%s_%s_%s_%s" + funcname t endianness sign in + <:expr< Bitmatch.$lid:call$ >> + | P.EndianExpr expr -> + let call = Printf.sprintf "%s_%s_%s_%s" + funcname t "ee" sign in + <:expr< Bitmatch.$lid:call$ $expr$ >> (* Generate the code for a constructor, ie. 'BITSTRING ...'. *) let output_constructor _loc fields = - let loc_fname = Loc.file_name _loc in - let loc_line = string_of_int (Loc.start_line _loc) in - let loc_char = string_of_int (Loc.start_off _loc - Loc.start_bol _loc) in + (* This function makes code to raise a Bitmatch.Construct_failure exception + * containing a message and the current _loc context. + * (Thanks to Bluestorm for suggesting this). + *) + let construct_failure _loc msg = + <:expr< + Bitmatch.Construct_failure + ($`str:msg$, + $`str:Loc.file_name _loc$, + $`int:Loc.start_line _loc$, + $`int:Loc.start_off _loc - Loc.start_bol _loc$) + >> + in + let raise_construct_failure _loc msg = + <:expr< raise $construct_failure _loc msg$ >> + in (* Bitstrings are created like the 'Buffer' module (in fact, using * the Buffer module), by appending snippets to a growing buffer. @@ -233,61 +197,36 @@ let output_constructor _loc fields = (* Convert each field to a simple bitstring-generating expression. 
*) let fields = List.map ( - fun {field=fexpr; flen=flen; endian=endian; signed=signed; - t=t; _loc=_loc} -> + fun field -> + let fexpr = P.get_expr field in + let flen = P.get_length field in + let endian = P.get_endian field in + let signed = P.get_signed field in + let t = P.get_type field in + let _loc = P.get_location field in + let offset = P.get_offset field in + + let fail = locfail _loc in + + (* offset() not supported in constructors. Implementation of + * forward-only offsets is fairly straightforward: we would + * need to just calculate the length of padding here and add + * it to what has been constructed. For general offsets, + * including going backwards, that would require a rethink in + * how we construct bitstrings. + *) + if offset <> None then + fail "offset expressions are not supported in BITSTRING constructors"; + (* Is flen an integer constant? If so, what is it? This * is very simple-minded and only detects simple constants. *) let flen_is_const = expr_is_constant flen in - let name_of_int_construct_const = function - (* XXX As an enhancement we should allow a 64-bit-only - * mode which lets us use 'int' up to 63 bits and won't - * compile on 32-bit platforms. - *) - (* XXX The meaning of signed/unsigned breaks down at - * 31, 32, 63 and 64 bits. 
- *) - | (1, _, _) -> "construct_bit" - | ((2|3|4|5|6|7|8), _, false) -> "construct_char_unsigned" - | ((2|3|4|5|6|7|8), _, true) -> "construct_char_signed" - | (i, Bitmatch.BigEndian, false) when i <= 31 -> - "construct_int_be_unsigned" - | (i, Bitmatch.BigEndian, true) when i <= 31 -> - "construct_int_be_signed" - | (i, Bitmatch.LittleEndian, false) when i <= 31 -> - "construct_int_le_unsigned" - | (i, Bitmatch.LittleEndian, true) when i <= 31 -> - "construct_int_le_signed" - | (i, Bitmatch.NativeEndian, false) when i <= 31 -> - "construct_int_ne_unsigned" - | (i, Bitmatch.NativeEndian, true) when i <= 31 -> - "construct_int_ne_signed" - | (32, Bitmatch.BigEndian, false) -> "construct_int32_be_unsigned" - | (32, Bitmatch.BigEndian, true) -> "construct_int32_be_signed" - | (32, Bitmatch.LittleEndian, false) -> "construct_int32_le_unsigned" - | (32, Bitmatch.LittleEndian, true) -> "construct_int32_le_signed" - | (32, Bitmatch.NativeEndian, false) -> "construct_int32_ne_unsigned" - | (32, Bitmatch.NativeEndian, true) -> "construct_int32_ne_signed" - | (_, Bitmatch.BigEndian, false) -> "construct_int64_be_unsigned" - | (_, Bitmatch.BigEndian, true) -> "construct_int64_be_signed" - | (_, Bitmatch.LittleEndian, false) -> "construct_int64_le_unsigned" - | (_, Bitmatch.LittleEndian, true) -> "construct_int64_le_signed" - | (_, Bitmatch.NativeEndian, false) -> "construct_int64_ne_unsigned" - | (_, Bitmatch.NativeEndian, true) -> "construct_int64_ne_signed" - in - let name_of_int_construct = function - (* XXX As an enhancement we should allow users to - * specify that a field length can fit into a char/int/int32 - * (of course, this would have to be checked at runtime). 
- *) - | (Bitmatch.BigEndian, false) -> "construct_int64_be_unsigned" - | (Bitmatch.BigEndian, true) -> "construct_int64_be_signed" - | (Bitmatch.LittleEndian, false) -> "construct_int64_le_unsigned" - | (Bitmatch.LittleEndian, true) -> "construct_int64_le_signed" - | (Bitmatch.NativeEndian, false) -> "construct_int64_ne_unsigned" - | (Bitmatch.NativeEndian, true) -> "construct_int64_ne_signed" - in + let int_construct_const (i, endian, signed) = + build_bitmatch_call _loc "construct" (Some i) endian signed in + let int_construct (endian, signed) = + build_bitmatch_call _loc "construct" None endian signed in let expr = match t, flen_is_const with @@ -297,18 +236,16 @@ let output_constructor _loc fields = * because that's a lot simpler w.r.t. types. It might * be better to move them here. XXX *) - | Int, Some i when i > 0 && i <= 64 -> - let construct_func = - name_of_int_construct_const (i,endian,signed) in + | P.Int, Some i when i > 0 && i <= 64 -> + let construct_fn = int_construct_const (i,endian,signed) in exn_used := true; <:expr< - Bitmatch.$lid:construct_func$ $lid:buffer$ $fexpr$ $flen$ - $lid:exn$ + $construct_fn$ $lid:buffer$ $fexpr$ $`int:i$ $lid:exn$ >> - | Int, Some _ -> - Loc.raise _loc (Failure "length of int field must be [1..64]") + | P.Int, Some _ -> + fail "length of int field must be [1..64]" (* Int field, non-constant length. We need to perform a runtime * test to ensure the length is [1..64]. @@ -317,52 +254,46 @@ let output_constructor _loc fields = * because that's a lot simpler w.r.t. types. It might * be better to move them here. 
XXX *) - | Int, None -> - let construct_func = name_of_int_construct (endian,signed) in + | P.Int, None -> + let construct_fn = int_construct (endian,signed) in exn_used := true; <:expr< if $flen$ >= 1 && $flen$ <= 64 then - Bitmatch.$lid:construct_func$ $lid:buffer$ $fexpr$ $flen$ - $lid:exn$ + $construct_fn$ $lid:buffer$ $fexpr$ $flen$ $lid:exn$ else - raise (Bitmatch.Construct_failure - ("length of int field must be [1..64]", - $str:loc_fname$, - $int:loc_line$, $int:loc_char$)) + $raise_construct_failure _loc "length of int field must be [1..64]"$ >> (* String, constant length > 0, must be a multiple of 8. *) - | String, Some i when i > 0 && i land 7 = 0 -> + | P.String, Some i when i > 0 && i land 7 = 0 -> let bs = gensym "bs" in + let j = i lsr 3 in <:expr< let $lid:bs$ = $fexpr$ in - if String.length $lid:bs$ = ($flen$ lsr 3) then + if String.length $lid:bs$ = $`int:j$ then Bitmatch.construct_string $lid:buffer$ $lid:bs$ else - raise (Bitmatch.Construct_failure - ("length of string does not match declaration", - $str:loc_fname$, - $int:loc_line$, $int:loc_char$)) + $raise_construct_failure _loc "length of string does not match declaration"$ >> (* String, constant length -1, means variable length string * with no checks. *) - | String, Some (-1) -> + | P.String, Some (-1) -> <:expr< Bitmatch.construct_string $lid:buffer$ $fexpr$ >> (* String, constant length = 0 is probably an error, and so is * any other value. *) - | String, Some _ -> - Loc.raise _loc (Failure "length of string must be > 0 and a multiple of 8, or the special value -1") + | P.String, Some _ -> + fail "length of string must be > 0 and a multiple of 8, or the special value -1" (* String, non-constant length. * We check at runtime that the length is > 0, a multiple of 8, * and matches the declared length. 
*) - | String, None -> + | P.String, None -> let bslen = gensym "bslen" in let bs = gensym "bs" in <:expr< @@ -373,73 +304,51 @@ let output_constructor _loc fields = if String.length $lid:bs$ = ($lid:bslen$ lsr 3) then Bitmatch.construct_string $lid:buffer$ $lid:bs$ else - raise (Bitmatch.Construct_failure - ("length of string does not match declaration", - $str:loc_fname$, - $int:loc_line$, $int:loc_char$)) + $raise_construct_failure _loc "length of string does not match declaration"$ ) else - raise (Bitmatch.Construct_failure - ("length of string must be a multiple of 8", - $str:loc_fname$, - $int:loc_line$, $int:loc_char$)) + $raise_construct_failure _loc "length of string must be a multiple of 8"$ ) else - raise (Bitmatch.Construct_failure - ("length of string must be > 0", - $str:loc_fname$, - $int:loc_line$, $int:loc_char$)) + $raise_construct_failure _loc "length of string must be > 0"$ >> - (* Bitstring, constant length > 0. *) - | Bitstring, Some i when i > 0 -> + (* Bitstring, constant length >= 0. *) + | P.Bitstring, Some i when i >= 0 -> let bs = gensym "bs" in <:expr< let $lid:bs$ = $fexpr$ in - if Bitmatch.bitstring_length $lid:bs$ = $flen$ then + if Bitmatch.bitstring_length $lid:bs$ = $`int:i$ then Bitmatch.construct_bitstring $lid:buffer$ $lid:bs$ else - raise (Bitmatch.Construct_failure - ("length of bitstring does not match declaration", - $str:loc_fname$, - $int:loc_line$, $int:loc_char$)) + $raise_construct_failure _loc "length of bitstring does not match declaration"$ >> (* Bitstring, constant length -1, means variable length bitstring * with no checks. *) - | Bitstring, Some (-1) -> + | P.Bitstring, Some (-1) -> <:expr< Bitmatch.construct_bitstring $lid:buffer$ $fexpr$ >> - (* Bitstring, constant length = 0 is probably an error, and so is - * any other value. - *) - | Bitstring, Some _ -> - Loc.raise _loc - (Failure - "length of bitstring must be > 0 or the special value -1") + (* Bitstring, constant length < -1 is an error. 
*) + | P.Bitstring, Some _ -> + fail "length of bitstring must be >= 0 or the special value -1" (* Bitstring, non-constant length. - * We check at runtime that the length is > 0 and matches + * We check at runtime that the length is >= 0 and matches * the declared length. *) - | Bitstring, None -> + | P.Bitstring, None -> let bslen = gensym "bslen" in let bs = gensym "bs" in <:expr< let $lid:bslen$ = $flen$ in - if $lid:bslen$ > 0 then ( + if $lid:bslen$ >= 0 then ( let $lid:bs$ = $fexpr$ in if Bitmatch.bitstring_length $lid:bs$ = $lid:bslen$ then Bitmatch.construct_bitstring $lid:buffer$ $lid:bs$ else - raise (Bitmatch.Construct_failure - ("length of bitstring does not match declaration", - $str:loc_fname$, - $int:loc_line$, $int:loc_char$)) + $raise_construct_failure _loc "length of bitstring does not match declaration"$ ) else - raise (Bitmatch.Construct_failure - ("length of bitstring must be > 0", - $str:loc_fname$, - $int:loc_line$, $int:loc_char$)) + $raise_construct_failure _loc "length of bitstring must be > 0"$ >> in expr ) fields in @@ -466,11 +375,8 @@ let output_constructor _loc fields = if !exn_used then <:expr< - let $lid:exn$ = - Bitmatch.Construct_failure ("value out of range", - $str:loc_fname$, - $int:loc_line$, $int:loc_char$) in - $expr$ + let $lid:exn$ = $construct_failure _loc "value out of range"$ in + $expr$ >> else expr @@ -484,9 +390,9 @@ let output_bitmatch _loc bs cases = let result = gensym "result" in (* This generates the field extraction code for each - * field a single case. Each field must be wider than - * the minimum permitted for the type and there must be - * enough remaining data in the bitstring to satisfy it. + * field in a single case. There must be enough remaining data + * in the bitstring to satisfy the field. + * * As we go through the fields, symbols 'data', 'off' and 'len' * track our position and remaining length in the bitstring. 
* @@ -496,106 +402,67 @@ let output_bitmatch _loc bs cases = let rec output_field_extraction inner = function | [] -> inner | field :: fields -> - let {field=fpatt; flen=flen; endian=endian; signed=signed; - t=t; _loc=_loc} - = field in - - (* Is flen an integer constant? If so, what is it? This - * is very simple-minded and only detects simple constants. + let fpatt = P.get_patt field in + let flen = P.get_length field in + let endian = P.get_endian field in + let signed = P.get_signed field in + let t = P.get_type field in + let _loc = P.get_location field in + let offset = P.get_offset field in + + let fail = locfail _loc in + + (* Is flen (field len) an integer constant? If so, what is it? + * This will be [Some i] if it's a constant or [None] if it's + * non-constant or we couldn't determine. *) let flen_is_const = expr_is_constant flen in - let name_of_int_extract_const = function - (* XXX As an enhancement we should allow a 64-bit-only - * mode which lets us use 'int' up to 63 bits and won't - * compile on 32-bit platforms. - *) - (* XXX The meaning of signed/unsigned breaks down at - * 31, 32, 63 and 64 bits. 
- *) - | (1, _, _) -> "extract_bit" - | ((2|3|4|5|6|7|8), _, false) -> "extract_char_unsigned" - | ((2|3|4|5|6|7|8), _, true) -> "extract_char_signed" - | (i, Bitmatch.BigEndian, false) when i <= 31 -> - "extract_int_be_unsigned" - | (i, Bitmatch.BigEndian, true) when i <= 31 -> - "extract_int_be_signed" - | (i, Bitmatch.LittleEndian, false) when i <= 31 -> - "extract_int_le_unsigned" - | (i, Bitmatch.LittleEndian, true) when i <= 31 -> - "extract_int_le_signed" - | (i, Bitmatch.NativeEndian, false) when i <= 31 -> - "extract_int_ne_unsigned" - | (i, Bitmatch.NativeEndian, true) when i <= 31 -> - "extract_int_ne_signed" - | (32, Bitmatch.BigEndian, false) -> "extract_int32_be_unsigned" - | (32, Bitmatch.BigEndian, true) -> "extract_int32_be_signed" - | (32, Bitmatch.LittleEndian, false) -> "extract_int32_le_unsigned" - | (32, Bitmatch.LittleEndian, true) -> "extract_int32_le_signed" - | (32, Bitmatch.NativeEndian, false) -> "extract_int32_ne_unsigned" - | (32, Bitmatch.NativeEndian, true) -> "extract_int32_ne_signed" - | (_, Bitmatch.BigEndian, false) -> "extract_int64_be_unsigned" - | (_, Bitmatch.BigEndian, true) -> "extract_int64_be_signed" - | (_, Bitmatch.LittleEndian, false) -> "extract_int64_le_unsigned" - | (_, Bitmatch.LittleEndian, true) -> "extract_int64_le_signed" - | (_, Bitmatch.NativeEndian, false) -> "extract_int64_ne_unsigned" - | (_, Bitmatch.NativeEndian, true) -> "extract_int64_ne_signed" - in - let name_of_int_extract = function - (* XXX As an enhancement we should allow users to - * specify that a field length can fit into a char/int/int32 - * (of course, this would have to be checked at runtime). 
- *) - | (Bitmatch.BigEndian, false) -> "extract_int64_be_unsigned" - | (Bitmatch.BigEndian, true) -> "extract_int64_be_signed" - | (Bitmatch.LittleEndian, false) -> "extract_int64_le_unsigned" - | (Bitmatch.LittleEndian, true) -> "extract_int64_le_signed" - | (Bitmatch.NativeEndian, false) -> "extract_int64_ne_unsigned" - | (Bitmatch.NativeEndian, true) -> "extract_int64_ne_signed" - in + let int_extract_const (i, endian, signed) = + build_bitmatch_call _loc "extract" (Some i) endian signed in + let int_extract (endian, signed) = + build_bitmatch_call _loc "extract" None endian signed in let expr = match t, flen_is_const with (* Common case: int field, constant flen *) - | Int, Some i when i > 0 && i <= 64 -> - let extract_func = name_of_int_extract_const (i,endian,signed) in + | P.Int, Some i when i > 0 && i <= 64 -> + let extract_fn = int_extract_const (i,endian,signed) in let v = gensym "val" in <:expr< - if $lid:len$ >= $flen$ then ( + if $lid:len$ >= $`int:i$ then ( let $lid:v$, $lid:off$, $lid:len$ = - Bitmatch.$lid:extract_func$ $lid:data$ $lid:off$ $lid:len$ - $flen$ in + $extract_fn$ $lid:data$ $lid:off$ $lid:len$ $`int:i$ in match $lid:v$ with $fpatt$ when true -> $inner$ | _ -> () ) >> - | Int, Some _ -> - Loc.raise _loc (Failure "length of int field must be [1..64]") + | P.Int, Some _ -> + fail "length of int field must be [1..64]" (* Int field, non-const flen. We have to test the range of * the field at runtime. If outside the range it's a no-match * (not an error). 
*) - | Int, None -> - let extract_func = name_of_int_extract (endian,signed) in + | P.Int, None -> + let extract_fn = int_extract (endian,signed) in let v = gensym "val" in <:expr< if $flen$ >= 1 && $flen$ <= 64 && $flen$ <= $lid:len$ then ( let $lid:v$, $lid:off$, $lid:len$ = - Bitmatch.$lid:extract_func$ $lid:data$ $lid:off$ $lid:len$ - $flen$ in + $extract_fn$ $lid:data$ $lid:off$ $lid:len$ $flen$ in match $lid:v$ with $fpatt$ when true -> $inner$ | _ -> () ) >> (* String, constant flen > 0. *) - | String, Some i when i > 0 && i land 7 = 0 -> + | P.String, Some i when i > 0 && i land 7 = 0 -> let bs = gensym "bs" in <:expr< - if $lid:len$ >= $flen$ then ( + if $lid:len$ >= $`int:i$ then ( let $lid:bs$, $lid:off$, $lid:len$ = Bitmatch.extract_bitstring $lid:data$ $lid:off$ $lid:len$ - $flen$ in + $`int:i$ in match Bitmatch.string_of_bitstring $lid:bs$ with | $fpatt$ when true -> $inner$ | _ -> () @@ -605,7 +472,7 @@ let output_bitmatch _loc bs cases = (* String, constant flen = -1, means consume all the * rest of the input. *) - | String, Some i when i = -1 -> + | P.String, Some i when i = -1 -> let bs = gensym "bs" in <:expr< let $lid:bs$, $lid:off$, $lid:len$ = @@ -615,13 +482,13 @@ let output_bitmatch _loc bs cases = | _ -> () >> - | String, Some _ -> - Loc.raise _loc (Failure "length of string must be > 0 and a multiple of 8, or the special value -1") + | P.String, Some _ -> + fail "length of string must be > 0 and a multiple of 8, or the special value -1" (* String field, non-const flen. We check the flen is > 0 * and a multiple of 8 (-1 is not allowed here), at runtime. *) - | String, None -> + | P.String, None -> let bs = gensym "bs" in <:expr< if $flen$ >= 0 && $flen$ <= $lid:len$ @@ -639,19 +506,18 @@ let output_bitmatch _loc bs cases = * At the moment all we can do is assign the bitstring to an * identifier. 
*) - | Bitstring, Some i when i >= 0 -> + | P.Bitstring, Some i when i >= 0 -> let ident = match fpatt with | <:patt< $lid:ident$ >> -> ident | <:patt< _ >> -> "_" | _ -> - Loc.raise _loc - (Failure "cannot compare a bitstring to a constant") in + fail "cannot compare a bitstring to a constant" in <:expr< - if $lid:len$ >= $flen$ then ( + if $lid:len$ >= $`int:i$ then ( let $lid:ident$, $lid:off$, $lid:len$ = Bitmatch.extract_bitstring $lid:data$ $lid:off$ $lid:len$ - $flen$ in + $`int:i$ in $inner$ ) >> @@ -659,34 +525,32 @@ let output_bitmatch _loc bs cases = (* Bitstring, constant flen = -1, means consume all the * rest of the input. *) - | Bitstring, Some i when i = -1 -> + | P.Bitstring, Some i when i = -1 -> let ident = match fpatt with | <:patt< $lid:ident$ >> -> ident | <:patt< _ >> -> "_" | _ -> - Loc.raise _loc - (Failure "cannot compare a bitstring to a constant") in + fail "cannot compare a bitstring to a constant" in <:expr< let $lid:ident$, $lid:off$, $lid:len$ = Bitmatch.extract_remainder $lid:data$ $lid:off$ $lid:len$ in $inner$ >> - | Bitstring, Some _ -> - Loc.raise _loc (Failure "length of bitstring must be >= 0 or the special value -1") + | P.Bitstring, Some _ -> + fail "length of bitstring must be >= 0 or the special value -1" (* Bitstring field, non-const flen. We check the flen is >= 0 * (-1 is not allowed here) at runtime. *) - | Bitstring, None -> + | P.Bitstring, None -> let ident = match fpatt with | <:patt< $lid:ident$ >> -> ident | <:patt< _ >> -> "_" | _ -> - Loc.raise _loc - (Failure "cannot compare a bitstring to a constant") in + fail "cannot compare a bitstring to a constant" in <:expr< if $flen$ >= 0 && $flen$ <= $lid:len$ then ( let $lid:ident$, $lid:off$, $lid:len$ = @@ -697,10 +561,124 @@ let output_bitmatch _loc bs cases = >> in + (* Computed offset: only offsets forward are supported. + * + * We try hard to optimize this based on what we know. Are + * we at a predictable offset now? 
(Look at the outer 'fields' + * list and see if they all have constant field length starting + * at some constant offset). Is this offset constant? + * + * Based on this we can do a lot of the computation at + * compile time, or defer it to runtime only if necessary. + * + * In all cases, the off and len fields get updated. + *) + let expr = + match offset with + | None -> expr (* common case: there was no offset expression *) + | Some offset_expr -> + (* This will be [Some i] if offset is a constant expression + * or [None] if it's a non-constant. + *) + let requested_offset = expr_is_constant offset_expr in + + (* This will be [Some i] if our current offset is known + * at compile time, or [None] if we can't determine it. + *) + let current_offset = + let has_constant_offset field = + match P.get_offset field with + | None -> false + | Some expr -> + match expr_is_constant expr with + | None -> false + | Some i -> true + in + let get_constant_offset field = + match P.get_offset field with + | None -> assert false + | Some expr -> + match expr_is_constant expr with + | None -> assert false + | Some i -> i + in + + let has_constant_len field = + match expr_is_constant (P.get_length field) with + | None -> false + | Some i when i > 0 -> true + | Some _ -> false + in + let get_constant_len field = + match expr_is_constant (P.get_length field) with + | None -> assert false + | Some i when i > 0 -> i + | Some _ -> assert false + in + + let rec loop = function + (* first field has constant offset 0 *) + | [] -> Some 0 + (* field with constant offset & length *) + | field :: _ + when has_constant_offset field && + has_constant_len field -> + Some (get_constant_offset field + get_constant_len field) + (* field with no offset & constant length *) + | field :: fields + when P.get_offset field = None && + has_constant_len field -> + (match loop fields with + | None -> None + | Some offset -> Some (offset + get_constant_len field)) + (* else, can't work out the offset *) + | 
_ -> None + in + loop fields in + + (* Look at the current offset and requested offset cases and + * determine what code to generate. + *) + match current_offset, requested_offset with + (* This is the good case: both the current offset and + * the requested offset are constant, so we can remove + * almost all the runtime checks. + *) + | Some current_offset, Some requested_offset -> + let move = requested_offset - current_offset in + if move < 0 then + fail (sprintf "requested offset is less than the current offset (%d < %d)" requested_offset current_offset); + (* Add some code to move the offset and length by a + * constant amount, and a runtime test that len >= 0 + * (XXX possibly the runtime test is unnecessary?) + *) + <:expr< + let $lid:off$ = $lid:off$ + $`int:move$ in + let $lid:len$ = $lid:len$ - $`int:move$ in + if $lid:len$ >= 0 then $expr$ + >> + (* In any other case, we need to use runtime checks. + * + * XXX It's not clear if a backwards move detected at runtime + * is merely a match failure, or a runtime error. At the + * moment it's just a match failure since bitmatch generally + * doesn't raise runtime errors. + *) + | _ -> + let move = gensym "move" in + <:expr< + let $lid:move$ = $offset_expr$ - $lid:off$ in + if $lid:move$ >= 0 then ( + let $lid:off$ = $lid:off$ + $lid:move$ in + let $lid:len$ = $lid:len$ - $lid:move$ in + if $lid:len$ >= 0 then $expr$ + ) + >> in (* end of computed offset code *) + (* Emit extra debugging code. *) let expr = if not debug then expr else ( - let field = string_of_field field in + let field = P.string_of_pattern_field field in <:expr< if !Bitmatch.debug then ( @@ -774,26 +752,93 @@ let output_bitmatch _loc bs cases = $int:loc_line$, $int:loc_char$)) >> +(* Add a named pattern. *) +let add_named_pattern _loc name pattern = + Hashtbl.add pattern_hash name pattern + +(* Expand a named pattern from the pattern_hash. 
*)
+(* [expand_named_pattern _loc name] looks [name] up in [pattern_hash] and
+ * returns the stored pattern.  An unknown name is reported through
+ * [locfail], i.e. as a [Failure] located at [_loc].
+ *)
+let expand_named_pattern _loc name =
+  try Hashtbl.find pattern_hash name
+  with Not_found ->
+    locfail _loc (sprintf "named pattern not found: %s" name)
+
+(* Add named patterns from a file.  See the documentation on the
+ * directory search path in bitmatch_persistent.mli
+ *
+ * A relative, implicit [filename] is tried in the current directory
+ * first and then under [Bitmatch_config.ocamllibdir]; any other
+ * filename is opened exactly as given.  If the file cannot be opened
+ * the exception is re-raised located at [_loc].
+ *
+ * Entries are read with [P.named_from_channel] until it raises
+ * [End_of_file].  Each [P.Pattern] entry is registered with
+ * [add_named_pattern] (an empty field list is a located error);
+ * [P.Constructor] entries are skipped.
+ *)
+let load_patterns_from_file _loc filename =
+  let chan =
+    if Filename.is_relative filename && Filename.is_implicit filename then (
+      (* Try current directory. *)
+      try open_in filename
+      with _ ->
+        (* Try OCaml library directory. *)
+        try open_in (Filename.concat Bitmatch_config.ocamllibdir filename)
+        with exn -> Loc.raise _loc exn
+    ) else (
+      try open_in filename
+      with exn -> Loc.raise _loc exn
+    ) in
+  let names = ref [] in
+  (try
+     (* Keep reading until End_of_file is raised.  The recursive call
+      * matters: without it only the first entry of the file would
+      * ever be loaded.
+      *)
+     let rec loop () =
+       let name = P.named_from_channel chan in
+       names := name :: !names;
+       loop ()
+     in
+     loop ()
+   with
+   | End_of_file -> ()
+   | exn ->
+       (* Do not leak the channel if an entry cannot be read. *)
+       close_in_noerr chan;
+       raise exn
+  );
+  close_in chan;
+  let names = List.rev !names in
+  List.iter (
+    function
+    | name, P.Pattern patt ->
+        if patt = [] then
+          locfail _loc (sprintf "pattern %s: no fields" name);
+        add_named_pattern _loc name patt
+    | _, P.Constructor _ -> () (* just ignore these for now *)
+  ) names
+
 EXTEND Gram
-  GLOBAL: expr;
+  GLOBAL: expr str_item;
+  (* Qualifiers are a list of identifiers ("string", "bigendian", etc.)
+   * followed by an optional expression (used in certain cases).  Note
+   * that we are careful not to declare any explicit reserved words.
+   *)
   qualifiers: [
-    [ LIST0 [ q = LIDENT -> q ] SEP "," ]
+    [ LIST0
+        [ q = LIDENT;
+          e = OPT [ "("; e = expr; ")" -> e ] -> (q, e) ]
+        SEP "," ]
   ];
-  (* Field used in the bitmatch operator (a pattern). *)
+  (* Field used in the bitmatch operator (a pattern).  This can actually
+   * return multiple fields, in the case where the 'field' is a named
+   * persistent pattern.
+ *) + patt_field: [ [ fpatt = patt; ":"; len = expr LEVEL "top"; qs = OPT [ ":"; qs = qualifiers -> qs ] -> - parse_field _loc fpatt len qs patt_printer + let field = P.create_pattern_field _loc in + let field = P.set_patt field fpatt in + let field = P.set_length field len in + [parse_field _loc field qs] (* Normal, single field. *) + | ":"; name = LIDENT -> + expand_named_pattern _loc name (* Named -> list of fields. *) ] ]; (* Case inside bitmatch operator. *) - match_case: [ + patt_fields: [ (* Braced list of pattern fields separated by semicolons. Each patt_field yields a list of fields, so the results are flattened with List.concat. *) [ "{"; fields = LIST0 patt_field SEP ";"; - "}"; + "}" -> + List.concat fields + ] + ]; + + patt_case: [ (* One case of the bitmatch operator: the fields, an optional [as name] binding, an optional [when] guard, then the arrow and the result expression. *) + [ fields = patt_fields; bind = OPT [ "as"; name = LIDENT -> name ]; whenclause = OPT [ "when"; e = expr -> e ]; "->"; code = expr -> @@ -805,7 +850,18 @@ EXTEND Gram constr_field: [ [ fexpr = expr LEVEL "top"; ":"; len = expr LEVEL "top"; qs = OPT [ ":"; qs = qualifiers -> qs ] -> - parse_field _loc fexpr len qs expr_printer + let field = P.create_constructor_field _loc in + let field = P.set_expr field fexpr in + let field = P.set_length field len in + parse_field _loc field qs + ] + ]; + + constr_fields: [ (* Braced list of constructor fields separated by semicolons, returned as the list of parsed fields. *) + [ "{"; + fields = LIST0 constr_field SEP ";"; + "}" -> + fields ] ]; @@ -813,16 +869,34 @@ EXTEND Gram expr: LEVEL ";" [ [ "bitmatch"; bs = expr; "with"; OPT "|"; - cases = LIST1 match_case SEP "|" -> + cases = LIST1 patt_case SEP "|" -> output_bitmatch _loc bs cases ] (* Constructor. *) - | [ "BITSTRING"; "{"; - fields = LIST0 constr_field SEP ";"; - "}" -> + | [ "BITSTRING"; + fields = constr_fields -> output_constructor _loc fields ] ]; + (* Named persistent patterns. + * + * NB: Currently only allowed at the top level. We can probably lift + * this restriction later if necessary. We only deal with patterns + * at the moment, not constructors, but the infrastructure to do + * constructors is in place.
+ *) + str_item: LEVEL "top" [ + [ "let"; "bitmatch"; + name = LIDENT; "="; fields = patt_fields -> (* Register the parsed fields under name in the named-pattern table, as a side effect of this grammar action. *) + add_named_pattern _loc name fields; + (* The statement disappears, but we still need a str_item so ... *) + <:str_item< >> + | "open"; "bitmatch"; filename = STRING -> (* Load named patterns from filename as a side effect; this statement also expands to an empty str_item. *) + load_patterns_from_file _loc filename; + <:str_item< >> + ] + ]; + END