From: Richard W.M. Jones Date: Thu, 17 Jul 2008 11:27:13 +0000 (+0000) Subject: Implement save_to_offset() and partially implement when() and bind(). X-Git-Url: http://git.annexia.org/?a=commitdiff_plain;h=573b0f1c4feb7e3b7abf29a835938367ef896867;p=ocaml-bitstring.git Implement save_to_offset() and partially implement when() and bind(). --- diff --git a/bitmatch.mli b/bitmatch.mli index cd59f95..fbecf8a 100644 --- a/bitmatch.mli +++ b/bitmatch.mli @@ -419,7 +419,9 @@ Bitmatch.hexdump_bitstring stdout bits ;; still need to be a runtime check to enforce the size). - {2:computedoffsets Computed offsets} + {2 Advanced pattern-matching features} + + {3:computedoffsets Computed offsets} You can add an [offset(..)] qualifier to bitmatch patterns in order to move the current offset within the bitstring forwards. @@ -444,6 +446,79 @@ bitmatch bits with Note that moving the offset backwards, and moving the offset in [BITSTRING] constructors, are both not supported at present. + {3 When-qualifiers} + + You can add a [when(expr)] qualifier to bitmatch patterns. + If the expression evaluates to false then the current match case fails. + + For example: +{[ +bitmatch bits with +| { field : 16 : when (field > 100) } -> ... +]} + + Note the difference between a when-qualifier and a when-clause + is that the when-clause is evaluated after all the fields have + been matched. On the other hand a when-qualifier is evaluated + after the individual field has been matched, which means it is + potentially more efficient (if the when-qualifier fails then + we don't waste any time matching later fields). + + {3 Bind expressions} + + A bind expression is used to change the value of a matched + field. For example: +{[ +bitmatch bits with +| { len : 16 : bind (len * 8); + field : len : bitstring } -> ... +]} + + In the example, after 'len' has been matched, its value would + be multiplied by 8, so the width of 'field' is the matched + value multiplied by 8. + + In the general case: +{[ +| { field : ... : bind (expr) } -> ... +]} + evaluates the following after the field has been matched: +{[ + let field = expr in + (* remaining fields *) +]} + + {3 Order of evaluation of when() and bind()} + + The choice is arbitrary, but we have chosen that when-qualifiers + are evaluated first, and bind expressions are evaluated after. + + This means that the result of bind() is {i not} available in + the when-qualifier. + + Note that this rule applies whatever order the when() and bind() + appear in the source code. + + {3 save_offset_to} + + Use [save_offset_to(variable)] to save the current bit offset + within the match to a variable (strictly speaking, to a pattern). + This variable is then made available in any [when()] and [bind()] + clauses in the current field, {i and} to any later fields, and + to the code after the [->]. + + For example: +{[ +bitmatch bits with +| { len : 16; + _ : len : bitstring; + field : 16 : save_offset_to (field_offset) } -> + printf "field is at bit offset %d in the match\n" field_offset +]} + + (In that example, [field_offset] should always have the value + [len+16]). + {2 Named patterns and persistent patterns} Please see {!Bitmatch_persistent} for documentation on this subject. diff --git a/bitmatch_persistent.ml b/bitmatch_persistent.ml index 2b1b524..3413800 100644 --- a/bitmatch_persistent.ml +++ b/bitmatch_persistent.ml @@ -40,6 +40,9 @@ type 'a field = { t : field_type; (* type *) _loc : Loc.t; (* location in source code *) offset : expr option; (* offset expression *) + when_ : expr option; (* when qualifier [patterns only] *) + bind : expr option; (* bind expression [patterns only] *) + save_offset_to : patt option; (* save_offset_to [patterns only] *) } and field_type = Int | String | Bitstring (* field type *) and endian_expr = @@ -109,7 +112,8 @@ let expr_printer = function let _string_of_field { flen = flen; endian = endian; signed = signed; t = t; _loc = _loc; - offset = offset } = + offset = offset; when_ = when_; bind = bind; + save_offset_to = save_offset_to } = let flen = match expr_is_constant flen with | Some i -> string_of_int i @@ -129,12 +133,31 @@ let _string_of_field { flen = flen; | Some i -> sprintf ", offset(%d)" i | None -> sprintf ", offset([expr])" in + let when_ = + match when_ with + | None -> "" + | Some expr -> sprintf ", when([expr])" in + + let bind = + match bind with + | None -> "" + | Some expr -> sprintf ", bind([expr])" in + + let save_offset_to = + match save_offset_to with + | None -> "" + | Some patt -> + match patt with + | <:patt< $lid:id$ >> -> sprintf ", save_offset_to(%s)" id + | _ -> sprintf ", save_offset_to([patt])" in + let loc_fname = Loc.file_name _loc in let loc_line = Loc.start_line _loc in let loc_char = Loc.start_off _loc - Loc.start_bol _loc in - sprintf "%s : %s, %s, %s%s (* %S:%d %d *)" - flen t endian signed offset loc_fname loc_line loc_char + sprintf "%s : %s, %s, %s%s%s%s%s (* %S:%d %d *)" + flen t endian signed offset when_ bind save_offset_to + loc_fname loc_line loc_char let rec string_of_pattern_field ({ field = patt } as field) = sprintf "%s : %s" (patt_printer patt) (_string_of_field field) @@ -171,6 +194,9 @@ let create_pattern_field _loc = t = Int; _loc = _loc; offset = None; + when_ = None; + bind = None; + save_offset_to = None; } let set_lident_patt field id = @@ -202,6 +228,15 @@ let set_offset_int field i = { field with offset = Some <:expr< $`int:i$ >> } let set_offset field expr = { field with offset = Some expr } let set_no_offset field = { field with offset = None } +let set_when field expr = { field with when_ = Some expr } +let set_no_when field = { field with when_ = None } +let set_bind field expr = { field with bind = Some expr } +let set_no_bind field = { field with bind = None } +let set_save_offset_to field patt = { field with save_offset_to = Some patt } +let set_save_offset_to_lident field id = + let _loc = field._loc in + { field with save_offset_to = Some <:patt< $lid:id$ >> } +let set_no_save_offset_to field = { field with save_offset_to = None } let create_constructor_field _loc = { @@ -212,6 +247,9 @@ let create_constructor_field _loc = t = Int; _loc = _loc; offset = None; + when_ = None; + bind = None; + save_offset_to = None; } let set_lident_expr field id = @@ -235,3 +273,6 @@ let get_signed field = field.signed let get_type field = field.t let get_location field = field._loc let get_offset field = field.offset +let get_when field = field.when_ +let get_bind field = field.bind +let get_save_offset_to field = field.save_offset_to diff --git a/bitmatch_persistent.mli b/bitmatch_persistent.mli index c41db5d..7374632 100644 --- a/bitmatch_persistent.mli +++ b/bitmatch_persistent.mli @@ -441,6 +441,27 @@ val set_no_offset : 'a field -> 'a field follow the previous field, or if it is the first field will be at offset zero. *) +val set_when : 'a field -> expr -> 'a field +(** Set the when-expression for a field to the given expression. *) + +val set_no_when : 'a field -> 'a field +(** Remove the when-expression from a field. *) + +val set_bind : 'a field -> expr -> 'a field +(** Set the bind-expression for a field to the given expression. *) + +val set_no_bind : 'a field -> 'a field +(** Remove the bind-expression from a field. *) + +val set_save_offset_to : 'a field -> patt -> 'a field +(** Set the save_offset_to pattern for a field to the given pattern. *) + +val set_save_offset_to_lident : 'a field -> string -> 'a field +(** Set the save_offset_to pattern for a field to identifier. *) + +val set_no_save_offset_to : 'a field -> 'a field +(** Remove the save_offset_to from a field. *) + (** {3 Create constructor fields} These fields are used in constructors ([BITSTRING]). *) @@ -507,3 +528,12 @@ val get_location : 'a field -> loc_t val get_offset : 'a field -> expr option (** Get the offset expression of a field, or [None] if there is none. *) + +val get_when : 'a field -> expr option +(** Get the when expression of a field, or [None] if there is none. *) + +val get_bind : 'a field -> expr option +(** Get the bind expression of a field, or [None] if there is none. *) + +val get_save_offset_to : 'a field -> patt option +(** Get the save_offset_to pattern of a field, or [None] if there is none. *) diff --git a/pa_bitmatch.ml b/pa_bitmatch.ml index c05ffde..58290ca 100644 --- a/pa_bitmatch.ml +++ b/pa_bitmatch.ml @@ -69,30 +69,65 @@ let gensym = incr i; let i = !i in sprintf "__pabitmatch_%s_%d" name i +(* Used to keep track of which qualifiers we've seen in parse_field. *) +type whatset_t = { + endian_set : bool; signed_set : bool; type_set : bool; + offset_set : bool; when_set : bool; bind_set : bool; + save_offset_to_set : bool; +} +let noneset = { + endian_set = false; signed_set = false; type_set = false; + offset_set = false; when_set = false; bind_set = false; + save_offset_to_set = false +} + (* Deal with the qualifiers which appear for a field of both types. *) let parse_field _loc field qs = let fail = locfail _loc in - let endian_set, signed_set, type_set, offset_set, field = + let whatset, field = match qs with - | None -> (false, false, false, false, field) + | None -> noneset, field | Some qs -> let check already_set msg = if already_set then fail msg in - let apply_qualifier - (endian_set, signed_set, type_set, offset_set, field) = + let apply_qualifier (whatset, field) = function | "endian", Some expr -> - check endian_set "an endian flag has been set already"; + check whatset.endian_set "an endian flag has been set already"; let field = P.set_endian_expr field expr in - (true, signed_set, type_set, offset_set, field) + { whatset with endian_set = true }, field | "endian", None -> fail "qualifier 'endian' should be followed by an expression" | "offset", Some expr -> - check offset_set "an offset has been set already"; + check whatset.offset_set "an offset has been set already"; let field = P.set_offset field expr in - (endian_set, signed_set, type_set, true, field) + { whatset with offset_set = true }, field | "offset", None -> fail "qualifier 'offset' should be followed by an expression" + | "when", Some expr -> + check whatset.when_set "a when-qualifier has been set already"; + let field = P.set_when field expr in + { whatset with when_set = true }, field + | "when", None -> + fail "qualifier 'when' should be followed by an expression" + | "bind", Some expr -> + check whatset.bind_set "a bind expression has been set already"; + let field = P.set_bind field expr in + { whatset with bind_set = true }, field + | "bind", None -> + fail "qualifier 'bind' should be followed by an expression" + | "save_offset_to", Some expr (* XXX should be a pattern *) -> + check whatset.save_offset_to_set + "a save_offset_to-qualifier has been set already"; + let id = + match expr with + | <:expr< $lid:id$ >> -> id + | _ -> + failwith "pa_bitmatch: internal error: save_offset_to only supports simple identifiers at the moment. In future we should support full patterns." in + let field = P.set_save_offset_to_lident field id in + { whatset with save_offset_to_set = true }, field + | "save_offset_to", None -> + fail "qualifier 'save_offset_to' should be followed by a binding expression" | s, Some _ -> fail (s ^ ": unknown qualifier, or qualifier should not be followed by an expression") | qual, None -> @@ -104,33 +139,37 @@ let parse_field _loc field qs = "string", P.set_type_string; "bitstring", P.set_type_bitstring] in if List.mem_assoc qual endian_quals then ( - check endian_set "an endian flag has been set already"; + check whatset.endian_set "an endian flag has been set already"; let field = P.set_endian field (List.assoc qual endian_quals) in - (true, signed_set, type_set, offset_set, field) + { whatset with endian_set = true }, field ) else if List.mem_assoc qual sign_quals then ( - check signed_set "a signed flag has been set already"; + check whatset.signed_set "a signed flag has been set already"; let field = P.set_signed field (List.assoc qual sign_quals) in - (endian_set, true, type_set, offset_set, field) + { whatset with signed_set = true }, field ) else if List.mem_assoc qual type_quals then ( - check type_set "a type flag has been set already"; - let field = List.assoc qual type_quals field in - (endian_set, signed_set, true, offset_set, field) + check whatset.type_set "a type flag has been set already"; + let field = (List.assoc qual type_quals) field in + { whatset with type_set = true }, field ) else fail (qual ^ ": unknown qualifier, or qualifier should be followed by an expression") in - List.fold_left apply_qualifier (false, false, false, false, field) qs in + List.fold_left apply_qualifier (noneset, field) qs in (* If type is set to string or bitstring then endianness and * signedness qualifiers are meaningless and must not be set. *) let () = let t = P.get_type field in - if (t = P.Bitstring || t = P.String) && (endian_set || signed_set) then - fail "string types and endian or signed qualifiers cannot be mixed" in + if (t = P.Bitstring || t = P.String) && + (whatset.endian_set || whatset.signed_set) then + fail "string types and endian or signed qualifiers cannot be mixed" in (* Default endianness, signedness, type if not set already. *) - let field = if endian_set then field else P.set_endian field BigEndian in - let field = if signed_set then field else P.set_signed field false in - let field = if type_set then field else P.set_type_int field in + let field = + if whatset.endian_set then field else P.set_endian field BigEndian in + let field = + if whatset.signed_set then field else P.set_signed field false in + let field = + if whatset.type_set then field else P.set_type_int field in field @@ -140,7 +179,7 @@ let build_bitmatch_call _loc funcname length endian signed = (* XXX The meaning of signed/unsigned breaks down at * 31, 32, 63 and 64 bits. *) - | (Some 1, _, _) -> <:expr> + | (Some 1, _, _) -> <:expr< Bitmatch.$lid:funcname ^ "_bit"$ >> | (Some (2|3|4|5|6|7|8), _, sign) -> let call = Printf.sprintf "%s_char_%s" funcname (if sign then "signed" else "unsigned") in @@ -205,19 +244,26 @@ let output_constructor _loc fields = let signed = P.get_signed field in let t = P.get_type field in let _loc = P.get_location field in - let offset = P.get_offset field in let fail = locfail _loc in - (* offset() not supported in constructors. Implementation of - * forward-only offsets is fairly straightforward: we would - * need to just calculate the length of padding here and add - * it to what has been constructed. For general offsets, - * including going backwards, that would require a rethink in - * how we construct bitstrings. + (* offset(), when(), bind(), save_offset_to() not supported in + * constructors. + * + * Implementation of forward-only offsets is fairly + * straightforward: we would need to just calculate the length of + * padding here and add it to what has been constructed. For + * general offsets, including going backwards, that would require + * a rethink in how we construct bitstrings. *) - if offset <> None then + if P.get_offset field <> None then fail "offset expressions are not supported in BITSTRING constructors"; + if P.get_when field <> None then + fail "when expressions are not supported in BITSTRING constructors"; + if P.get_bind field <> None then + fail "bind expressions are not supported in BITSTRING constructors"; + if P.get_save_offset_to field <> None then + fail "save_offset_to is not supported in BITSTRING constructors"; (* Is flen an integer constant? If so, what is it? This * is very simple-minded and only detects simple constants. @@ -227,7 +273,7 @@ let output_constructor _loc fields = let int_construct_const (i, endian, signed) = build_bitmatch_call _loc "construct" (Some i) endian signed in let int_construct (endian, signed) = - build_bitmatch_call _loc "construct" None endian signed in + build_bitmatch_call _loc "construct" None endian signed in let expr = match t, flen_is_const with @@ -427,7 +473,6 @@ let output_bitmatch _loc bs cases = let signed = P.get_signed field in let t = P.get_type field in let _loc = P.get_location field in - let offset = P.get_offset field in let fail = locfail _loc in @@ -437,10 +482,10 @@ let output_bitmatch _loc bs cases = *) let flen_is_const = expr_is_constant flen in - let int_extract_const (i, endian, signed) = - build_bitmatch_call _loc "extract" (Some i) endian signed in - let int_extract (endian, signed) = - build_bitmatch_call _loc "extract" None endian signed in + let int_extract_const (i, endian, signed) = + build_bitmatch_call _loc "extract" (Some i) endian signed in + let int_extract (endian, signed) = + build_bitmatch_call _loc "extract" None endian signed in let expr = match t, flen_is_const with @@ -593,7 +638,7 @@ let output_bitmatch _loc bs cases = * In all cases, the off and len fields get updated. *) let expr = - match offset with + match P.get_offset field with | None -> expr (* common case: there was no offset expression *) | Some offset_expr -> (* This will be [Some i] if offset is a constant expression @@ -695,6 +740,16 @@ let output_bitmatch _loc bs cases = ) >> in (* end of computed offset code *) + (* save_offset_to(patt) saves the current offset into a variable. *) + let expr = + match P.get_save_offset_to field with + | None -> expr (* no save_offset_to *) + | Some patt -> + <:expr< + let $patt$ = $lid:off$ - $lid:original_off$ in + $expr$ + >> in + (* Emit extra debugging code. *) let expr = if not debug then expr else ( diff --git a/tests/65_save_offset_to.ml b/tests/65_save_offset_to.ml new file mode 100644 index 0000000..27c17e4 --- /dev/null +++ b/tests/65_save_offset_to.ml @@ -0,0 +1,47 @@ +(* Test save_offset_to. + * $Id$ + *) + +open Printf +open Bitmatch + +let make_bits p i n j m k = ( + let pad0 = ones_bitstring p in + let pad1 = ones_bitstring (n-8) in + let pad2 = ones_bitstring (m-n-8) in + BITSTRING { + pad0 : p : bitstring; (* will be skipped below *) + i : 8; + pad1 : n-8 : bitstring; + j : 8; (* this should be at offset(n) *) + pad2 : m-n-8 : bitstring; + k : 8 (* this should be at offset(m) *) + } +) + +let test_bits bits p i n j m k = + (* Skip the 'p' padding bits so the match starts at a non-zero offset. *) + let bits = dropbits p bits in + + bitmatch bits with + | { i' : 8; + _ : n-8 : bitstring; + j' : 8 : save_offset_to (j_offset); + _ : m-n-8 : bitstring; + k' : 8 : save_offset_to (k_offset) } + when i = i' && j = j' && k = k' && j_offset = n && k_offset = m -> + () (* ok *) + | { _ } -> + failwith (sprintf + "65_save_offset_to: test_bits: failed %d %d %d %d %d %d" + p i n j m k) + +let () = + for p = 0 to 4 do + for n = 8 to 64 do + for m = n+8 to 128 do + List.iter (fun (i,j,k) -> test_bits (make_bits p i n j m k) p i n j m k) + [0x55, 0xaa, 0x33; 0x33, 0xaa, 0x55; 0x12, 0x34, 0x56] + done; + done; + done