From: Richard W.M. Jones Date: Tue, 1 Apr 2008 17:05:37 +0000 (+0000) Subject: Added constructors. X-Git-Url: http://git.annexia.org/?a=commitdiff_plain;h=277441c3a2a9118c5da99bac9246a912860fa210;p=ocaml-bitstring.git Added constructors. --- diff --git a/Makefile b/Makefile index b6697eb..e1973c8 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -# $Id: Makefile,v 1.2 2008-04-01 08:56:43 rjones Exp $ +# $Id: Makefile,v 1.3 2008-04-01 17:05:37 rjones Exp $ OCAMLFIND = ocamlfind OCAMLMKLIB = ocamlmklib @@ -37,7 +37,9 @@ test: pa_bitmatch.cmo bitmatch.cma print-tests: pa_bitmatch.cmo @for f in $(TESTS); do \ echo Test: $$f.ml; \ - camlp4o pa_bitmatch.cmo -printer pr_o.cmo $$f.ml; \ + cmd="camlp4o pa_bitmatch.cmo -printer pr_o.cmo $$f.ml"; \ + echo $$cmd; \ + $$cmd; \ done print-examples: pa_bitmatch.cmo diff --git a/bitmatch.ml b/bitmatch.ml index 6985c41..0c9edc0 100644 --- a/bitmatch.ml +++ b/bitmatch.ml @@ -1,9 +1,17 @@ (* Bitmatch library. - * $Id: bitmatch.ml,v 1.4 2008-04-01 10:58:53 rjones Exp $ + * $Id: bitmatch.ml,v 1.5 2008-04-01 17:05:37 rjones Exp $ *) open Printf +(* Enable runtime debug messages. Must also have been enabled + * in pa_bitmatch.ml. + *) +let debug = ref false + +(* Exceptions. *) +exception Construct_failure of string * string * int * int + (* A bitstring is simply the data itself (as a string), and the * bitoffset and the bitlength within the string. Note offset/length * are counted in bits, not bytes. @@ -33,6 +41,8 @@ let bitstring_of_file fname = close_in chan; bs +let bitstring_length (_, _, len) = len + (*----------------------------------------------------------------------*) (* Extraction functions. * @@ -136,6 +146,15 @@ let extract_int_be_unsigned data off len flen = ) in word, off+flen, len-flen +let _make_int32_be c0 c1 c2 c3 = + Int32.logor + (Int32.logor + (Int32.logor + (Int32.shift_left c0 24) + (Int32.shift_left c1 16)) + (Int32.shift_left c2 8)) + c3 + (* Extract exactly 32 bits. We have to consider endianness and signedness. *) let extract_int32_be_unsigned data off len flen = let byteoff = off lsr 3 in @@ -146,30 +165,226 @@ let extract_int32_be_unsigned data off len flen = (* Optimize the common (byte-aligned) case. *) if off land 7 = 0 then ( let word = - Int32.add - (Int32.add - (Int32.add - (Int32.shift_left (_get_byte32 data byteoff strlen) 24) - (Int32.shift_left (_get_byte32 data (byteoff+1) strlen) 16)) - (Int32.shift_left (_get_byte32 data (byteoff+2) strlen) 8)) - (_get_byte32 data (byteoff+3) strlen) in - Int32.shift_right word (32 - flen) + let c0 = _get_byte32 data byteoff strlen in + let c1 = _get_byte32 data (byteoff+1) strlen in + let c2 = _get_byte32 data (byteoff+2) strlen in + let c3 = _get_byte32 data (byteoff+3) strlen in + _make_int32_be c0 c1 c2 c3 in + Int32.shift_right_logical word (32 - flen) ) else ( (* Extract the next 32 bits, slow method. *) let word = let c0, off, len = extract_char_unsigned data off len 8 in let c1, off, len = extract_char_unsigned data off len 8 in let c2, off, len = extract_char_unsigned data off len 8 in - let c3, off, len = extract_char_unsigned data off len 8 in - let c0 = Int32.shift_left (Int32.of_int c0) 24 in - let c1 = Int32.shift_left (Int32.of_int c1) 16 in - let c2 = Int32.shift_left (Int32.of_int c2) 8 in + let c3, _, _ = extract_char_unsigned data off len 8 in + let c0 = Int32.of_int c0 in + let c1 = Int32.of_int c1 in + let c2 = Int32.of_int c2 in let c3 = Int32.of_int c3 in - Int32.add c0 (Int32.add c1 (Int32.add c2 c3)) in - Int32.shift_right word (32 - flen) + _make_int32_be c0 c1 c2 c3 in + Int32.shift_right_logical word (32 - flen) ) in word, off+flen, len-flen +let _make_int64_be c0 c1 c2 c3 c4 c5 c6 c7 = + Int64.logor + (Int64.logor + (Int64.logor + (Int64.logor + (Int64.logor + (Int64.logor + (Int64.logor + (Int64.shift_left c0 56) + (Int64.shift_left c1 48)) + (Int64.shift_left c2 40)) + (Int64.shift_left c3 32)) + (Int64.shift_left c4 24)) + (Int64.shift_left c5 16)) + (Int64.shift_left c6 8)) + c7 + +(* Extract [1..64] bits. We have to consider endianness and signedness. *) +let extract_int64_be_unsigned data off len flen = + let byteoff = off lsr 3 in + + let strlen = String.length data in + + let word = + (* Optimize the common (byte-aligned) case. *) + if off land 7 = 0 then ( + let word = + let c0 = _get_byte64 data byteoff strlen in + let c1 = _get_byte64 data (byteoff+1) strlen in + let c2 = _get_byte64 data (byteoff+2) strlen in + let c3 = _get_byte64 data (byteoff+3) strlen in + let c4 = _get_byte64 data (byteoff+4) strlen in + let c5 = _get_byte64 data (byteoff+5) strlen in + let c6 = _get_byte64 data (byteoff+6) strlen in + let c7 = _get_byte64 data (byteoff+7) strlen in + _make_int64_be c0 c1 c2 c3 c4 c5 c6 c7 in + Int64.shift_right_logical word (64 - flen) + ) else ( + (* Extract the next 64 bits, slow method. *) + let word = + let c0, off, len = extract_char_unsigned data off len 8 in + let c1, off, len = extract_char_unsigned data off len 8 in + let c2, off, len = extract_char_unsigned data off len 8 in + let c3, off, len = extract_char_unsigned data off len 8 in + let c4, off, len = extract_char_unsigned data off len 8 in + let c5, off, len = extract_char_unsigned data off len 8 in + let c6, off, len = extract_char_unsigned data off len 8 in + let c7, _, _ = extract_char_unsigned data off len 8 in + let c0 = Int64.of_int c0 in + let c1 = Int64.of_int c1 in + let c2 = Int64.of_int c2 in + let c3 = Int64.of_int c3 in + let c4 = Int64.of_int c4 in + let c5 = Int64.of_int c5 in + let c6 = Int64.of_int c6 in + let c7 = Int64.of_int c7 in + _make_int64_be c0 c1 c2 c3 c4 c5 c6 c7 in + Int64.shift_right_logical word (64 - flen) + ) in + word, off+flen, len-flen + +(*----------------------------------------------------------------------*) +(* Constructor functions. *) + +module Buffer = struct + type t = { + buf : Buffer.t; + mutable len : int; (* Length in bits. *) + (* Last byte in the buffer (if len is not aligned). We store + * it outside the buffer because buffers aren't mutable. + *) + mutable last : int; + } + + let create () = + (* XXX We have almost enough information in the generator to + * choose a good initial size. + *) + { buf = Buffer.create 128; len = 0; last = 0 } + + let contents { buf = buf; len = len; last = last } = + let data = + if len land 7 = 0 then + Buffer.contents buf + else + Buffer.contents buf ^ (String.make 1 (Char.chr last)) in + data, 0, len + + (* Add exactly 8 bits. *) + let add_byte ({ buf = buf; len = len; last = last } as t) byte = + if byte < 0 || byte > 255 then invalid_arg "Bitmatch.Buffer.add_byte"; + let shift = len land 7 in + if shift = 0 then + (* Target buffer is byte-aligned. *) + Buffer.add_char buf (Char.chr byte) + else ( + (* Target buffer is unaligned. 'last' is meaningful. *) + let first = byte lsr shift in + let second = (byte lsl (8 - shift)) land 0xff in + Buffer.add_char buf (Char.chr (last lor first)); + t.last <- second + ); + t.len <- t.len + 8 + + (* Add exactly 1 bit. *) + let add_bit ({ buf = buf; len = len; last = last } as t) bit = + let shift = 7 - (len land 7) in + if shift > 0 then + (* Somewhere in the middle of 'last'. *) + t.last <- last lor ((if bit then 1 else 0) lsl shift) + else ( + (* Just a single spare bit in 'last'. *) + let last = last lor if bit then 1 else 0 in + Buffer.add_char buf (Char.chr last); + t.last <- 0 + ); + t.len <- len + 1 + + (* Add a small number of bits (definitely < 8). This uses a loop + * to call add_bit so it's slow. + *) + let _add_bits t c slen = + if slen < 1 || slen >= 8 then invalid_arg "Bitmatch.Buffer._add_bits"; + for i = slen-1 downto 0 do + let bit = c land (1 lsl i) <> 0 in + add_bit t bit + done + + let add_bits ({ buf = buf; len = len } as t) str slen = + if slen > 0 then ( + if len land 7 = 0 then ( + if slen land 7 = 0 then + (* Common case - everything is byte-aligned. *) + Buffer.add_substring buf str 0 (slen lsr 3) + else ( + (* Target buffer is aligned. Copy whole bytes then leave the + * remaining bits in last. + *) + let slenbytes = slen lsr 3 in + if slenbytes > 0 then Buffer.add_substring buf str 0 slenbytes; + t.last <- Char.code str.[slenbytes] lsl (8 - (slen land 7)) + ); + t.len <- len + slen + ) else ( + (* Target buffer is unaligned. Copy whole bytes using + * add_byte which knows how to deal with an unaligned + * target buffer, then call _add_bits for the remaining < 8 bits. + * + * XXX This is going to be dog-slow. + *) + let slenbytes = slen lsr 3 in + for i = 0 to slenbytes-1 do + let byte = Char.code str.[i] in + add_byte t byte + done; + _add_bits t (Char.code str.[slenbytes]) (slen - (slenbytes lsl 3)) + ); + ) +end + +(* Construct a single bit. *) +let construct_bit buf b _ = + Buffer.add_bit buf b + +(* Construct a field, flen = [2..8]. *) +let construct_char_unsigned buf v flen exn = + let max_val = 1 lsl flen in + if v < 0 || v >= max_val then raise exn; + if flen = 8 then + Buffer.add_byte buf v + else + Buffer._add_bits buf v flen + +(* Generate a mask with the lower 'bits' bits set. *) +let mask64 bits = + if bits < 63 then Int64.pred (Int64.shift_left 1L bits) + else if bits = 63 then Int64.max_int + else if bits = 64 then -1L + else invalid_arg "Bitmatch.mask64" + +(* Construct a field of up to 64 bits. *) +let construct_int64_be_unsigned buf v flen exn = + (* Check value is within range. *) + let m = Int64.lognot (mask64 flen) in + if Int64.logand v m <> 0L then raise exn; + + (* Add the bytes. *) + let rec loop v flen = + if flen > 8 then ( + loop (Int64.shift_right_logical v 8) (flen-8); + let lsb = Int64.to_int (Int64.logand v 0xffL) in + Buffer.add_byte buf lsb + ) else if flen > 0 then ( + let lsb = Int64.to_int (Int64.logand v (mask64 flen)) in + Buffer._add_bits buf lsb flen + ) + in + loop v flen (*----------------------------------------------------------------------*) (* Display functions. *) diff --git a/bitmatch.mli b/bitmatch.mli index 4f58039..d2afc30 100644 --- a/bitmatch.mli +++ b/bitmatch.mli @@ -1,7 +1,9 @@ (* Bitmatch library. - * $Id: bitmatch.mli,v 1.4 2008-04-01 10:58:53 rjones Exp $ + * $Id: bitmatch.mli,v 1.5 2008-04-01 17:05:37 rjones Exp $ *) +exception Construct_failure of string * string * int * int + type bitstring = string * int * int val empty_bitstring : bitstring @@ -16,7 +18,23 @@ val bitstring_of_file : string -> bitstring val hexdump_bitstring : out_channel -> bitstring -> unit +val bitstring_length : bitstring -> int + +module Buffer : sig + type t + val create : unit -> t + val contents : t -> bitstring + val add_bits : t -> string -> int -> unit + val add_bit : t -> bool -> unit + val add_byte : t -> int -> unit +end + +val debug : bool ref + (**/**) +(* Private functions, called from generated code. Do not use + * these directly - they are not safe. + *) val extract_bitstring : string -> int -> int -> int -> bitstring * int * int @@ -29,3 +47,11 @@ val extract_char_unsigned : string -> int -> int -> int -> int * int * int val extract_int_be_unsigned : string -> int -> int -> int -> int * int * int val extract_int32_be_unsigned : string -> int -> int -> int -> int32 * int * int + +val extract_int64_be_unsigned : string -> int -> int -> int -> int64 * int * int + +val construct_bit : Buffer.t -> bool -> int -> unit + +val construct_char_unsigned : Buffer.t -> int -> int -> exn -> unit + +val construct_int64_be_unsigned : Buffer.t -> int64 -> int -> exn -> unit diff --git a/pa_bitmatch.ml b/pa_bitmatch.ml index fc50e68..f25a49d 100644 --- a/pa_bitmatch.ml +++ b/pa_bitmatch.ml @@ -1,5 +1,5 @@ (* Bitmatch syntax extension. - * $Id: pa_bitmatch.ml,v 1.3 2008-04-01 10:05:14 rjones Exp $ + * $Id: pa_bitmatch.ml,v 1.4 2008-04-01 17:05:37 rjones Exp $ *) open Printf @@ -8,14 +8,24 @@ open Camlp4.PreCast open Syntax open Ast +(* If this is true then we emit some debugging code which can + * be useful to tell what is happening during matches. You + * also need to do 'Bitmatch.debug := true' in your main program. + * + * If this is false then no extra debugging code is emitted. + *) +let debug = true + type m = Fields of f list (* field ; field -> ... *) | Bind of string option (* _ -> ... *) and f = { - ident : string; (* field name *) + (* XXX fval should be a patt, not an expr *) + fval : expr; (* field binding or value *) flen : expr; (* length in bits, may be non-const *) endian : endian; (* endianness *) signed : bool; (* true if signed, false if unsigned *) t : t; (* type *) + _loc : Loc.t; (* location in source code *) } and endian = BigEndian | LittleEndian | NativeEndian and t = Int | Bitstring @@ -28,7 +38,7 @@ let gensym = sprintf "__pabitmatch_%s_%d" name i (* Deal with the qualifiers which appear for a field. *) -let output_field _loc name flen qs = +let parse_field _loc fval flen qs = let endian, signed, t = match qs with | None -> (None, None, None) @@ -103,13 +113,245 @@ let output_field _loc name flen qs = let t = match t with None -> Int | Some t -> t in { - ident = name; + fval = fval; flen = flen; endian = endian; signed = signed; t = t; + _loc = _loc; } +let string_of_endian = function + | BigEndian -> "bigendian" + | LittleEndian -> "littleendian" + | NativeEndian -> "nativeendian" + +let string_of_t = function + | Int -> "int" + | Bitstring -> "bitstring" + +let string_of_field { fval = fval; flen = flen; + endian = endian; signed = signed; t = t; + _loc = _loc } = + let fval = + match fval with + | <:expr< $lid:id$ >> -> id + | _ -> "[expression]" in + let flen = + match flen with + | <:expr< $int:i$ >> -> i + | _ -> "[non-const-len]" in + let endian = string_of_endian endian in + let signed = if signed then "signed" else "unsigned" in + let t = string_of_t t in + let loc_fname = Loc.file_name _loc in + let loc_line = Loc.start_line _loc in + let loc_char = Loc.start_off _loc - Loc.start_bol _loc in + + sprintf "%s : %s : %s, %s, %s @ (%S, %d, %d)" + fval flen t endian signed loc_fname loc_line loc_char + +(* Generate the code for a constructor, ie. 'BITSTRING ...'. *) +let output_constructor _loc fields = + let loc_fname = Loc.file_name _loc in + let loc_line = string_of_int (Loc.start_line _loc) in + let loc_char = string_of_int (Loc.start_off _loc - Loc.start_bol _loc) in + + (* Bitstrings are created like the 'Buffer' module (in fact, using + * the Buffer module), by appending snippets to a growing buffer. + * This is reasonable efficient and avoids a lot of garbage. + *) + let buffer = gensym "buffer" in + + (* General exception which is raised inside the constructor functions + * when an int expression is out of range at runtime. + *) + let exn = gensym "exn" in + let exn_used = ref false in + + (* Convert each field to a simple bitstring-generating expression. *) + let fields = List.map ( + fun {fval=fval; flen=flen; endian=endian; signed=signed; t=t} -> + (* Is flen an integer constant? If so, what is it? This + * is very simple-minded and only detects simple constants. + *) + let flen_is_const = + match flen with + | <:expr< $int:i$ >> -> Some (int_of_string i) + | _ -> None in + + let name_of_int_construct_const = function + (* XXX As an enhancement we should allow a 64-bit-only + * mode which lets us use 'int' up to 63 bits and won't + * compile on 32-bit platforms. + *) + (* XXX The meaning of signed/unsigned breaks down at + * 31, 32, 63 and 64 bits. + *) + | (1, _, _) -> "construct_bit" + | ((2|3|4|5|6|7|8), _, false) -> "construct_char_unsigned" + | ((2|3|4|5|6|7|8), _, true) -> "construct_char_signed" + | (i, BigEndian, false) when i <= 31 -> "construct_int_be_unsigned" + | (i, BigEndian, true) when i <= 31 -> "construct_int_be_signed" + | (i, LittleEndian, false) when i <= 31 -> "construct_int_le_unsigned" + | (i, LittleEndian, true) when i <= 31 -> "construct_int_le_signed" + | (i, NativeEndian, false) when i <= 31 -> "construct_int_ne_unsigned" + | (i, NativeEndian, true) when i <= 31 -> "construct_int_ne_signed" + | (32, BigEndian, false) -> "construct_int32_be_unsigned" + | (32, BigEndian, true) -> "construct_int32_be_signed" + | (32, LittleEndian, false) -> "construct_int32_le_unsigned" + | (32, LittleEndian, true) -> "construct_int32_le_signed" + | (32, NativeEndian, false) -> "construct_int32_ne_unsigned" + | (32, NativeEndian, true) -> "construct_int32_ne_signed" + | (_, BigEndian, false) -> "construct_int64_be_unsigned" + | (_, BigEndian, true) -> "construct_int64_be_signed" + | (_, LittleEndian, false) -> "construct_int64_le_unsigned" + | (_, LittleEndian, true) -> "construct_int64_le_signed" + | (_, NativeEndian, false) -> "construct_int64_ne_unsigned" + | (_, NativeEndian, true) -> "construct_int64_ne_signed" + in + let name_of_int_construct = function + (* XXX As an enhancement we should allow users to + * specify that a field length can fit into a char/int/int32 + * (of course, this would have to be checked at runtime). + *) + | (BigEndian, false) -> "construct_int64_be_unsigned" + | (BigEndian, true) -> "construct_int64_be_signed" + | (LittleEndian, false) -> "construct_int64_le_unsigned" + | (LittleEndian, true) -> "construct_int64_le_signed" + | (NativeEndian, false) -> "construct_int64_ne_unsigned" + | (NativeEndian, true) -> "construct_int64_ne_signed" + in + + let expr = + match t, flen_is_const with + (* Common case: int field, constant flen. + * + * Range checks are done inside the construction function + * because that's a lot simpler w.r.t. types. It might + * be better to move them here. XXX + *) + | Int, Some i when i > 0 && i <= 64 -> + let construct_func = + name_of_int_construct_const (i,endian,signed) in + exn_used := true; + + <:expr< + Bitmatch.$lid:construct_func$ $lid:buffer$ $fval$ $flen$ + $lid:exn$ + >> + + | Int, Some _ -> + Loc.raise _loc (Failure "length of int field must be [1..64]") + + (* Int field, non-constant length. We need to perform a runtime + * test to ensure the length is [1..64]. + * + * Range checks are done inside the construction function + * because that's a lot simpler w.r.t. types. It might + * be better to move them here. XXX + *) + | Int, None -> + let construct_func = name_of_int_construct (endian,signed) in + exn_used := true; + + <:expr< + if $flen$ >= 1 && $flen$ <= 64 then + Bitmatch.$lid:construct_func$ $lid:buffer$ $fval$ $flen$ + $lid:exn$ + else + raise (Bitmatch.Construct_failure + ("length of int field must be [1..64]", + $str:loc_fname$, + $int:loc_line$, $int:loc_char$)) + >> + + (* Bitstring, constant length > 0. *) + | Bitstring, Some i when i > 0 -> + let bs = gensym "bs" in + <:expr< + let $lid:bs$ = $fval$ in + if Bitmatch.bitstring_length $lid:bs$ = $flen$ then + Bitmatch.construct_bitstring $lid:buffer$ $lid:bs$ + else + raise (Bitmatch.Construct_failure + ("length of bitstring does not match declaration", + $str:loc_fname$, + $int:loc_line$, $int:loc_char$)) + >> + + (* Bitstring, constant length -1, means variable length bitstring + * with no checks. + *) + | Bitstring, Some (-1) -> + <:expr< Bitmatch.construct_bitstring $lid:buffer$ $fval$ >> + + (* Bitstring, constant length = 0 is probably an error, and so it + * any other value. + *) + | Bitstring, Some _ -> + Loc.raise _loc + (Failure + "length of bitstring must be > 0 or the special value -1") + + (* Bitstring, non-constant length. + * We check at runtime that the length is > 0 and matches + * the declared length. + *) + | Bitstring, None -> + let bslen = gensym "bslen" in + let bs = gensym "bs" in + <:expr< + let $lid:bslen$ = $flen$ in + if $lid:bslen$ > 0 then ( + let $lid:bs$ = $fval$ in + if Bitmatch.bitstring_length $lid:bs$ = $lid:bslen$ then + Bitmatch.construct_bitstring $lid:buffer$ $lid:bs$ + else + raise (Bitmatch.Construct_failure + ("length of bitstring does not match declaration", + $str:loc_fname$, + $int:loc_line$, $int:loc_char$)) + ) else + raise (Bitmatch.Construct_failure + ("length of bitstring must be > 0", + $str:loc_fname$, + $int:loc_line$, $int:loc_char$)) + >> in + expr + ) fields in + + (* Create the final bitstring. Start by creating an empty buffer + * and then evaluate each expression above in turn which will + * append some more to the bitstring buffer. Finally extract + * the bitstring. + * + * XXX We almost have enough information to be able to guess + * a good initial size for the buffer. + *) + let fields = + match fields with + | [] -> <:expr< [] >> + | h::t -> List.fold_left (fun h t -> <:expr< $h$; $t$ >>) h t in + + let expr = + <:expr< + let $lid:buffer$ = Bitmatch.Buffer.create () in + $fields$; + Bitmatch.Buffer.contents $lid:buffer$ + >> in + + if !exn_used then + <:expr< + let $lid:exn$ = + Bitmatch.Construct_failure ("value out of range", + $str:loc_fname$, + $int:loc_line$, $int:loc_char$) in + $expr$ + >> + else + expr + (* Generate the code for a bitmatch statement. '_loc' is the * location, 'bs' is the bitstring parameter, 'cases' are * the list of cases to test against. @@ -130,8 +372,21 @@ let output_bitmatch _loc bs cases = *) let rec output_field_extraction inner = function | [] -> inner - | {ident=ident; flen=flen; endian=endian; signed=signed; t=t} :: fields -> - (* If length an integer constant? If so, what is it? This + | field :: fields -> + let {fval=fval; flen=flen; endian=endian; signed=signed; t=t} + = field in + + (* Is fval a binding (an ident) or an expression? If it's + * a binding then we will generate a binding for this field. + * If it's an expression then we will test the field against + * the expression. + *) + let fval_is_ident = + match fval with + | <:expr< $lid:id$ >> -> Some id + | _ -> None in + + (* Is flen an integer constant? If so, what is it? This * is very simple-minded and only detects simple constants. *) let flen_is_const = @@ -183,9 +438,9 @@ let output_bitmatch _loc bs cases = in let expr = - match t, flen_is_const with - (* Common case: int field, constant flen *) - | Int, Some i when i > 0 && i <= 64 -> + match t, fval_is_ident, flen_is_const with + (* Common case: int field, binding, constant flen *) + | Int, Some ident, Some i when i > 0 && i <= 64 -> let extract_func = name_of_int_extract_const (i,endian,signed) in <:expr< if $lid:len$ >= $flen$ then ( @@ -196,17 +451,32 @@ let output_bitmatch _loc bs cases = ) >> - | Int, Some _ -> + (* Int field, not a binding, constant flen *) + | Int, None, Some i when i > 0 && i <= 64 -> + let extract_func = name_of_int_extract_const (i,endian,signed) in + let v = gensym "val" in + <:expr< + if $lid:len$ >= $flen$ then ( + let $lid:v$, $lid:off$, $lid:len$ = + Bitmatch.$lid:extract_func$ $lid:data$ $lid:off$ $lid:len$ + $flen$ in + if $lid:v$ = $fval$ then ( + $inner$ + ) + ) + >> + + | Int, _, Some _ -> Loc.raise _loc (Failure "length of int field must be [1..64]") (* Int field, non-const flen. We have to test the range of * the field at runtime. If outside the range it's a no-match * (not an error). *) - | Int, None -> + | Int, Some ident, None -> let extract_func = name_of_int_extract (endian,signed) in <:expr< - if $flen$ >= 1 && $flen$ <= 64 && $flen$ >= $lid:len$ then ( + if $flen$ >= 1 && $flen$ <= 64 && $flen$ <= $lid:len$ then ( let $lid:ident$, $lid:off$, $lid:len$ = Bitmatch.$lid:extract_func$ $lid:data$ $lid:off$ $lid:len$ $flen$ in @@ -214,8 +484,27 @@ let output_bitmatch _loc bs cases = ) >> + | Int, None, None -> + let extract_func = name_of_int_extract (endian,signed) in + let v = gensym "val" in + <:expr< + if $flen$ >= 1 && $flen$ <= 64 && $flen$ <= $lid:len$ then ( + let $lid:v$, $lid:off$, $lid:len$ = + Bitmatch.$lid:extract_func$ $lid:data$ $lid:off$ $lid:len$ + $flen$ in + if $lid:v$ = $fval$ then ( + $inner$ + ) + ) + >> + + (* Can't compare bitstrings at the moment. *) + | Bitstring, None, _ -> + Loc.raise _loc + (Failure "cannot compare a bitstring to a constant") + (* Bitstring, constant flen >= 0. *) - | Bitstring, Some i when i >= 0 -> + | Bitstring, Some ident, Some i when i >= 0 -> <:expr< if $lid:len$ >= $flen$ then ( let $lid:ident$, $lid:off$, $lid:len$ = @@ -228,22 +517,22 @@ let output_bitmatch _loc bs cases = (* Bitstring, constant flen = -1, means consume all the * rest of the input. *) - | Bitstring, Some i when i = -1 -> + | Bitstring, Some ident, Some i when i = -1 -> <:expr< let $lid:ident$, $lid:off$, $lid:len$ = Bitmatch.extract_remainder $lid:data$ $lid:off$ $lid:len$ in $inner$ >> - | Bitstring, Some _ -> + | Bitstring, _, Some _ -> Loc.raise _loc (Failure "length of bitstring must be >= 0 or the special value -1") (* Bitstring field, non-const flen. We check the flen is >= 0 * (-1 is not allowed here) at runtime. *) - | Bitstring, None -> + | Bitstring, Some ident, None -> <:expr< - if $flen$ >= 0 && $lid:len$ >= $flen$ then ( + if $flen$ >= 0 && $flen$ <= $lid:len$ then ( let $lid:ident$, $lid:off$, $lid:len$ = Bitmatch.extract_bitstring $lid:data$ $lid:off$ $lid:len$ $flen$ in @@ -252,6 +541,21 @@ let output_bitmatch _loc bs cases = >> in + (* Emit extra debugging code. *) + let expr = + if not debug then expr else ( + let field = string_of_field field in + + <:expr< + if !Bitmatch.debug then ( + Printf.eprintf "PA_BITMATCH: TEST:\n"; + Printf.eprintf " %s\n" $str:field$; + Printf.eprintf " off %d len %d\n%!" $lid:off$ $lid:len$; + ); + $expr$ + >> + ) in + output_field_extraction expr fields in @@ -359,31 +663,38 @@ EXTEND Gram ]; field: [ - [ name = LIDENT; ":"; len = expr LEVEL "top"; + [ fval = expr LEVEL "top"; ":"; len = expr LEVEL "top"; qs = OPT [ ":"; qs = qualifiers -> qs ] -> - output_field _loc name len qs + parse_field _loc fval len qs ] ]; match_case: [ - [ fields = LIST0 field SEP ";"; + [ "_"; + bind = OPT [ "as"; name = LIDENT -> name ]; w = OPT [ "when"; e = expr -> e ]; "->"; code = expr -> - (Fields fields, w, code) + (Bind bind, w, code) ] - | [ "_"; - bind = OPT [ "as"; name = LIDENT -> name ]; + | [ fields = LIST0 field SEP ";"; w = OPT [ "when"; e = expr -> e ]; "->"; code = expr -> - (Bind bind, w, code) + (Fields fields, w, code) ] ]; + (* 'bitmatch' expressions. *) expr: LEVEL ";" [ [ "bitmatch"; bs = expr; "with"; OPT "|"; cases = LIST1 match_case SEP "|" -> output_bitmatch _loc bs cases ] + + (* Constructor. *) + | [ "BITSTRING"; + fields = LIST0 field SEP ";" -> + output_constructor _loc fields + ] ]; END diff --git a/tests/.cvsignore b/tests/.cvsignore index 0f4722c..b4fbc01 100644 --- a/tests/.cvsignore +++ b/tests/.cvsignore @@ -9,4 +9,6 @@ 06_ints1 06_ints2 06_ints3 +10_constr1 +20_varsize 60_ping diff --git a/tests/10_constr1.ml b/tests/10_constr1.ml new file mode 100644 index 0000000..ba7ccc8 --- /dev/null +++ b/tests/10_constr1.ml @@ -0,0 +1,22 @@ +(* Test a simple constructor. + * $Id: 10_constr1.ml,v 1.1 2008-04-01 17:05:37 rjones Exp $ + *) + +let bits = BITSTRING 0xc : 4; 0xf : 4; 0xc : 4; 0xf : 4 ;; + +assert (bits = Bitmatch.make_bitstring 16 '\xcf') ;; + +let () = + bitmatch bits with + | n0 : 4; n1 : 4; n2 : 4; n3 : 4; + rest : -1 : bitstring -> + assert (n0 = 0xc); + assert (n1 = 0xf); + assert (n2 = 0xc); + assert (n3 = 0xf); + + let _, off, len = rest in + assert (off = 16 && len = 0) (* no further data *) + + | _ -> + failwith "error: did not match\n" diff --git a/tests/20_varsize.ml b/tests/20_varsize.ml new file mode 100644 index 0000000..5222766 --- /dev/null +++ b/tests/20_varsize.ml @@ -0,0 +1,94 @@ +(* Construct and match against random variable sized strings. + * $Id: 20_varsize.ml,v 1.1 2008-04-01 17:05:37 rjones Exp $ + *) + +open Printf + +let nr_passes = 10000 +let max_size = 8 (* max field size in bits *) + +(* let () = Bitmatch.debug := true *) + +(* Return a full 64 bits of randomness. *) +let rand64 () = + let r0 = Int64.shift_left (Int64.of_int (Random.bits ())) 34 in (* 30 bits *) + let r1 = Int64.shift_left (Int64.of_int (Random.bits ())) 4 in (* 30 bits *) + let r2 = Int64.of_int (Random.int 16) in (* 4 bits *) + Int64.logor (Int64.logor r0 r1) r2 + +(* Return unsigned mask of length bits, bits <= 64. *) +let mask64 bits = + if bits < 63 then Int64.pred (Int64.shift_left 1L bits) + else if bits = 63 then Int64.max_int + else if bits = 64 then -1L + else invalid_arg "mask64" + +(* Return a random number between 0 and 2^bits-1 where bits <= 64. *) +let rand bits = + let r = rand64 () in + let m = mask64 bits in + Int64.logand r m + +(* Dump the state in case there is an error. *) +let dump n0 n0sz n1 n1sz n2 n2sz n3 n3sz bits r0 r1 r2 r3 = + eprintf "dumping state:\n"; + eprintf " 0: %3d - %016Lx - %016Lx\n" n0sz n0 r0; + eprintf " 1: %3d - %016Lx - %016Lx\n" n1sz n1 r1; + eprintf " 2: %3d - %016Lx - %016Lx\n" n2sz n2 r2; + eprintf " 3: %3d - %016Lx - %016Lx\n" n3sz n3 r3; + eprintf "bits (length = %d):\n" (Bitmatch.bitstring_length bits); + Bitmatch.hexdump_bitstring stderr bits; + eprintf "%!" + +let () = + Random.self_init (); + + for pass = 0 to nr_passes-1 do + let n0sz = 1 + Random.int (max_size-1) in + let n0 = rand n0sz in + let n1sz = 1 + Random.int (max_size-1) in + let n1 = rand n1sz in + let n2sz = 1 + Random.int (max_size-1) in + let n2 = rand n2sz in + let n3sz = 1 + Random.int (max_size-1) in + let n3 = rand n3sz in + + (* Construct the bitstring. *) + let bits = + try + (BITSTRING + n0 : n0sz; + n1 : n1sz; + n2 : n2sz; + n3 : n3sz) + with + Bitmatch.Construct_failure (msg, _, _, _) -> + eprintf "FAILED: Construct_failure %s\n%!" msg; + dump n0 n0sz n1 n1sz n2 n2sz n3 n3sz + (Bitmatch.empty_bitstring) 0L 0L 0L 0L; + exit 2 in + + let r0, r1, r2, r3 = + bitmatch bits with + | r0 : n0sz; r1 : n1sz; r2 : n2sz; r3 : n3sz; rest : -1 : bitstring -> + let rest_len = Bitmatch.bitstring_length rest in + if rest_len <> 0 then ( + eprintf "FAILED: rest is not zero length (length = %d)\n%!" + rest_len; + dump n0 n0sz n1 n1sz n2 n2sz n3 n3sz bits 0L 0L 0L 0L; + exit 2 + ); + r0, r1, r2, r3 + | _ -> + eprintf "FAILED: bitmatch operator did not match\n%!"; + dump n0 n0sz n1 n1sz n2 n2sz n3 n3sz bits 0L 0L 0L 0L; + exit 2 in + + (*dump n0 n0sz n1 n1sz n2 n2sz n3 n3sz bits r0 r1 r2 r3;*) + + if n0 <> r0 || n1 <> r1 || n2 <> r2 || n3 <> r3 then ( + eprintf "FAILED: numbers returned from match are different\n%!"; + dump n0 n0sz n1 n1sz n2 n2sz n3 n3sz bits r0 r1 r2 r3; + exit 2 + ) + done