From 161ecfb3d3c9aa5c934a956cec1bf206029a9c41 Mon Sep 17 00:00:00 2001 From: "Richard W.M. Jones" Date: Wed, 21 May 2008 08:59:40 +0000 Subject: [PATCH] Endianness expressions. --- TODO | 9 +- bitmatch.ml | 45 ++++++- bitmatch.mli | 34 ++++-- examples/libpcap.ml | 71 ++++------- pa_bitmatch.ml | 317 ++++++++++++++++++++++++++++++------------------- tests/40_endianexpr.ml | 35 ++++++ 6 files changed, 331 insertions(+), 180 deletions(-) create mode 100644 tests/40_endianexpr.ml diff --git a/TODO b/TODO index 4473944..74551a6 100644 --- a/TODO +++ b/TODO @@ -36,12 +36,13 @@ Major to-do items. (10) Cross-module, persistent, named patterns, see: http://caml.inria.fr/pub/ml-archives/caml-list/2008/04/25992c9c9fa999fe1d35d961dd9917a2.en.html -(11) Runtime endiannness expressions. The suggested syntax is: +(11) DONE - + Runtime endiannness expressions. The suggested syntax is: - { field : len : endianness(expr) } + { field : len : endian (expr) } - where expr would evaluate to something like `BigEndian or - `LittleEndian. + where expr would evaluate to something like BigEndian or + LittleEndian. There are several protocols around where endianness is only determined at runtime, examples are libpcap and TIFF. diff --git a/bitmatch.ml b/bitmatch.ml index 1d4b30f..e382504 100644 --- a/bitmatch.ml +++ b/bitmatch.ml @@ -420,6 +420,11 @@ let extract_int_ne_unsigned = then extract_int_be_unsigned else extract_int_le_unsigned +let extract_int_ee_unsigned = function + | BigEndian -> extract_int_be_unsigned + | LittleEndian -> extract_int_le_unsigned + | NativeEndian -> extract_int_ne_unsigned + let _make_int32_be c0 c1 c2 c3 = Int32.logor (Int32.logor @@ -480,6 +485,11 @@ let extract_int32_ne_unsigned = then extract_int32_be_unsigned else extract_int32_le_unsigned +let extract_int32_ee_unsigned = function + | BigEndian -> extract_int32_be_unsigned + | LittleEndian -> extract_int32_le_unsigned + | NativeEndian -> extract_int32_ne_unsigned + let _make_int64_be c0 c1 c2 c3 c4 c5 c6 c7 = Int64.logor (Int64.logor @@ -592,6 +602,11 @@ let extract_int64_ne_unsigned = then extract_int64_be_unsigned else extract_int64_le_unsigned +let extract_int64_ee_unsigned = function + | BigEndian -> extract_int64_be_unsigned + | LittleEndian -> extract_int64_le_unsigned + | NativeEndian -> extract_int64_ne_unsigned + (*----------------------------------------------------------------------*) (* Constructor functions. *) @@ -726,6 +741,12 @@ let construct_int_ne_unsigned = else (*construct_int_le_unsigned*) fun _ _ _ _ -> failwith "construct_int_le_unsigned" +let construct_int_ee_unsigned = function + | BigEndian -> construct_int_be_unsigned + | LittleEndian -> (*construct_int_le_unsigned*) + (fun _ _ _ _ -> failwith "construct_int_le_unsigned") + | NativeEndian -> construct_int_ne_unsigned + (* Construct a field of exactly 32 bits. *) let construct_int32_be_unsigned buf v flen _ = Buffer.add_byte buf @@ -737,11 +758,25 @@ let construct_int32_be_unsigned buf v flen _ = Buffer.add_byte buf (Int32.to_int (Int32.logand v 0xff_l)) +let construct_int32_le_unsigned buf v flen _ = + Buffer.add_byte buf + (Int32.to_int (Int32.logand v 0xff_l)); + Buffer.add_byte buf + (Int32.to_int ((Int32.logand (Int32.shift_right_logical v 8) 0xff_l))); + Buffer.add_byte buf + (Int32.to_int ((Int32.logand (Int32.shift_right_logical v 16) 0xff_l))); + Buffer.add_byte buf + (Int32.to_int (Int32.shift_right_logical v 24)) + let construct_int32_ne_unsigned = if nativeendian = BigEndian then construct_int32_be_unsigned - else (*construct_int32_le_unsigned*) - fun _ _ _ _ -> failwith "construct_int32_le_unsigned" + else construct_int32_le_unsigned + +let construct_int32_ee_unsigned = function + | BigEndian -> construct_int32_be_unsigned + | LittleEndian -> construct_int32_le_unsigned + | NativeEndian -> construct_int32_ne_unsigned (* Construct a field of up to 64 bits. *) let construct_int64_be_unsigned buf v flen exn = @@ -756,6 +791,12 @@ let construct_int64_ne_unsigned = else (*construct_int64_le_unsigned*) fun _ _ _ _ -> failwith "construct_int64_le_unsigned" +let construct_int64_ee_unsigned = function + | BigEndian -> construct_int64_be_unsigned + | LittleEndian -> (*construct_int64_le_unsigned*) + (fun _ _ _ _ -> failwith "construct_int64_le_unsigned") + | NativeEndian -> construct_int64_ne_unsigned + (* Construct from a string of bytes, exact multiple of 8 bits * in length of course. *) diff --git a/bitmatch.mli b/bitmatch.mli index c4aacc5..0777c28 100644 --- a/bitmatch.mli +++ b/bitmatch.mli @@ -290,17 +290,21 @@ bitmatch bits with A bitstring field of length 0 matches an empty bitstring (occasionally useful when matching optional subfields). - Qualifiers are a list of identifiers which control the type, + Qualifiers are a list of identifiers/expressions which control the type, signedness and endianness of the field. Permissible qualifiers are: - - [int] (field has an integer type) - - [string] (field is a string type) - - [bitstring] (field is a bitstring type) - - [signed] (field is signed) - - [unsigned] (field is unsigned) - - [bigendian] (field is big endian - a.k.a network byte order) - - [littleendian] (field is little endian - a.k.a Intel byte order) - - [nativeendian] (field is same endianness as the machine) + - [int]: field has an integer type + - [string]: field is a string type + - [bitstring]: field is a bitstring type + - [signed]: field is signed + - [unsigned]: field is unsigned + - [bigendian]: field is big endian - a.k.a network byte order + - [littleendian]: field is little endian - a.k.a Intel byte order + - [nativeendian]: field is same endianness as the machine + - [endian (expr)]: [expr] should be an expression which evaluates to + a {!endian} type, ie. [LittleEndian], [BigEndian] or [NativeEndian]. + The expression is an arbitrary OCaml expression and can use the + value of earlier fields in the bitmatch. The default settings are [int], [unsigned], [bigendian]. @@ -719,18 +723,24 @@ val extract_int_le_unsigned : string -> int -> int -> int -> int * int * int val extract_int_ne_unsigned : string -> int -> int -> int -> int * int * int +val extract_int_ee_unsigned : endian -> string -> int -> int -> int -> int * int * int + val extract_int32_be_unsigned : string -> int -> int -> int -> int32 * int * int val extract_int32_le_unsigned : string -> int -> int -> int -> int32 * int * int val extract_int32_ne_unsigned : string -> int -> int -> int -> int32 * int * int +val extract_int32_ee_unsigned : endian -> string -> int -> int -> int -> int32 * int * int + val extract_int64_be_unsigned : string -> int -> int -> int -> int64 * int * int val extract_int64_le_unsigned : string -> int -> int -> int -> int64 * int * int val extract_int64_ne_unsigned : string -> int -> int -> int -> int64 * int * int +val extract_int64_ee_unsigned : endian -> string -> int -> int -> int -> int64 * int * int + val construct_bit : Buffer.t -> bool -> int -> exn -> unit val construct_char_unsigned : Buffer.t -> int -> int -> exn -> unit @@ -739,12 +749,18 @@ val construct_int_be_unsigned : Buffer.t -> int -> int -> exn -> unit val construct_int_ne_unsigned : Buffer.t -> int -> int -> exn -> unit +val construct_int_ee_unsigned : endian -> Buffer.t -> int -> int -> exn -> unit + val construct_int32_be_unsigned : Buffer.t -> int32 -> int -> exn -> unit val construct_int32_ne_unsigned : Buffer.t -> int32 -> int -> exn -> unit +val construct_int32_ee_unsigned : endian -> Buffer.t -> int32 -> int -> exn -> unit + val construct_int64_be_unsigned : Buffer.t -> int64 -> int -> exn -> unit val construct_int64_ne_unsigned : Buffer.t -> int64 -> int -> exn -> unit +val construct_int64_ee_unsigned : endian -> Buffer.t -> int64 -> int -> exn -> unit + val construct_string : Buffer.t -> string -> unit diff --git a/examples/libpcap.ml b/examples/libpcap.ml index 862a7dc..229f6bc 100644 --- a/examples/libpcap.ml +++ b/examples/libpcap.ml @@ -9,9 +9,7 @@ * The file format is documented here: * http://wiki.wireshark.org/Development/LibpcapFileFormat * - * libpcap endianness is determined at runtime. Currently we don't - * handle this well - we need to write the match code out twice. - * Runtime endianness setting will solve this. See TODO list, item 11. + * libpcap endianness is determined at runtime. *) open Printf @@ -31,59 +29,40 @@ let rec main () = with End_of_file -> () +(* Determine the endianness (at runtime) from the magic number. *) +and endian_of = function + | 0xa1b2c3d4_l -> Bitmatch.BigEndian + | 0xd4c3b2a1_l -> Bitmatch.LittleEndian + | _ -> assert false + and libpcap_header bits = bitmatch bits with - | { 0xd4c3b2a1_l : 32; (* writer was little endian *) - major : 16 : littleendian; (* version *) - minor : 16 : littleendian; - timezone : 32 : littleendian; (* timezone correction (seconds) *) - _ : 32 : littleendian; (* always 0 apparently *) - snaplen : 32 : littleendian; (* max length of captured packets *) - network : 32 : littleendian; (* data link layer type *) + | { ((0xa1b2c3d4_l|0xd4c3b2a1_l) as magic) : 32; (* magic number *) + major : 16 : endian (endian_of magic); (* version *) + minor : 16 : endian (endian_of magic); + timezone : 32 : endian (endian_of magic); (* timezone correction (secs)*) + _ : 32 : endian (endian_of magic); (* always 0 apparently *) + snaplen : 32 : endian (endian_of magic); (* max length of capt pckts *) + network : 32 : endian (endian_of magic); (* data link layer type *) rest : -1 : bitstring } -> - Bitmatch.LittleEndian, (major, minor, timezone, snaplen, network), rest - - | { 0xa1b2c3d4_l : 32; (* writer was big endian *) - major : 16; (* version *) - minor : 16; - timezone : 32; (* timezone correction (seconds) *) - _ : 32; (* always 0 apparently *) - snaplen : 32; (* max length of captured packets *) - network : 32; (* data link layer type *) - rest : -1 : bitstring - } -> - Bitmatch.BigEndian, (major, minor, timezone, snaplen, network), rest + endian_of magic, (major, minor, timezone, snaplen, network), rest | { _ } -> failwith "not a libpcap/tcpdump packet capture file" -and libpcap_packet endian file_header bits = - if endian = Bitmatch.LittleEndian then ( - bitmatch bits with - | { ts_sec : 32 : littleendian; (* packet timestamp seconds *) - ts_usec : 32 : littleendian; (* packet timestamp microseconds *) - incl_len : 32 : littleendian; (* packet length saved in this file *) - orig_len : 32 : littleendian; (* packet length originally on wire *) - pkt_data : Int32.to_int incl_len*8 : bitstring; - rest : -1 : bitstring - } -> - (ts_sec, ts_usec, incl_len, orig_len), pkt_data, rest - - | { _ } -> raise End_of_file - ) else ( - bitmatch bits with - | { ts_sec : 32; (* packet timestamp seconds *) - ts_usec : 32; (* packet timestamp microseconds *) - incl_len : 32; (* packet length saved in this file *) - orig_len : 32; (* packet length originally on wire *) - pkt_data : Int32.to_int incl_len*8 : bitstring; - rest : -1 : bitstring - } -> - (ts_sec, ts_usec, incl_len, orig_len), pkt_data, rest +and libpcap_packet e file_header bits = + bitmatch bits with + | { ts_sec : 32 : endian (e); (* packet timestamp seconds *) + ts_usec : 32 : endian (e); (* packet timestamp microseconds *) + incl_len : 32 : endian (e); (* packet length saved in this file *) + orig_len : 32 : endian (e); (* packet length originally on wire *) + pkt_data : Int32.to_int incl_len*8 : bitstring; + rest : -1 : bitstring + } -> + (ts_sec, ts_usec, incl_len, orig_len), pkt_data, rest | { _ } -> raise End_of_file - ) and decode_and_print_packet file_header pkt_header pkt_data = let (ts_sec, ts_usec, _, orig_len) = pkt_header in diff --git a/pa_bitmatch.ml b/pa_bitmatch.ml index d8698e2..fdd6082 100644 --- a/pa_bitmatch.ml +++ b/pa_bitmatch.ml @@ -24,6 +24,8 @@ open Camlp4.PreCast open Syntax open Ast +open Bitmatch + (* If this is true then we emit some debugging code which can * be useful to tell what is happening during matches. You * also need to do 'Bitmatch.debug := true' in your main program. @@ -74,13 +76,16 @@ let rec expr_is_constant = function type 'a field = { field : 'a; (* field ('a is either patt or expr) *) flen : expr; (* length in bits, may be non-const *) - endian : Bitmatch.endian; (* endianness *) + endian : endian_expr; (* endianness *) signed : bool; (* true if signed, false if unsigned *) t : t; (* type *) _loc : Loc.t; (* location in source code *) printer : 'a -> string; (* turn the field into a string *) } -and t = Int | String | Bitstring +and t = Int | String | Bitstring (* field type *) +and endian_expr = + | ConstantEndian of endian (* a constant little/big/nativeendian *) + | EndianExpr of expr (* an endian expression *) (* Generate a fresh, unique symbol each time called. *) let gensym = @@ -96,66 +101,75 @@ let parse_field _loc field flen qs printer = | None -> (None, None, None) | Some qs -> List.fold_left ( - fun (endian, signed, t) q -> - match q with - | "bigendian" -> + fun (endian, signed, t) qual_expr -> + match qual_expr with + | "bigendian", None -> if endian <> None then Loc.raise _loc (Failure "an endian flag has been set already") else ( - let endian = Some Bitmatch.BigEndian in + let endian = Some (ConstantEndian BigEndian) in (endian, signed, t) ) - | "littleendian" -> + | "littleendian", None -> if endian <> None then Loc.raise _loc (Failure "an endian flag has been set already") else ( - let endian = Some Bitmatch.LittleEndian in + let endian = Some (ConstantEndian LittleEndian) in (endian, signed, t) ) - | "nativeendian" -> + | "nativeendian", None -> if endian <> None then Loc.raise _loc (Failure "an endian flag has been set already") else ( - let endian = Some Bitmatch.NativeEndian in + let endian = Some (ConstantEndian NativeEndian) in (endian, signed, t) ) - | "signed" -> + | "endian", Some expr -> + if endian <> None then + Loc.raise _loc (Failure "an endian flag has been set already") + else ( + let endian = Some (EndianExpr expr) in + (endian, signed, t) + ) + | "signed", None -> if signed <> None then Loc.raise _loc (Failure "a signed flag has been set already") else ( let signed = Some true in (endian, signed, t) ) - | "unsigned" -> + | "unsigned", None -> if signed <> None then Loc.raise _loc (Failure "a signed flag has been set already") else ( let signed = Some false in (endian, signed, t) ) - | "int" -> + | "int", None -> if t <> None then Loc.raise _loc (Failure "a type flag has been set already") else ( let t = Some Int in (endian, signed, t) ) - | "string" -> + | "string", None -> if t <> None then Loc.raise _loc (Failure "a type flag has been set already") else ( let t = Some String in (endian, signed, t) ) - | "bitstring" -> + | "bitstring", None -> if t <> None then Loc.raise _loc (Failure "a type flag has been set already") else ( let t = Some Bitstring in (endian, signed, t) ) - | s -> - Loc.raise _loc (Failure (s ^ ": unknown qualifier")) + | s, Some _ -> + Loc.raise _loc (Failure (s ^ ": unknown qualifier, or qualifier should not be followed by an expression")) + | s, None -> + Loc.raise _loc (Failure (s ^ ": unknown qualifier, or qualifier should be followed by an expression")) ) (None, None, None) qs in (* If type is set to string or bitstring then endianness and @@ -168,7 +182,8 @@ let parse_field _loc field flen qs printer = ); (* Default endianness, signedness, type. *) - let endian = match endian with None -> Bitmatch.BigEndian | Some e -> e in + let endian = + match endian with None -> ConstantEndian BigEndian | Some e -> e in let signed = match signed with None -> false | Some s -> s in let t = match t with None -> Int | Some t -> t in @@ -203,7 +218,10 @@ let string_of_field { field = field; flen = flen; match expr_is_constant flen with | Some i -> string_of_int i | None -> "[non-const-len]" in - let endian = Bitmatch.string_of_endian endian in + let endian = + match endian with + | ConstantEndian endian -> string_of_endian endian + | EndianExpr _ -> "endian [expr]" in let signed = if signed then "signed" else "unsigned" in let t = string_of_t t in let loc_fname = Loc.file_name _loc in @@ -240,53 +258,83 @@ let output_constructor _loc fields = *) let flen_is_const = expr_is_constant flen in - let name_of_int_construct_const = function - (* XXX As an enhancement we should allow a 64-bit-only - * mode which lets us use 'int' up to 63 bits and won't - * compile on 32-bit platforms. - *) + (* Choose the right constructor function. *) + let int_construct_const = function (* XXX The meaning of signed/unsigned breaks down at * 31, 32, 63 and 64 bits. *) - | (1, _, _) -> "construct_bit" - | ((2|3|4|5|6|7|8), _, false) -> "construct_char_unsigned" - | ((2|3|4|5|6|7|8), _, true) -> "construct_char_signed" - | (i, Bitmatch.BigEndian, false) when i <= 31 -> - "construct_int_be_unsigned" - | (i, Bitmatch.BigEndian, true) when i <= 31 -> - "construct_int_be_signed" - | (i, Bitmatch.LittleEndian, false) when i <= 31 -> - "construct_int_le_unsigned" - | (i, Bitmatch.LittleEndian, true) when i <= 31 -> - "construct_int_le_signed" - | (i, Bitmatch.NativeEndian, false) when i <= 31 -> - "construct_int_ne_unsigned" - | (i, Bitmatch.NativeEndian, true) when i <= 31 -> - "construct_int_ne_signed" - | (32, Bitmatch.BigEndian, false) -> "construct_int32_be_unsigned" - | (32, Bitmatch.BigEndian, true) -> "construct_int32_be_signed" - | (32, Bitmatch.LittleEndian, false) -> "construct_int32_le_unsigned" - | (32, Bitmatch.LittleEndian, true) -> "construct_int32_le_signed" - | (32, Bitmatch.NativeEndian, false) -> "construct_int32_ne_unsigned" - | (32, Bitmatch.NativeEndian, true) -> "construct_int32_ne_signed" - | (_, Bitmatch.BigEndian, false) -> "construct_int64_be_unsigned" - | (_, Bitmatch.BigEndian, true) -> "construct_int64_be_signed" - | (_, Bitmatch.LittleEndian, false) -> "construct_int64_le_unsigned" - | (_, Bitmatch.LittleEndian, true) -> "construct_int64_le_signed" - | (_, Bitmatch.NativeEndian, false) -> "construct_int64_ne_unsigned" - | (_, Bitmatch.NativeEndian, true) -> "construct_int64_ne_signed" + | (1, _, _) -> + <:expr> + | ((2|3|4|5|6|7|8), _, false) -> + <:expr> + | ((2|3|4|5|6|7|8), _, true) -> + <:expr> + | (i, ConstantEndian BigEndian, false) when i <= 31 -> + <:expr> + | (i, ConstantEndian BigEndian, true) when i <= 31 -> + <:expr> + | (i, ConstantEndian LittleEndian, false) when i <= 31 -> + <:expr> + | (i, ConstantEndian LittleEndian, true) when i <= 31 -> + <:expr> + | (i, ConstantEndian NativeEndian, false) when i <= 31 -> + <:expr> + | (i, ConstantEndian NativeEndian, true) when i <= 31 -> + <:expr> + | (i, EndianExpr expr, false) when i <= 31 -> + <:expr> + | (i, EndianExpr expr, true) when i <= 31 -> + <:expr> + | (32, ConstantEndian BigEndian, false) -> + <:expr> + | (32, ConstantEndian BigEndian, true) -> + <:expr> + | (32, ConstantEndian LittleEndian, false) -> + <:expr> + | (32, ConstantEndian LittleEndian, true) -> + <:expr> + | (32, ConstantEndian NativeEndian, false) -> + <:expr> + | (32, ConstantEndian NativeEndian, true) -> + <:expr> + | (32, EndianExpr expr, false) -> + <:expr> + | (32, EndianExpr expr, true) -> + <:expr> + | (_, ConstantEndian BigEndian, false) -> + <:expr> + | (_, ConstantEndian BigEndian, true) -> + <:expr> + | (_, ConstantEndian LittleEndian, false) -> + <:expr> + | (_, ConstantEndian LittleEndian, true) -> + <:expr> + | (_, ConstantEndian NativeEndian, false) -> + <:expr> + | (_, ConstantEndian NativeEndian, true) -> + <:expr> + | (_, EndianExpr expr, false) -> + <:expr> + | (_, EndianExpr expr, true) -> + <:expr> in - let name_of_int_construct = function - (* XXX As an enhancement we should allow users to - * specify that a field length can fit into a char/int/int32 - * (of course, this would have to be checked at runtime). - *) - | (Bitmatch.BigEndian, false) -> "construct_int64_be_unsigned" - | (Bitmatch.BigEndian, true) -> "construct_int64_be_signed" - | (Bitmatch.LittleEndian, false) -> "construct_int64_le_unsigned" - | (Bitmatch.LittleEndian, true) -> "construct_int64_le_signed" - | (Bitmatch.NativeEndian, false) -> "construct_int64_ne_unsigned" - | (Bitmatch.NativeEndian, true) -> "construct_int64_ne_signed" + let int_construct = function + | (ConstantEndian BigEndian, false) -> + <:expr> + | (ConstantEndian BigEndian, true) -> + <:expr> + | (ConstantEndian LittleEndian, false) -> + <:expr> + | (ConstantEndian LittleEndian, true) -> + <:expr> + | (ConstantEndian NativeEndian, false) -> + <:expr> + | (ConstantEndian NativeEndian, true) -> + <:expr> + | (EndianExpr expr, false) -> + <:expr> + | (EndianExpr expr, true) -> + <:expr> in let expr = @@ -298,13 +346,11 @@ let output_constructor _loc fields = * be better to move them here. XXX *) | Int, Some i when i > 0 && i <= 64 -> - let construct_func = - name_of_int_construct_const (i,endian,signed) in + let construct_fn = int_construct_const (i,endian,signed) in exn_used := true; <:expr< - Bitmatch.$lid:construct_func$ $lid:buffer$ $fexpr$ $`int:i$ - $lid:exn$ + $construct_fn$ $lid:buffer$ $fexpr$ $`int:i$ $lid:exn$ >> | Int, Some _ -> @@ -318,13 +364,12 @@ let output_constructor _loc fields = * be better to move them here. XXX *) | Int, None -> - let construct_func = name_of_int_construct (endian,signed) in + let construct_fn = int_construct (endian,signed) in exn_used := true; <:expr< if $flen$ >= 1 && $flen$ <= 64 then - Bitmatch.$lid:construct_func$ $lid:buffer$ $fexpr$ $flen$ - $lid:exn$ + $construct_fn$ $lid:buffer$ $fexpr$ $flen$ $lid:exn$ else raise (Bitmatch.Construct_failure ("length of int field must be [1..64]", @@ -506,66 +551,94 @@ let output_bitmatch _loc bs cases = *) let flen_is_const = expr_is_constant flen in - let name_of_int_extract_const = function - (* XXX As an enhancement we should allow a 64-bit-only - * mode which lets us use 'int' up to 63 bits and won't - * compile on 32-bit platforms. - *) + let int_extract_const = function (* XXX The meaning of signed/unsigned breaks down at * 31, 32, 63 and 64 bits. *) - | (1, _, _) -> "extract_bit" - | ((2|3|4|5|6|7|8), _, false) -> "extract_char_unsigned" - | ((2|3|4|5|6|7|8), _, true) -> "extract_char_signed" - | (i, Bitmatch.BigEndian, false) when i <= 31 -> - "extract_int_be_unsigned" - | (i, Bitmatch.BigEndian, true) when i <= 31 -> - "extract_int_be_signed" - | (i, Bitmatch.LittleEndian, false) when i <= 31 -> - "extract_int_le_unsigned" - | (i, Bitmatch.LittleEndian, true) when i <= 31 -> - "extract_int_le_signed" - | (i, Bitmatch.NativeEndian, false) when i <= 31 -> - "extract_int_ne_unsigned" - | (i, Bitmatch.NativeEndian, true) when i <= 31 -> - "extract_int_ne_signed" - | (32, Bitmatch.BigEndian, false) -> "extract_int32_be_unsigned" - | (32, Bitmatch.BigEndian, true) -> "extract_int32_be_signed" - | (32, Bitmatch.LittleEndian, false) -> "extract_int32_le_unsigned" - | (32, Bitmatch.LittleEndian, true) -> "extract_int32_le_signed" - | (32, Bitmatch.NativeEndian, false) -> "extract_int32_ne_unsigned" - | (32, Bitmatch.NativeEndian, true) -> "extract_int32_ne_signed" - | (_, Bitmatch.BigEndian, false) -> "extract_int64_be_unsigned" - | (_, Bitmatch.BigEndian, true) -> "extract_int64_be_signed" - | (_, Bitmatch.LittleEndian, false) -> "extract_int64_le_unsigned" - | (_, Bitmatch.LittleEndian, true) -> "extract_int64_le_signed" - | (_, Bitmatch.NativeEndian, false) -> "extract_int64_ne_unsigned" - | (_, Bitmatch.NativeEndian, true) -> "extract_int64_ne_signed" + | (1, _, _) -> + <:expr> + | ((2|3|4|5|6|7|8), _, false) -> + <:expr> + | ((2|3|4|5|6|7|8), _, true) -> + <:expr> + | (i, ConstantEndian BigEndian, false) when i <= 31 -> + <:expr> + | (i, ConstantEndian BigEndian, true) when i <= 31 -> + <:expr> + | (i, ConstantEndian LittleEndian, false) when i <= 31 -> + <:expr> + | (i, ConstantEndian LittleEndian, true) when i <= 31 -> + <:expr> + | (i, ConstantEndian NativeEndian, false) when i <= 31 -> + <:expr> + | (i, ConstantEndian NativeEndian, true) when i <= 31 -> + <:expr> + | (i, EndianExpr expr, false) when i <= 31 -> + <:expr> + | (i, EndianExpr expr, true) when i <= 31 -> + <:expr> + | (32, ConstantEndian BigEndian, false) -> + <:expr> + | (32, ConstantEndian BigEndian, true) -> + <:expr> + | (32, ConstantEndian LittleEndian, false) -> + <:expr> + | (32, ConstantEndian LittleEndian, true) -> + <:expr> + | (32, ConstantEndian NativeEndian, false) -> + <:expr> + | (32, ConstantEndian NativeEndian, true) -> + <:expr> + | (32, EndianExpr expr, false) -> + <:expr> + | (32, EndianExpr expr, true) -> + <:expr> + | (_, ConstantEndian BigEndian, false) -> + <:expr> + | (_, ConstantEndian BigEndian, true) -> + <:expr> + | (_, ConstantEndian LittleEndian, false) -> + <:expr> + | (_, ConstantEndian LittleEndian, true) -> + <:expr> + | (_, ConstantEndian NativeEndian, false) -> + <:expr> + | (_, ConstantEndian NativeEndian, true) -> + <:expr> + | (_, EndianExpr expr, false) -> + <:expr> + | (_, EndianExpr expr, true) -> + <:expr> in - let name_of_int_extract = function - (* XXX As an enhancement we should allow users to - * specify that a field length can fit into a char/int/int32 - * (of course, this would have to be checked at runtime). - *) - | (Bitmatch.BigEndian, false) -> "extract_int64_be_unsigned" - | (Bitmatch.BigEndian, true) -> "extract_int64_be_signed" - | (Bitmatch.LittleEndian, false) -> "extract_int64_le_unsigned" - | (Bitmatch.LittleEndian, true) -> "extract_int64_le_signed" - | (Bitmatch.NativeEndian, false) -> "extract_int64_ne_unsigned" - | (Bitmatch.NativeEndian, true) -> "extract_int64_ne_signed" + let int_extract = function + | (ConstantEndian BigEndian, false) -> + <:expr> + | (ConstantEndian BigEndian, true) -> + <:expr> + | (ConstantEndian LittleEndian, false) -> + <:expr> + | (ConstantEndian LittleEndian, true) -> + <:expr> + | (ConstantEndian NativeEndian, false) -> + <:expr> + | (ConstantEndian NativeEndian, true) -> + <:expr> + | (EndianExpr expr, false) -> + <:expr> + | (EndianExpr expr, true) -> + <:expr> in let expr = match t, flen_is_const with (* Common case: int field, constant flen *) | Int, Some i when i > 0 && i <= 64 -> - let extract_func = name_of_int_extract_const (i,endian,signed) in + let extract_fn = int_extract_const (i,endian,signed) in let v = gensym "val" in <:expr< if $lid:len$ >= $`int:i$ then ( let $lid:v$, $lid:off$, $lid:len$ = - Bitmatch.$lid:extract_func$ $lid:data$ $lid:off$ $lid:len$ - $`int:i$ in + $extract_fn$ $lid:data$ $lid:off$ $lid:len$ $`int:i$ in match $lid:v$ with $fpatt$ when true -> $inner$ | _ -> () ) >> @@ -578,13 +651,12 @@ let output_bitmatch _loc bs cases = * (not an error). *) | Int, None -> - let extract_func = name_of_int_extract (endian,signed) in + let extract_fn = int_extract (endian,signed) in let v = gensym "val" in <:expr< if $flen$ >= 1 && $flen$ <= 64 && $flen$ <= $lid:len$ then ( let $lid:v$, $lid:off$, $lid:len$ = - Bitmatch.$lid:extract_func$ $lid:data$ $lid:off$ $lid:len$ - $flen$ in + $extract_fn$ $lid:data$ $lid:off$ $lid:len$ $flen$ in match $lid:v$ with $fpatt$ when true -> $inner$ | _ -> () ) >> @@ -778,8 +850,15 @@ let output_bitmatch _loc bs cases = EXTEND Gram GLOBAL: expr; + (* Qualifiers are a list of identifiers ("string", "bigendian", etc.) + * followed by an optional expression (used in certain cases). Note + * that we are careful not to declare any explicit reserved words. + *) qualifiers: [ - [ LIST0 [ q = LIDENT -> q ] SEP "," ] + [ LIST0 + [ q = LIDENT; + e = OPT [ "("; e = expr; ")" -> e ] -> (q, e) ] + SEP "," ] ]; (* Field used in the bitmatch operator (a pattern). *) diff --git a/tests/40_endianexpr.ml b/tests/40_endianexpr.ml new file mode 100644 index 0000000..2ebc55b --- /dev/null +++ b/tests/40_endianexpr.ml @@ -0,0 +1,35 @@ +(* Endianness expressions + * $Id$ + *) + +open Printf +open Bitmatch + +let () = + let rec loop = function + | (e, expected) :: rest -> + let bits = BITSTRING { + expected : 32 : endian (e); + expected : 32 : endian (e); + expected : 32 : endian (e) + } in + (bitmatch bits with + | { actual : 32 : endian (e); + actual : 32 : endian (e); + actual : 32 : endian (e) } -> + if actual <> expected then + failwith (sprintf "actual %ld <> expected %ld" actual expected) + | { _ } as bits -> + hexdump_bitstring stderr bits; exit 1 + ); + loop rest + | [] -> () + in + loop [ + BigEndian, 0xa1b2c3d4_l; + BigEndian, 0xa1d4c3b2_l; + LittleEndian, 0xa1b2c3d4_l; + LittleEndian, 0xa1d4c3b2_l; + NativeEndian, 0xa1b2c3d4_l; + NativeEndian, 0xa1d4c3b2_l; + ] -- 1.8.3.1