From: Richard W.M. Jones Date: Sun, 18 May 2008 15:39:29 +0000 (+0000) Subject: More complete handling of constant field length expressions. X-Git-Url: http://git.annexia.org/?a=commitdiff_plain;h=e87f0879fef8e32e7ae7f7103f420c1612f3863f;p=ocaml-bitstring.git More complete handling of constant field length expressions. More TODO line items. Set svnignore and remove old cvsignore file. Fix link to bitmatch home page. --- diff --git a/.cvsignore b/.cvsignore deleted file mode 100644 index 3e76a87..0000000 --- a/.cvsignore +++ /dev/null @@ -1,15 +0,0 @@ -*.cmi -*.cmo -*.cmx -*.cma -*.cmxa -ocaml-bitmatch-*.tar.gz -Makefile -META -configure -autom4te.cache -config.h.in -config.h -config.log -config.status -html diff --git a/TODO b/TODO index 10ea7a9..ecb0620 100644 --- a/TODO +++ b/TODO @@ -35,3 +35,24 @@ Major to-do items. (10) Cross-module, persistent, named patterns, see: http://caml.inria.fr/pub/ml-archives/caml-list/2008/04/25992c9c9fa999fe1d35d961dd9917a2.en.html + +(11) Runtime endianness expressions. The suggested syntax is: + + { field : len : endianness(expr) } + + where expr would evaluate to something like `BigEndian or + `LittleEndian. + + There are several protocols around where endianness is only + determined at runtime, examples are libpcap and TIFF. + +(12) More constant field lengths. + +(13) Implement native endian functions. + +(14) A proper test suite. + +(15) More examples: + + ELF binaries + GIF images diff --git a/bitmatch.mli b/bitmatch.mli index a8fd8ba..90f6acc 100644 --- a/bitmatch.mli +++ b/bitmatch.mli @@ -29,7 +29,7 @@ this module to both parse and generate binary formats, for example, communications protocols, disk formats and binary files. - {{:http://et.redhat.com/~rjones/bitmatch/}OCaml bitmatch website} + {{:http://code.google.com/p/bitmatch/}OCaml bitmatch website} {2 Examples} @@ -186,7 +186,7 @@ let make_message typ subtype param = match then the standard library [Match_failure] exception is thrown. 
- Patterns look a bit different from normal match patterns. The + Patterns look a bit different from normal match patterns. They consist of a list of bitfields separated by [;] where each bitfield contains a bind variable, the width (in bits) of the field, and other information. Some example patterns: @@ -381,9 +381,8 @@ Bitmatch.hexdump_bitstring stdout bits ;; computed expression. Detection of compile-time constants is quite simplistic so only an - immediate, simple integer is recognised as a constant and anything - else is considered a computed expression, even expressions such as - [5-2] which are obviously (to our eyes) constant. + integer literal or a simple expression (eg. [5*8]) is + recognized as a constant. In any case the bit size of an integer is limited to the range \[1..64\]. This is detected as a compile-time error if that is diff --git a/pa_bitmatch.ml b/pa_bitmatch.ml index 83bb5d4..9d29340 100644 --- a/pa_bitmatch.ml +++ b/pa_bitmatch.ml @@ -32,6 +32,42 @@ open Ast *) let debug = false +(* Work out if an expression is an integer constant. + * + * Returns [Some i] if so (where i is the integer value), else [None]. + * Division by zero and out-of-range shift counts yield [None]. + * Fairly simplistic algorithm: we can only detect simple constant + * expressions such as [k], [k+c], [k-c] etc. + *) +let rec expr_is_constant = function + | <:expr< $int:i$ >> -> (* Literal integer constant. *) + Some (int_of_string i) + | <:expr< $a$ + $b$ >> -> (* Addition of constants. *) + (match expr_is_constant a, expr_is_constant b with + | Some a, Some b -> Some (a+b) + | _ -> None) + | <:expr< $a$ - $b$ >> -> (* Subtraction. *) + (match expr_is_constant a, expr_is_constant b with + | Some a, Some b -> Some (a-b) + | _ -> None) + | <:expr< $a$ * $b$ >> -> (* Multiplication. *) + (match expr_is_constant a, expr_is_constant b with + | Some a, Some b -> Some (a*b) + | _ -> None) + | <:expr< $a$ / $b$ >> -> (* Division.
*) + (match expr_is_constant a, expr_is_constant b with + | Some a, Some b when b <> 0 -> Some (a/b) + | _ -> None) + | <:expr< $a$ lsl $b$ >> -> (* Shift left. *) + (match expr_is_constant a, expr_is_constant b with + | Some a, Some b when b >= 0 && b <= Sys.word_size -> Some (a lsl b) + | _ -> None) + | <:expr< $a$ lsr $b$ >> -> (* Shift right. *) + (match expr_is_constant a, expr_is_constant b with + | Some a, Some b when b >= 0 && b <= Sys.word_size -> Some (a lsr b) + | _ -> None) + | _ -> None (* Anything else is not constant. *) + (* A field when used in a bitmatch (a pattern). *) type fpatt = { fpatt : patt; (* field matching pattern *) @@ -189,9 +225,9 @@ and string_of_field_common { flen = flen; endian = endian; signed = signed; t = t; _loc = _loc } = let flen = - match flen with - | <:expr< $int:i$ >> -> i - | _ -> "[non-const-len]" in + match expr_is_constant flen with + | Some i -> string_of_int i + | None -> "[non-const-len]" in let endian = string_of_endian endian in let signed = if signed then "signed" else "unsigned" in let t = string_of_t t in @@ -227,10 +263,7 @@ let output_constructor _loc fields = (* Is flen an integer constant? If so, what is it? This * is very simple-minded and only detects simple constants. *) - let flen_is_const = - match flen with - | <:expr< $int:i$ >> -> Some (int_of_string i) - | _ -> None in + let flen_is_const = expr_is_constant flen in let name_of_int_construct_const = function (* XXX As an enhancement we should allow a 64-bit-only @@ -489,10 +522,7 @@ let output_bitmatch _loc bs cases = (* Is flen an integer constant? If so, what is it? This * is very simple-minded and only detects simple constants. *) - let flen_is_const = - match flen with - | <:expr< $int:i$ >> -> Some (int_of_string i) - | _ -> None in + let flen_is_const = expr_is_constant flen in let name_of_int_extract_const = function (* XXX As an enhancement we should allow a 64-bit-only