* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
+ * version 2 of the License, or (at your option) any later version,
+ * with the OCaml linking exception described in COPYING.LIB.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
- * $Id: bitmatch.mli,v 1.20 2008-05-08 21:28:28 rjones Exp $
+ * $Id$
*)
(**
The general form of [bitmatch] is:
- [bitmatch {] {i bitstring-expression} [} with]
+ [bitmatch] {i bitstring-expression} [with]
[| {] {i pattern} [} ->] {i code}
A bitstring field of length 0 matches an empty bitstring
(occasionally useful when matching optional subfields).
- Qualifiers are a list of identifiers which control the type,
+ Qualifiers are a list of identifiers/expressions which control the type,
signedness and endianness of the field. Permissible qualifiers are:
- - [int] (field has an integer type)
- - [string] (field is a string type)
- - [bitstring] (field is a bitstring type)
- - [signed] (field is signed)
- - [unsigned] (field is unsigned)
- - [bigendian] (field is big endian - a.k.a network byte order)
- - [littleendian] (field is little endian - a.k.a Intel byte order)
- - [nativeendian] (field is same endianness as the machine)
-
- The default settings are [int], [unsigned], [bigendian].
+ - [int]: field has an integer type
+ - [string]: field is a string type
+ - [bitstring]: field is a bitstring type
+ - [signed]: field is signed
+ - [unsigned]: field is unsigned
+ - [bigendian]: field is big endian - a.k.a network byte order
+ - [littleendian]: field is little endian - a.k.a Intel byte order
+ - [nativeendian]: field is same endianness as the machine
+ - [endian (expr)]: [expr] should be an expression which evaluates to
+ a {!endian} type, ie. [LittleEndian], [BigEndian] or [NativeEndian].
+ The expression is an arbitrary OCaml expression and can use the
+ value of earlier fields in the bitmatch.
+ - [offset (expr)]: see {{:#computedoffsets}computed offsets} below.
+
+ The default settings are [int], [unsigned], [bigendian], no offset.
Note that many of these qualifiers cannot be used together,
eg. bitstrings do not have endianness. The syntax extension should
length expression in the field is a compile-time constant or a
computed expression.
- Detection of compile-time constants is quite simplistic so only an
+ Detection of compile-time constants is quite simplistic so only
simple integer literals and simple expressions (eg. [5*8]) are
recognized as constants.
still need to be a runtime check to enforce the
size).
+ {2:computedoffsets Computed offsets}
+
+ You can add an [offset(..)] qualifier to bitmatch patterns in order
+ to move the current offset within the bitstring forwards.
+
+ For example:
+
+{[
+bitmatch bits with
+| { field1 : 8;
+ field2 : 8 : offset(160) } -> ...
+]}
+
+ matches [field1] at the start of the bitstring and [field2]
+ at 160 bits into the bitstring. The middle 152 bits go
+ unmatched (ie. can be anything).
+
+ The generated code is efficient. If field lengths and offsets
+ are known to be constant at compile time, then almost all
+ runtime checks are avoided. Non-constant field lengths and/or
+ non-constant offsets can result in more runtime checks being added.
+
+ Note that neither moving the offset backwards nor moving the offset
+ in [BITSTRING] constructors is supported at present.
+
+ {2 Named patterns and persistent patterns}
+
+ Please see {!Bitmatch_persistent} for documentation on this subject.
+
{2 Compiling}
Using the compiler directly you can do:
{v
ocamlc -I +bitmatch \
- -pp "camlp4o `ocamlc -where`/bitmatch/pa_bitmatch.cmo" \
- bitmatch.cma test.ml -o test
+ -pp "camlp4of bitmatch.cma bitmatch_persistent.cma \
+ `ocamlc -where`/bitmatch/pa_bitmatch.cmo" \
+ unix.cma bitmatch.cma test.ml -o test
v}
Simpler method using findlib:
{v
ocamlfind ocamlc \
- -package bitmatch.syntax -syntax bitmatch.syntax \
+ -package bitmatch,bitmatch.syntax -syntax bitmatch.syntax \
-linkpkg test.ml -o test
v}
let len = read_untrusted_source () in
let buffer = allocate_bitstring () in
BITSTRING {
- buffer : len : bitstring
+ buffer : len : bitstring
}
]}
{3 Types}
*)
+type endian = BigEndian | LittleEndian | NativeEndian
+(** Endianness. *)
+
+val string_of_endian : endian -> string
+(** [string_of_endian e] returns a string representation of the
+ endianness [e]. *)
+
type bitstring = string * int * int
(** [bitstring] is the basic type used to store bitstrings.
location of the [BITSTRING] constructor that failed.
*)
-(** {3 Bitstrings} *)
+(** {3 Bitstring manipulation} *)
+
+val bitstring_length : bitstring -> int
+(** [bitstring_length bitstring] returns the length of
+ the bitstring in bits.
+
+ Note this just returns the third field in the {!bitstring} tuple. *)
+
+val subbitstring : bitstring -> int -> int -> bitstring
+(** [subbitstring bits off len] returns a sub-bitstring
+ of the bitstring, starting at offset [off] bits and
+ with length [len] bits.
+
+ If the original bitstring is not long enough to do this
+ then the function raises [Invalid_argument "subbitstring"].
+
+ Note that this function just changes the offset and length
+ fields of the {!bitstring} tuple, so is very efficient. *)
+
+val dropbits : int -> bitstring -> bitstring
+(** [dropbits n bits] drops the first [n] bits of the bitstring [bits]
+ and returns a new bitstring which is shorter by [n] bits.
+
+ If the length of the original bitstring is less than n bits,
+ this raises [Invalid_argument "dropbits"].
+
+ Note that this function just changes the offset and length
+ fields of the {!bitstring} tuple, so is very efficient. *)
+
+val takebits : int -> bitstring -> bitstring
+(** [takebits n bits] takes the first [n] bits of the bitstring [bits]
+ and returns a new bitstring which is exactly [n] bits long.
+
+ If the length of the original bitstring is less than n bits,
+ this raises [Invalid_argument "takebits"].
+
+ Note that this function just changes the offset and length
+ fields of the {!bitstring} tuple, so is very efficient. *)
+
+(** {3 Constructing bitstrings} *)
val empty_bitstring : bitstring
(** [empty_bitstring] is the empty, zero-length bitstring. *)
For example, [make_bitstring 16 '\x5a'] will create
the bitstring [0x5a5a] or in binary [0101 1010 0101 1010].
- Note that the length is in bits, not bytes. *)
+ Note that the length is in bits, not bytes. The length does NOT
+ need to be a multiple of 8. *)
+
+val zeroes_bitstring : int -> bitstring
+(** [zeroes_bitstring n] creates an [n] bit bitstring of all 0's.
+
+ Actually this is the same as {!create_bitstring}. *)
+
+val ones_bitstring : int -> bitstring
+(** [ones_bitstring n] creates an [n] bit bitstring of all 1's. *)
val bitstring_of_string : string -> bitstring
(** [bitstring_of_string str] creates a bitstring
[max] bytes from the channel (or fewer if the end of input
occurs before that). *)
-val bitstring_length : bitstring -> int
-(** [bitstring_length bitstring] returns the length of
- the bitstring in bits. *)
+(** {3 Converting bitstrings} *)
val string_of_bitstring : bitstring -> string
(** [string_of_bitstring bitstring] converts a bitstring to a string
This function is inefficient. In the best case when the bitstring
is nicely byte-aligned we do a [String.sub] operation. If the
bitstring isn't aligned then this involves a lot of bit twiddling
- and is particularly inefficient. *)
+ and is particularly inefficient.
+
+ If the bitstring is not a multiple of 8 bits wide then the
+ final byte of the string contains the high bits set to the
+ remaining bits and the low bits set to 0. *)
+
+val bitstring_to_file : bitstring -> string -> unit
+(** [bitstring_to_file bits filename] writes the bitstring [bits]
+ to the file [filename]. It overwrites the output file.
+
+ Some restrictions apply, see {!bitstring_to_chan}. *)
+
+val bitstring_to_chan : bitstring -> out_channel -> unit
+(** [bitstring_to_chan bits chan] writes the bitstring [bits]
+ to the channel [chan].
+
+ Channels are made up of bytes, bitstrings can be any bit length
+ including fractions of bytes. So this function only works
+ if the length of the bitstring is an exact multiple of 8 bits
+ (otherwise it raises [Invalid_argument "bitstring_to_chan"]).
+
+ Furthermore the function is efficient only in the case where
+ the bitstring is stored fully aligned, otherwise it has to
+ do inefficient bit twiddling like {!string_of_bitstring}.
+
+ In the common case where the bitstring was generated by the
+ [BITSTRING] operator and is an exact multiple of 8 bits wide,
+ then this function will always work efficiently.
+*)
(** {3 Printing bitstrings} *)
(** {3 Miscellaneous} *)
+val package : string
+(** The package name, always ["ocaml-bitmatch"]. *)
+
+val version : string
+(** The package version as a string. *)
+
val debug : bool ref
(** Set this variable to true to enable extended debugging.
This only works if debugging was also enabled in the
val extract_int_le_unsigned : string -> int -> int -> int -> int * int * int
+val extract_int_ne_unsigned : string -> int -> int -> int -> int * int * int
+
+val extract_int_ee_unsigned : endian -> string -> int -> int -> int -> int * int * int
+
val extract_int32_be_unsigned : string -> int -> int -> int -> int32 * int * int
val extract_int32_le_unsigned : string -> int -> int -> int -> int32 * int * int
+val extract_int32_ne_unsigned : string -> int -> int -> int -> int32 * int * int
+
+val extract_int32_ee_unsigned : endian -> string -> int -> int -> int -> int32 * int * int
+
val extract_int64_be_unsigned : string -> int -> int -> int -> int64 * int * int
val extract_int64_le_unsigned : string -> int -> int -> int -> int64 * int * int
+val extract_int64_ne_unsigned : string -> int -> int -> int -> int64 * int * int
+
+val extract_int64_ee_unsigned : endian -> string -> int -> int -> int -> int64 * int * int
+
val construct_bit : Buffer.t -> bool -> int -> exn -> unit
val construct_char_unsigned : Buffer.t -> int -> int -> exn -> unit
val construct_int_be_unsigned : Buffer.t -> int -> int -> exn -> unit
+val construct_int_ne_unsigned : Buffer.t -> int -> int -> exn -> unit
+
+val construct_int_ee_unsigned : endian -> Buffer.t -> int -> int -> exn -> unit
+
val construct_int32_be_unsigned : Buffer.t -> int32 -> int -> exn -> unit
+val construct_int32_ne_unsigned : Buffer.t -> int32 -> int -> exn -> unit
+
+val construct_int32_ee_unsigned : endian -> Buffer.t -> int32 -> int -> exn -> unit
+
val construct_int64_be_unsigned : Buffer.t -> int64 -> int -> exn -> unit
+val construct_int64_ne_unsigned : Buffer.t -> int64 -> int -> exn -> unit
+
+val construct_int64_ee_unsigned : endian -> Buffer.t -> int64 -> int -> exn -> unit
+
val construct_string : Buffer.t -> string -> unit
+
+val construct_bitstring : Buffer.t -> bitstring -> unit