+ {{:#reference}Jump straight to the reference section for
+ documentation on types and functions}.
+
+ {2 Introduction}
+
+ Bitmatch adds Erlang-style bitstrings and matching over bitstrings
+ as a syntax extension and library for OCaml. You can use
+ this module to both parse and generate binary formats, for
+ example, communications protocols, disk formats and binary files.
+
+ {{:http://code.google.com/p/bitmatch/}OCaml bitmatch website}
+
+ {2 Examples}
+
+ A function which can parse IPv4 packets:
+
+{[
+let display pkt =
+ bitmatch pkt with
+ (* IPv4 packet header
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | 4 | IHL |Type of Service| Total Length |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Identification |Flags| Fragment Offset |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Time to Live | Protocol | Header Checksum |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Source Address |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Destination Address |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Options | Padding |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *)
+ | { 4 : 4; hdrlen : 4; tos : 8; length : 16;
+ identification : 16; flags : 3; fragoffset : 13;
+ ttl : 8; protocol : 8; checksum : 16;
+ source : 32;
+ dest : 32;
+ options : (hdrlen-5)*32 : bitstring;
+ payload : -1 : bitstring } ->
+
+ printf "IPv4:\n";
+ printf " header length: %d * 32 bit words\n" hdrlen;
+ printf " type of service: %d\n" tos;
+ printf " packet length: %d bytes\n" length;
+ printf " identification: %d\n" identification;
+ printf " flags: %d\n" flags;
+ printf " fragment offset: %d\n" fragoffset;
+ printf " ttl: %d\n" ttl;
+ printf " protocol: %d\n" protocol;
+ printf " checksum: %d\n" checksum;
+ printf " source: %lx dest: %lx\n" source dest;
+ printf " header options + padding:\n";
+ Bitmatch.hexdump_bitstring stdout options;
+ printf " packet payload:\n";
+ Bitmatch.hexdump_bitstring stdout payload
+
+ | { version : 4 } ->
+ eprintf "unknown IP version %d\n" version;
+ exit 1
+
+ | { _ } as pkt ->
+ eprintf "data is smaller than one nibble:\n";
+ Bitmatch.hexdump_bitstring stderr pkt;
+ exit 1
+]}
+
+ A program which can parse
+ {{:http://lxr.linux.no/linux/include/linux/ext3_fs.h}Linux EXT3 filesystem superblocks}:
+
+{[
+let bits = Bitmatch.bitstring_of_file "tests/ext3_sb"
+
+let () =
+ bitmatch bits with
+ | { s_inodes_count : 32 : littleendian; (* Inodes count *)
+ s_blocks_count : 32 : littleendian; (* Blocks count *)
+ s_r_blocks_count : 32 : littleendian; (* Reserved blocks count *)
+ s_free_blocks_count : 32 : littleendian; (* Free blocks count *)
+ s_free_inodes_count : 32 : littleendian; (* Free inodes count *)
+ s_first_data_block : 32 : littleendian; (* First Data Block *)
+ s_log_block_size : 32 : littleendian; (* Block size *)
+ s_log_frag_size : 32 : littleendian; (* Fragment size *)
+ s_blocks_per_group : 32 : littleendian; (* # Blocks per group *)
+ s_frags_per_group : 32 : littleendian; (* # Fragments per group *)
+ s_inodes_per_group : 32 : littleendian; (* # Inodes per group *)
+ s_mtime : 32 : littleendian; (* Mount time *)
+ s_wtime : 32 : littleendian; (* Write time *)
+ s_mnt_count : 16 : littleendian; (* Mount count *)
+ s_max_mnt_count : 16 : littleendian; (* Maximal mount count *)
+ 0xef53 : 16 : littleendian } -> (* Magic signature *)
+
+ printf "ext3 superblock:\n";
+ printf " s_inodes_count = %ld\n" s_inodes_count;
+ printf " s_blocks_count = %ld\n" s_blocks_count;
+ printf " s_free_inodes_count = %ld\n" s_free_inodes_count;
+ printf " s_free_blocks_count = %ld\n" s_free_blocks_count
+
+ | { _ } ->
+ eprintf "not an ext3 superblock!\n%!";
+ exit 2
+]}
+
+ Constructing packets for a simple binary message
+ protocol:
+
+{[
+(*
+ +---------------+---------------+--------------------------+
+ | type | subtype | parameter |
+ +---------------+---------------+--------------------------+
+ <-- 16 bits --> <-- 16 bits --> <------- 32 bits -------->
+
+ All fields are in network byte order.
+*)
+
+let make_message typ subtype param =
+ (BITSTRING {
+ typ : 16;
+ subtype : 16;
+ param : 32
+ }) ;;
+]}
+
+ {2 Loading, creating bitstrings}
+
+ The basic data type is the {!bitstring}, a string of bits of
+ arbitrary length. Bitstrings can be any length in bits and
+ operations do not need to be byte-aligned (although they will
+ generally be more efficient if they are byte-aligned).
+
+ Internally a bitstring is stored as a normal OCaml [string]
+ together with an offset and length, where the offset and length are
+ measured in bits. Thus one can efficiently form substrings of
+ bitstrings, overlay a bitstring on existing data, and load and save
+ bitstrings from files or other external sources.
+
+ To load a bitstring from a file use {!bitstring_of_file} or
+ {!bitstring_of_chan}.
+
+ There are also functions to create bitstrings from arbitrary data.
+ See the {{:#reference}reference} below.
+
+ {2 Matching bitstrings with patterns}
+
+ Use the [bitmatch] operator (part of the syntax extension) to break
+ apart a bitstring into its fields. [bitmatch] works a lot like the
+ OCaml [match] operator.
+
+ The general form of [bitmatch] is:
+
+ [bitmatch {] {i bitstring-expression} [} with]
+
+ [| {] {i pattern} [} ->] {i code}
+
+ [| {] {i pattern} [} ->] {i code}
+
+ [|] ...
+
+ As with normal match, the statement attempts to match the
+ bitstring against each pattern in turn. If none of the patterns
+ match then the standard library [Match_failure] exception is
+ thrown.
+
+ Patterns look a bit different from normal match patterns. They
+ consist of a list of bitfields separated by [;] where each bitfield
+ contains a bind variable, the width (in bits) of the field, and
+ other information. Some example patterns:
+
+{[
+bitmatch bits with
+
+| { version : 8; name : 8; param : 8 } -> ...
+
+ (* Bitstring of at least 3 bytes. First byte is the version
+ number, second byte is a field called name, third byte is
+ a field called parameter. *)
+
+| { flag : 1 } ->
+ printf "flag is %b\n" flag
+
+ (* A single flag bit (mapped into an OCaml boolean). *)
+
+| { len : 4; data : 1+len } ->
+ printf "len = %d, data = 0x%Lx\n" len data
+
+ (* A 4-bit length, followed by 1-16 bits of data, where the
+ length of the data is computed from len. *)
+
+| { ipv6_source : 128 : bitstring;
+ ipv6_dest : 128 : bitstring } -> ...
+
+ (* IPv6 source and destination addresses. Each is 128 bits
+ and is mapped into a bitstring type which will be a substring
+ of the main bitstring expression. *)
+]}
+
+ You can also add conditional when-clauses:
+
+{[
+| { version : 4 }
+ when version = 4 || version = 6 -> ...
+
+ (* Only match and run the code when version is 4 or 6. If
+ it isn't we will drop through to the next case. *)
+]}
+
+ Note that the pattern is only compared against the first part of
+ the bitstring (there may be more data in the bitstring following
+ the pattern, which is not matched). In terms of regular
+ expressions you might say that the pattern matches [^pattern], not
+ [^pattern$]. To ensure that the bitstring contains only the
+ pattern, add a length -1 bitstring to the end and test that its
+ length is zero in the when-clause:
+
+{[
+| { n : 4;
+ rest : -1 : bitstring }
+ when Bitmatch.bitstring_length rest = 0 -> ...
+
+ (* Only matches exactly 4 bits. *)
+]}
+
+ Normally the first part of each field is a binding variable,
+ but you can also match a constant, as in:
+
+{[
+| { (4|6) : 4 } -> ...
+
+ (* Only matches if the first 4 bits contain either
+ the integer 4 or the integer 6. *)
+]}
+
+ One may also match on strings:
+
+{[
+| { "MAGIC" : 5*8 : string } -> ...
+
+ (* Only matches if the string "MAGIC" appears at the start
+ of the input. *)
+]}
+
+ {3:patternfieldreference Pattern field reference}
+
+ The exact format of each pattern field is:
+
+ [pattern : length [: qualifier [,qualifier ...]]]
+
+ [pattern] is the pattern, binding variable name, or constant to
+ match. [length] is the length in bits which may be either a
+ constant or an expression. The length expression is just an OCaml
+ expression and can use any values defined in the program, and refer
+ back to earlier fields (but not to later fields).
+
+ Integers can only have lengths in the range \[1..64\] bits. See the
+ {{:#integertypes}integer types} section below for how these are
+ mapped to the OCaml int/int32/int64 types. This is checked
+ at compile time if the length expression is constant, otherwise it is
+ checked at runtime and you will get a runtime exception eg. in
+ the case of a computed length expression.
+
+ A bitstring field of length -1 matches all the rest of the
+ bitstring (thus this is only useful as the last field in a
+ pattern).
+
+ A bitstring field of length 0 matches an empty bitstring
+ (occasionally useful when matching optional subfields).
+
+ Qualifiers are a list of identifiers which control the type,
+ signedness and endianness of the field. Permissible qualifiers are:
+
+ - [int] (field has an integer type)
+ - [string] (field is a string type)
+ - [bitstring] (field is a bitstring type)
+ - [signed] (field is signed)
+ - [unsigned] (field is unsigned)
+ - [bigendian] (field is big endian - a.k.a network byte order)
+ - [littleendian] (field is little endian - a.k.a Intel byte order)
+ - [nativeendian] (field is same endianness as the machine)
+
+ The default settings are [int], [unsigned], [bigendian].
+
+ Note that many of these qualifiers cannot be used together,
+ eg. bitstrings do not have endianness. The syntax extension should
+ give you a compile-time error if you use incompatible qualifiers.
+
+ {3 Other cases in bitmatch}
+
+ As well as a list of fields, it is possible to name the
+ bitstring and/or have a default match case:
+
+{[
+| { _ } -> ...
+
+ (* Default match case. *)
+
+| { _ } as pkt -> ...
+
+ (* Default match case, with 'pkt' bound to the whole bitstring. *)
+]}
+
+ {2 Constructing bitstrings}
+
+ Bitstrings may be constructed using the [BITSTRING] operator (as an
+ expression). The [BITSTRING] operator takes a list of fields,
+ similar to the list of fields for matching:
+
+{[
+let version = 1 ;;
+let data = 10 ;;
+let bits =
+ BITSTRING {
+ version : 4;
+ data : 12
+ } ;;
+
+ (* Constructs a 16-bit bitstring with the first four bits containing
+ the integer 1, and the following 12 bits containing the integer 10,
+ arranged in network byte order. *)
+
+Bitmatch.hexdump_bitstring stdout bits ;;
+
+ (* Prints:
+
+ 00000000 10 0a |.. |
+ *)
+]}
+
+ The format of each field is the same as for pattern fields (see
+ {{:#patternfieldreference}Pattern field reference section}), and
+ things like computed length fields, fixed value fields, insertion
+ of bitstrings within bitstrings, etc. are all supported.
+
+ {3 Construction exception}
+
+ The [BITSTRING] operator may throw a {!Construct_failure}
+ exception at runtime.
+
+ Runtime errors include:
+
+ - int field length not in the range \[1..64\]
+ - a bitstring with a length declared which doesn't have the
+ same length at runtime
+ - trying to insert an out of range value into an int field
+ (eg. an unsigned int field which is 2 bits wide can only
+ take values in the range \[0..3\]).
+
+ {2:integertypes Integer types}
+
+ Integer types are mapped to OCaml types [bool], [int], [int32] or
+ [int64] using a system which tries to ensure that (a) the types are
+ reasonably predictable and (b) the most efficient type is
+ preferred.
+
+ The rules are slightly different depending on whether the bit
+ length expression in the field is a compile-time constant or a
+ computed expression.
+
+ Detection of compile-time constants is quite simplistic so only an
+ simple integer literals and simple expressions (eg. [5*8]) are
+ recognized as constants.
+
+ In any case the bit size of an integer is limited to the range
+ \[1..64\]. This is detected as a compile-time error if that is
+ possible, otherwise a runtime check is added which can throw an
+ [Invalid_argument] exception.
+
+ The mapping is thus:
+
+ {v
+ Bit size ---- OCaml type ----
+ Constant Computed expression
+
+ 1 bool int64
+ 2..31 int int64
+ 32 int32 int64
+ 33..64 int64 int64
+ v}
+
+ A possible future extension may allow people with 64 bit computers
+ to specify a more optimal [int] type for bit sizes in the range
+ [32..63]. If this was implemented then such code {i could not even
+ be compiled} on 32 bit platforms, so it would limit portability.
+
+ Another future extension may be to allow computed
+ expressions to assert min/max range for the bit size,
+ allowing a more efficient data type than int64 to be
+ used. (Of course under such circumstances there would
+ still need to be a runtime check to enforce the
+ size).