1 (** Bitmatch library. *)
2 (* Copyright (C) 2008 Red Hat Inc., Richard W.M. Jones
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 * $Id: bitmatch.mli,v 1.9 2008-04-02 11:06:31 rjones Exp $
24 Bitmatch adds Erlang-style bitstrings and matching over bitstrings
25 as a syntax extension and library for OCaml. You can use
26 this module to both parse and generate binary formats.
30 A function which can parse IPv4 packets:
37 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
38 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
39 | 4 | IHL |Type of Service| Total Length |
40 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
41 | Identification |Flags| Fragment Offset |
42 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
43 | Time to Live | Protocol | Header Checksum |
44 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
46 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
47 | Destination Address |
48 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
50 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
52 | 4 : 4; hdrlen : 4; tos : 8; length : 16;
53 identification : 16; flags : 3; fragoffset : 13;
54 ttl : 8; protocol : 8; checksum : 16;
57 options : (hdrlen-5)*32 : bitstring;
58 payload : -1 : bitstring ->
61 printf " header length: %d * 32 bit words\n" hdrlen;
62 printf " type of service: %d\n" tos;
63 printf " packet length: %d bytes\n" length;
64 printf " identification: %d\n" identification;
65 printf " flags: %d\n" flags;
66 printf " fragment offset: %d\n" fragoffset;
67 printf " ttl: %d\n" ttl;
68 printf " protocol: %d\n" protocol;
69 printf " checksum: %d\n" checksum;
70 printf " source: %lx dest: %lx\n" source dest;
71 printf " header options + padding:\n";
72 Bitmatch.hexdump_bitstring stdout options;
73 printf " packet payload:\n";
74 Bitmatch.hexdump_bitstring stdout payload
77 eprintf "unknown IP version %d\n" version;
81 eprintf "data is smaller than one nibble:\n";
82 Bitmatch.hexdump_bitstring stderr pkt;
86 A program which can parse
87 {{:http://lxr.linux.no/linux/include/linux/ext3_fs.h}Linux EXT3 filesystem superblocks}:
90 let bits = Bitmatch.bitstring_of_file "tests/ext3_sb"
94 | s_inodes_count : 32 : littleendian; (* Inodes count *)
95 s_blocks_count : 32 : littleendian; (* Blocks count *)
96 s_r_blocks_count : 32 : littleendian; (* Reserved blocks count *)
97 s_free_blocks_count : 32 : littleendian; (* Free blocks count *)
98 s_free_inodes_count : 32 : littleendian; (* Free inodes count *)
99 s_first_data_block : 32 : littleendian; (* First Data Block *)
100 s_log_block_size : 32 : littleendian; (* Block size *)
101 s_log_frag_size : 32 : littleendian; (* Fragment size *)
102 s_blocks_per_group : 32 : littleendian; (* # Blocks per group *)
103 s_frags_per_group : 32 : littleendian; (* # Fragments per group *)
104 s_inodes_per_group : 32 : littleendian; (* # Inodes per group *)
105 s_mtime : 32 : littleendian; (* Mount time *)
106 s_wtime : 32 : littleendian; (* Write time *)
107 s_mnt_count : 16 : littleendian; (* Mount count *)
108 s_max_mnt_count : 16 : littleendian; (* Maximal mount count *)
109 0xef53 : 16 : littleendian -> (* Magic signature *)
111 printf "ext3 superblock:\n";
112 printf " s_inodes_count = %ld\n" s_inodes_count;
113 printf " s_blocks_count = %ld\n" s_blocks_count;
114 printf " s_free_inodes_count = %ld\n" s_free_inodes_count;
115 printf " s_free_blocks_count = %ld\n" s_free_blocks_count
118 eprintf "not an ext3 superblock!\n%!";
122 Constructing packets for a simple binary message
127 +---------------+---------------+--------------------------+
128 | type | subtype | parameter |
129 +---------------+---------------+--------------------------+
130 <-- 16 bits --> <-- 16 bits --> <------- 32 bits -------->
132 All fields are in network byte order.
135 let make_message typ subtype param =
142 {2 Loading, creating bitstrings}
144 The basic data type is the {!bitstring}, a string of bits of
145 arbitrary length. Bitstrings can be any length in bits and
146 operations do not need to be byte-aligned (although they will
147 generally be more efficient if they are byte-aligned).
149 Internally a bitstring is stored as a normal OCaml [string]
150 together with an offset and length, where the offset and length are
151 measured in bits. Thus one can efficiently form substrings of
152 bitstrings, overlay a bitstring on existing data, and load and save
153 bitstrings from files or other external sources.
155 To load a bitstring from a file use {!bitstring_of_file} or
156 {!bitstring_of_chan}.
158 There are also functions to create bitstrings from arbitrary data.
159 See the reference below.
161 {2 Matching bitstrings with patterns}
163 Use the [bitmatch] operator (part of the syntax extension) to break
164 apart a bitstring into its fields. [bitmatch] works a lot like the
165 OCaml [match] operator.
167 The general form of [bitmatch] is:
169 [bitmatch] {i bitstring-expression} [with]
171 [|] {i pattern} [->] {i code}
173 [|] {i pattern} [->] {i code}
177 As with normal match, the statement attempts to match the
178 bitstring against each pattern in turn. If none of the patterns
179 match then the standard library [Match_failure] exception is
182 Patterns look a bit different from normal match patterns. The
183 consist of a list of bitfields separated by [;] where each bitfield
184 contains a bind variable, the width (in bits) of the field, and
185 other information. Some example patterns:
190 | version : 8; name : 8; param : 8 -> ...
192 (* Bitstring of at least 3 bytes. First byte is the version
193 number, second byte is a field called name, third byte is
194 a field called parameter. *)
197 printf "flag is %b\n" flag
199 (* A single flag bit (mapped into an OCaml boolean). *)
201 | len : 4; data : 1+len ->
202 printf "len = %d, data = 0x%Lx\n" len data
204 (* A 4-bit length, followed by 1-16 bits of data, where the
205 length of the data is computed from len. *)
207 | ipv6_source : 128 : bitstring;
208 ipv6_dest : 128 : bitstring -> ...
210 (* IPv6 source and destination addresses. Each is 128 bits
211 and is mapped into a bitstring type which will be a substring
212 of the main bitstring expression. *)
215 You can also add conditional when-clauses:
219 when version = 4 || version = 6 -> ...
221 (* Only match and run the code when version is 4 or 6. If
222 it isn't we will drop through to the next case. *)
225 Note that the pattern is only compared against the first part of
226 the bitstring (there may be more data in the bitstring following
227 the pattern, which is not matched). In terms of regular
228 expressions you might say that the pattern matches [^pattern], not
229 [^pattern$]. To ensure that the bitstring contains only the
230 pattern, add a length -1 bitstring to the end and test that its
231 length is zero in the when-clause:
235 rest : -1 : bitstring
236 when Bitmatch.bitstring_length rest = 0 -> ...
238 (* Only matches exactly 4 bits. *)
241 Normally the first part of each field is a binding variable,
242 but you can also match a constant, as in:
247 (* Only matches if the first 4 bits contain the integer 6. *)
250 {3 Pattern field reference}
252 The exact format of each pattern field is:
254 [pattern : length [: qualifier [,qualifier ...]]]
256 [pattern] is the pattern, binding variable name, or constant to
257 match. [length] is the length in bits which may be either a
258 constant or an expression. The length expression is just an OCaml
259 expression and can use any values defined in the program, and refer
260 back to earlier fields (but not to later fields).
262 Integers can only have lengths in the range [1..64] bits. See the
263 {{:#integertypes}integer types} section below for how these are
264 mapped to the OCaml int/int32/int64 types. This is checked
265 at compile time if the length expression is constant, otherwise it is
266 checked at runtime and you will get a runtime exception eg. in
267 the case of a computed length expression.
269 A bitstring field of length -1 matches all the rest of the
270 bitstring (thus this is only useful as the last field in a
273 A bitstring field of length 0 matches an empty bitstring
274 (occasionally useful when matching optional subfields).
276 Qualifiers are a list of identifiers which control the type,
277 signedness and endianness of the field. Permissible qualifiers are:
279 - [int] (field has an integer type)
280 - [bitstring] (field is a bitstring type)
281 - [signed] (field is signed)
282 - [unsigned] (field is unsigned)
283 - [bigendian] (field is big endian - a.k.a network byte order)
284 - [littleendian] (field is little endian - a.k.a Intel byte order)
285 - [nativeendian] (field is same endianness as the machine)
287 The default settings are [int], [unsigned], [bigendian].
289 Note that many of these qualifiers cannot be used together,
290 eg. bitstrings do not have endianness. The syntax extension should
291 give you a compile-time error if you use incompatible qualifiers.
293 {3 Other cases in bitmatch}
295 As well as a list of fields, it is possible to name the
296 bitstring and/or have a default match case:
301 (* Default match case. *)
305 (* Default match case, with 'pkt' bound to the whole bitstring. *)
308 {2 Constructing bitstrings}
310 Bitstrings may be constructed using the [BITSTRING] operator (as an
311 expression). The [BITSTRING] operator takes a list of fields,
312 similar to the list of fields for matching:
322 (* Constructs a 16-bit bitstring with the first four bits containing
323 the integer 1, and the following 12 bits containing the integer 10,
324 arranged in network byte order. *)
326 Bitmatch.hexdump_bitstring stdout bits ;;
337 {2:integertypes Integer types}
350 {2 Safety and security considerations}
365 type bitstring = string * int * int
366 (** [bitstring] is the basic type used to store bitstrings.
368 The type contains the underlying data (a string),
369 the current bit offset within the string and the
370 current bit length of the string (counting from the
371 bit offset). Note that the offsets are bits, not bytes.
373 Normally you don't need to use the bitstring type
374 directly, since there are functions and syntax
375 extensions which hide the details.
376 See {!bitstring_of_file}, {!hexdump_bitstring},
380 (** {3 Exceptions} *)
382 exception Construct_failure of string * string * int * int
383 (** [Construct_failure (message, file, line, char)] may be
384 raised by the [BITSTRING] constructor.
386 Common reasons are that values are out of range of
387 the fields that contain them, or that computed lengths
388 are impossible (eg. negative length bitfields).
390 [message] is the error message.
392 [file], [line] and [char] point to the original source
393 location of the [BITSTRING] constructor that failed.
396 (** {3 Bitstrings} *)
398 val empty_bitstring : bitstring
399 (** [empty_bitstring] is the empty, zero-length bitstring. *)
401 val create_bitstring : int -> bitstring
402 (** [create_bitstring n] creates an [n] bit bitstring
403 containing all zeroes. *)
405 val make_bitstring : int -> char -> bitstring
406 (** [make_bitstring n c] creates an [n] bit bitstring
407 containing the repeated 8 bit pattern in [c].
409 For example, [make_bitstring 16 '\x5a'] will create
410 the bitstring [0x5a5a] or in binary [0101 1010 0101 1010].
412 Note that the length is in bits, not bytes. *)
414 val bitstring_of_chan : in_channel -> bitstring
415 (** [bitstring_of_chan chan] loads the contents of
416 the input channel [chan] as a bitstring.
418 The length of the final bitstring is determined
419 by the remaining input in [chan], but will always
420 be a multiple of 8 bits. *)
422 val bitstring_of_file : string -> bitstring
423 (** [bitstring_of_file filename] loads the named file
426 val hexdump_bitstring : out_channel -> bitstring -> unit
427 (** [hexdump_bitstring chan bitstring] prints the bitstring
428 to the output channel in a format similar to the
429 Unix command [hexdump -C]. *)
431 val bitstring_length : bitstring -> int
432 (** [bitstring_length bitstring] returns the length of
433 the bitstring in bits. *)
435 (** {3 Bitstring buffer} *)
439 val create : unit -> t
440 val contents : t -> bitstring
441 val add_bits : t -> string -> int -> unit
442 val add_bit : t -> bool -> unit
443 val add_byte : t -> int -> unit
445 (** Buffers are mainly used by the [BITSTRING] constructor, but
446 may also be useful for end users. They work much like the
447 standard library [Buffer] module. *)
449 (** {3 Miscellaneous} *)
452 (** Set this variable to true to enable extended debugging.
453 This only works if debugging was also enabled in the
454 [pa_bitmatch.ml] file at compile time, otherwise it
459 (* Private functions, called from generated code. Do not use
460 * these directly - they are not safe.
463 val extract_bitstring : string -> int -> int -> int -> bitstring * int * int
465 val extract_remainder : string -> int -> int -> bitstring * int * int
467 val extract_bit : string -> int -> int -> int -> bool * int * int
469 val extract_char_unsigned : string -> int -> int -> int -> int * int * int
471 val extract_int_be_unsigned : string -> int -> int -> int -> int * int * int
473 val extract_int_le_unsigned : string -> int -> int -> int -> int * int * int
475 val extract_int32_be_unsigned : string -> int -> int -> int -> int32 * int * int
477 val extract_int32_le_unsigned : string -> int -> int -> int -> int32 * int * int
479 val extract_int64_be_unsigned : string -> int -> int -> int -> int64 * int * int
481 val construct_bit : Buffer.t -> bool -> int -> unit
483 val construct_char_unsigned : Buffer.t -> int -> int -> exn -> unit
485 val construct_int_be_unsigned : Buffer.t -> int -> int -> exn -> unit
487 val construct_int64_be_unsigned : Buffer.t -> int64 -> int -> exn -> unit