X-Git-Url: http://git.annexia.org/?a=blobdiff_plain;f=bitmatch.mli;h=90f6acccdfd664602b443e43ba780a8fc18e3819;hb=e87f0879fef8e32e7ae7f7103f420c1612f3863f;hp=eb24575adbe0c4d1b05423a05f5e1e3b47cc5859;hpb=535e666b821198395c4ce8e2032436dcc9fe5828;p=ocaml-bitstring.git diff --git a/bitmatch.mli b/bitmatch.mli index eb24575..90f6acc 100644 --- a/bitmatch.mli +++ b/bitmatch.mli @@ -15,7 +15,7 @@ * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * - * $Id: bitmatch.mli,v 1.15 2008-04-25 12:08:51 rjones Exp $ + * $Id: bitmatch.mli,v 1.20 2008-05-08 21:28:28 rjones Exp $ *) (** @@ -29,7 +29,7 @@ this module to both parse and generate binary formats, for example, communications protocols, disk formats and binary files. - {{:http://et.redhat.com/~rjones/bitmatch/}OCaml bitmatch website} + {{:http://code.google.com/p/bitmatch/}OCaml bitmatch website} {2 Examples} @@ -186,7 +186,7 @@ let make_message typ subtype param = match then the standard library [Match_failure] exception is thrown. - Patterns look a bit different from normal match patterns. The + Patterns look a bit different from normal match patterns. They consist of a list of bitfields separated by [;] where each bitfield contains a bind variable, the width (in bits) of the field, and other information. Some example patterns: @@ -255,6 +255,15 @@ bitmatch bits with the integer 4 or the integer 6. *) ]} + One may also match on strings: + +{[ +| { "MAGIC" : 5*8 : string } -> ... + + (* Only matches if the string "MAGIC" appears at the start + of the input. *) +]} + {3:patternfieldreference Pattern field reference} The exact format of each pattern field is: @@ -285,6 +294,7 @@ bitmatch bits with signedness and endianness of the field. 
Permissible qualifiers are: - [int] (field has an integer type) + - [string] (field is a string type) - [bitstring] (field is a bitstring type) - [signed] (field is signed) - [unsigned] (field is unsigned) @@ -371,9 +381,8 @@ Bitmatch.hexdump_bitstring stdout bits ;; computed expression. Detection of compile-time constants is quite simplistic so only an - immediate, simple integer is recognised as a constant and anything - else is considered a computed expression, even expressions such as - [5-2] which are obviously (to our eyes) constant. + simple integer literals and simple expressions (eg. [5*8]) are + recognized as constants. In any case the bit size of an integer is limited to the range \[1..64\]. This is detected as a compile-time error if that is @@ -433,16 +442,16 @@ Bitmatch.hexdump_bitstring stdout bits ;; overflows. In addition to OCaml's normal bounds checks, we check that field lengths are >= 0, and many additional checks. - Denial of service attacks are more problematic although we still - believe that the library is robust. We only work forwards through - the bitstring, thus computation will eventually terminate. As for - computed lengths, code such as this is thought to be secure: + Denial of service attacks are more problematic. We only work + forwards through the bitstring, thus computation will eventually + terminate. As for computed lengths, code such as this is thought + to be secure: -{[ -bitmatch bits with -| { len : 64; - buffer : Int64.to_int len : bitstring } -> -]} + {[ + bitmatch bits with + | { len : 64; + buffer : Int64.to_int len : bitstring } -> + ]} The [len] field can be set arbitrarily large by an attacker, but when pattern-matching against the [buffer] field this merely causes @@ -451,22 +460,28 @@ bitmatch bits with allocation of sub-bitstrings is efficient and doesn't involve an arbitary-sized allocation or any copying. 
- The main protection against attackers should therefore be to ensure - that the main program will only read input bitstrings up to a - certain length, which is outside the scope of this library. + However the above does not necessarily apply to strings used in + matching, since they may cause the library to use the + {!Bitmatch.string_of_bitstring} function, which allocates a string. + So you should take care if you use the [string] type particularly + with a computed length that is derived from external input. + + The main protection against attackers should be to ensure that the + main program will only read input bitstrings up to a certain + length, which is outside the scope of this library. {3 Security on output} As with the input side, computed lengths are believed to be safe. For example: -{[ -let len = read_untrusted_source () in -let buffer = allocate_bitstring () in -BITSTRING { - buffer : len : bitstring -} -]} + {[ + let len = read_untrusted_source () in + let buffer = allocate_bitstring () in + BITSTRING { + buffer : len : bitstring + } + ]} This code merely causes a check that buffer's length is the same as [len]. However the program function [allocate_bitstring] must @@ -517,13 +532,15 @@ type bitstring = string * int * int The type contains the underlying data (a string), the current bit offset within the string and the current bit length of the string (counting from the - bit offset). Note that the offsets are bits, not bytes. + bit offset). Note that the offset and length are + in {b bits}, not bytes. Normally you don't need to use the bitstring type directly, since there are functions and syntax extensions which hide the details. - See {!bitstring_of_file}, {!hexdump_bitstring}, - {!bitstring_length}. + + See also {!bitstring_of_string}, {!bitstring_of_file}, + {!hexdump_bitstring}, {!bitstring_length}. *) (** {3 Exceptions} *) @@ -560,22 +577,46 @@ val make_bitstring : int -> char -> bitstring Note that the length is in bits, not bytes. 
*) +val bitstring_of_string : string -> bitstring +(** [bitstring_of_string str] creates a bitstring + of length [String.length str * 8] (bits) containing the + bits in [str]. + + Note that the bitstring uses [str] as the underlying + string (see the representation of {!bitstring}) so you + should not change [str] after calling this. *) + +val bitstring_of_file : string -> bitstring +(** [bitstring_of_file filename] loads the named file + into a bitstring. *) + val bitstring_of_chan : in_channel -> bitstring (** [bitstring_of_chan chan] loads the contents of the input channel [chan] as a bitstring. The length of the final bitstring is determined by the remaining input in [chan], but will always - be a multiple of 8 bits. *) + be a multiple of 8 bits. -val bitstring_of_file : string -> bitstring -(** [bitstring_of_file filename] loads the named file - into a bitstring. *) + See also {!bitstring_of_chan_max}. *) -val hexdump_bitstring : out_channel -> bitstring -> unit -(** [hexdump_bitstring chan bitstring] prints the bitstring - to the output channel in a format similar to the - Unix command [hexdump -C]. *) +val bitstring_of_chan_max : in_channel -> int -> bitstring +(** [bitstring_of_chan_max chan max] works like + {!bitstring_of_chan} but will only read up to + [max] bytes from the channel (or fewer if the end of input + occurs before that). *) + +val bitstring_of_file_descr : Unix.file_descr -> bitstring +(** [bitstring_of_file_descr fd] loads the contents of + the file descriptor [fd] as a bitstring. + + See also {!bitstring_of_chan}, {!bitstring_of_file_descr_max}. *) + +val bitstring_of_file_descr_max : Unix.file_descr -> int -> bitstring +(** [bitstring_of_file_descr_max fd max] works like + {!bitstring_of_file_descr} but will only read up to + [max] bytes from the file descriptor (or fewer if the end of input + occurs before that). 
*) val bitstring_length : bitstring -> int (** [bitstring_length bitstring] returns the length of @@ -588,10 +629,14 @@ val string_of_bitstring : bitstring -> string This function is inefficient. In the best case when the bitstring is nicely byte-aligned we do a [String.sub] operation. If the bitstring isn't aligned then this involves a lot of bit twiddling - and is particularly inefficient. + and is particularly inefficient. *) - XXX This function wouldn't be needed so much if the [bitmatch] - operator allowed us to pattern-match on strings. *) +(** {3 Printing bitstrings} *) + +val hexdump_bitstring : out_channel -> bitstring -> unit +(** [hexdump_bitstring chan bitstring] prints the bitstring + to the output channel in a format similar to the + Unix command [hexdump -C]. *) (** {3 Bitstring buffer} *) @@ -639,10 +684,16 @@ val extract_int32_le_unsigned : string -> int -> int -> int -> int32 * int * int val extract_int64_be_unsigned : string -> int -> int -> int -> int64 * int * int -val construct_bit : Buffer.t -> bool -> int -> unit +val extract_int64_le_unsigned : string -> int -> int -> int -> int64 * int * int + +val construct_bit : Buffer.t -> bool -> int -> exn -> unit val construct_char_unsigned : Buffer.t -> int -> int -> exn -> unit val construct_int_be_unsigned : Buffer.t -> int -> int -> exn -> unit +val construct_int32_be_unsigned : Buffer.t -> int32 -> int -> exn -> unit + val construct_int64_be_unsigned : Buffer.t -> int64 -> int -> exn -> unit + +val construct_string : Buffer.t -> string -> unit