Implement dropbits, takebits, subbitstring.

[ocaml-bitstring.git] / bitmatch.mli
diff --git a/bitmatch.mli b/bitmatch.mli

index 90f6acc..cd59f95 100644 (file)
--- a/bitmatch.mli
+++ b/bitmatch.mli
@@ -4,7 +4,8 @@
   * This library is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public
   * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
+ * version 2 of the License, or (at your option) any later version,
+ * with the OCaml linking exception described in COPYING.LIB.
   *
   * This library is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,7 +16,7 @@
   * License along with this library; if not, write to the Free Software
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   *
- * $Id: bitmatch.mli,v 1.20 2008-05-08 21:28:28 rjones Exp $
+ * $Id$
   *)
  
  (**
@@ -173,7 +174,7 @@ let make_message typ subtype param =
  
     The general form of [bitmatch] is:
  
-   [bitmatch {] {i bitstring-expression} [} with]
+   [bitmatch] {i bitstring-expression} [with]
  
     [| {] {i pattern} [} ->] {i code}
  
@@ -290,19 +291,24 @@ bitmatch bits with
     A bitstring field of length 0 matches an empty bitstring
     (occasionally useful when matching optional subfields).
  
-   Qualifiers are a list of identifiers which control the type,
+   Qualifiers are a list of identifiers/expressions which control the type,
     signedness and endianness of the field.  Permissible qualifiers are:
  
-   - [int] (field has an integer type)
-   - [string] (field is a string type)
-   - [bitstring] (field is a bitstring type)
-   - [signed] (field is signed)
-   - [unsigned] (field is unsigned)
-   - [bigendian] (field is big endian - a.k.a network byte order)
-   - [littleendian] (field is little endian - a.k.a Intel byte order)
-   - [nativeendian] (field is same endianness as the machine)
-
-   The default settings are [int], [unsigned], [bigendian].
+   - [int]: field has an integer type
+   - [string]: field is a string type
+   - [bitstring]: field is a bitstring type
+   - [signed]: field is signed
+   - [unsigned]: field is unsigned
+   - [bigendian]: field is big endian - a.k.a network byte order
+   - [littleendian]: field is little endian - a.k.a Intel byte order
+   - [nativeendian]: field is same endianness as the machine
+   - [endian (expr)]: [expr] should be an expression which evaluates to
+       a value of type {!endian}, i.e. [LittleEndian], [BigEndian] or [NativeEndian].
+       The expression is an arbitrary OCaml expression and can use the
+       value of earlier fields in the bitmatch.
+   - [offset (expr)]: see {{:#computedoffsets}computed offsets} below.
+
+   The default settings are [int], [unsigned], [bigendian], no offset.
  
     Note that many of these qualifiers cannot be used together,
     eg. bitstrings do not have endianness.  The syntax extension should
@@ -380,7 +386,7 @@ Bitmatch.hexdump_bitstring stdout bits ;;
     length expression in the field is a compile-time constant or a
     computed expression.
  
-   Detection of compile-time constants is quite simplistic so only an
+   Detection of compile-time constants is quite simplistic so only
     simple integer literals and simple expressions (eg. [5*8]) are
     recognized as constants.
  
@@ -413,21 +419,51 @@ Bitmatch.hexdump_bitstring stdout bits ;;
     still need to be a runtime check to enforce the
     size).
  
+   {2:computedoffsets Computed offsets}
+
+   You can add an [offset(..)] qualifier to bitmatch patterns in order
+   to move the current offset within the bitstring forwards.
+
+   For example:
+
+{[
+bitmatch bits with
+| { field1 : 8;
+    field2 : 8 : offset(160) } -> ...
+]}
+
+   matches [field1] at the start of the bitstring and [field2]
+   at 160 bits into the bitstring.  The middle 152 bits go
+   unmatched (i.e. can be anything).
+
+   The generated code is efficient.  If field lengths and offsets
+   are known to be constant at compile time, then almost all
+   runtime checks are avoided.  Non-constant field lengths and/or
+   non-constant offsets can result in more runtime checks being added.
+
+   Note that moving the offset backwards, and moving the offset in
+   [BITSTRING] constructors, are both not supported at present.
+
+   {2 Named patterns and persistent patterns}
+
+   Please see {!Bitmatch_persistent} for documentation on this subject.
+
     {2 Compiling}
  
     Using the compiler directly you can do:
  
     {v
     ocamlc -I +bitmatch \
-     -pp "camlp4o `ocamlc -where`/bitmatch/pa_bitmatch.cmo" \
-     bitmatch.cma test.ml -o test
+     -pp "camlp4of bitmatch.cma bitmatch_persistent.cma \
+            `ocamlc -where`/bitmatch/pa_bitmatch.cmo" \
+     unix.cma bitmatch.cma test.ml -o test
     v}
  
     Simpler method using findlib:
  
     {v
     ocamlfind ocamlc \
-     -package bitmatch.syntax -syntax bitmatch.syntax \
+     -package bitmatch,bitmatch.syntax -syntax bitmatch.syntax \
       -linkpkg test.ml -o test
     v}
  
@@ -479,7 +515,7 @@ Bitmatch.hexdump_bitstring stdout bits ;;
     let len = read_untrusted_source () in
     let buffer = allocate_bitstring () in
     BITSTRING {
-   buffer : len : bitstring
+     buffer : len : bitstring
     }
     ]}
  
@@ -526,6 +562,11 @@ Bitmatch.hexdump_bitstring stdout bits ;;
     {3 Types}
  *)
  
+type endian = BigEndian | LittleEndian | NativeEndian
+
+val string_of_endian : endian -> string
+(** Endianness, and conversion of an {!endian} value to a string. *)
+
  type bitstring = string * int * int
  (** [bitstring] is the basic type used to store bitstrings.
  
@@ -559,7 +600,46 @@ exception Construct_failure of string * string * int * int
      location of the [BITSTRING] constructor that failed.
  *)
  
-(** {3 Bitstrings} *)
+(** {3 Bitstring manipulation} *)
+
+val bitstring_length : bitstring -> int
+(** [bitstring_length bitstring] returns the length of
+    the bitstring in bits.
+
+    Note this just returns the third field in the {!bitstring} tuple. *)
+
+val subbitstring : bitstring -> int -> int -> bitstring
+(** [subbitstring bits off len] returns a sub-bitstring
+    of the bitstring, starting at offset [off] bits and
+    with length [len] bits.
+
+    If the original bitstring is not long enough to do this
+    then the function raises [Invalid_argument "subbitstring"].
+
+    Note that this function just changes the offset and length
+    fields of the {!bitstring} tuple, so is very efficient. *)
+
+val dropbits : int -> bitstring -> bitstring
+(** [dropbits n bits] drops the first [n] bits of the bitstring
+    and returns a new bitstring which is shorter by [n] bits.
+
+    If the length of the original bitstring is less than n bits,
+    this raises [Invalid_argument "dropbits"].
+
+    Note that this function just changes the offset and length
+    fields of the {!bitstring} tuple, so is very efficient. *)
+
+val takebits : int -> bitstring -> bitstring
+(** [takebits n bits] takes the first [n] bits of the bitstring
+    and returns a new bitstring which is exactly [n] bits long.
+
+    If the length of the original bitstring is less than n bits,
+    this raises [Invalid_argument "takebits"].
+
+    Note that this function just changes the offset and length
+    fields of the {!bitstring} tuple, so is very efficient. *)
+
+(** {3 Constructing bitstrings} *)
  
  val empty_bitstring : bitstring
  (** [empty_bitstring] is the empty, zero-length bitstring. *)
@@ -575,7 +655,16 @@ val make_bitstring : int -> char -> bitstring
      For example, [make_bitstring 16 '\x5a'] will create
      the bitstring [0x5a5a] or in binary [0101 1010 0101 1010].
  
-    Note that the length is in bits, not bytes. *)
+    Note that the length is in bits, not bytes.  The length does NOT
+    need to be a multiple of 8. *)
+
+val zeroes_bitstring : int -> bitstring
+(** [zeroes_bitstring n] creates an [n] bit bitstring of all 0's.
+
+    Actually this is the same as {!create_bitstring}. *)
+
+val ones_bitstring : int -> bitstring
+(** [ones_bitstring n] creates an [n] bit bitstring of all 1's. *)
  
  val bitstring_of_string : string -> bitstring
  (** [bitstring_of_string str] creates a bitstring
@@ -618,9 +707,7 @@ val bitstring_of_file_descr_max : Unix.file_descr -> int -> bitstring
      [max] bytes from the channel (or fewer if the end of input
      occurs before that). *)
  
-val bitstring_length : bitstring -> int
-(** [bitstring_length bitstring] returns the length of
-    the bitstring in bits. *)
+(** {3 Converting bitstrings} *)
  
  val string_of_bitstring : bitstring -> string
  (** [string_of_bitstring bitstring] converts a bitstring to a string
@@ -629,7 +716,35 @@ val string_of_bitstring : bitstring -> string
      This function is inefficient.  In the best case when the bitstring
      is nicely byte-aligned we do a [String.sub] operation.  If the
      bitstring isn't aligned then this involves a lot of bit twiddling
-    and is particularly inefficient. *)
+    and is particularly inefficient.
+
+    If the bitstring is not a multiple of 8 bits wide then the
+    remaining bits are stored in the high bits of the final byte
+    of the string, and the low bits of that byte are set to 0. *)
+
+val bitstring_to_file : bitstring -> string -> unit
+(** [bitstring_to_file bits filename] writes the bitstring [bits]
+    to the file [filename].  It overwrites the output file.
+
+    Some restrictions apply, see {!bitstring_to_chan}. *)
+
+val bitstring_to_chan : bitstring -> out_channel -> unit
+(** [bitstring_to_chan bits chan] writes the bitstring [bits]
+    to the channel [chan].
+
+    Channels are made up of bytes, whereas bitstrings can be any bit
+    length including fractions of bytes.  So this function only works
+    if the length of the bitstring is an exact multiple of 8 bits
+    (otherwise it raises [Invalid_argument "bitstring_to_chan"]).
+
+    Furthermore the function is efficient only in the case where
+    the bitstring is stored fully aligned, otherwise it has to
+    do inefficient bit twiddling like {!string_of_bitstring}.
+
+    In the common case where the bitstring was generated by the
+    [BITSTRING] operator and is an exact multiple of 8 bits wide,
+    then this function will always work efficiently.
+*)
  
  (** {3 Printing bitstrings} *)
  
@@ -654,6 +769,12 @@ end
  
  (** {3 Miscellaneous} *)
  
+val package : string
+(** The package name, always ["ocaml-bitmatch"] *)
+
+val version : string
+(** The package version as a string. *)
+
  val debug : bool ref
  (** Set this variable to true to enable extended debugging.
      This only works if debugging was also enabled in the
@@ -678,22 +799,48 @@ val extract_int_be_unsigned : string -> int -> int -> int -> int * int * int
  
  val extract_int_le_unsigned : string -> int -> int -> int -> int * int * int
  
+val extract_int_ne_unsigned : string -> int -> int -> int -> int * int * int
+
+val extract_int_ee_unsigned : endian -> string -> int -> int -> int -> int * int * int
+
  val extract_int32_be_unsigned : string -> int -> int -> int -> int32 * int * int
  
  val extract_int32_le_unsigned : string -> int -> int -> int -> int32 * int * int
  
+val extract_int32_ne_unsigned : string -> int -> int -> int -> int32 * int * int
+
+val extract_int32_ee_unsigned : endian -> string -> int -> int -> int -> int32 * int * int
+
  val extract_int64_be_unsigned : string -> int -> int -> int -> int64 * int * int
  
  val extract_int64_le_unsigned : string -> int -> int -> int -> int64 * int * int
  
+val extract_int64_ne_unsigned : string -> int -> int -> int -> int64 * int * int
+
+val extract_int64_ee_unsigned : endian -> string -> int -> int -> int -> int64 * int * int
+
  val construct_bit : Buffer.t -> bool -> int -> exn -> unit
  
  val construct_char_unsigned : Buffer.t -> int -> int -> exn -> unit
  
  val construct_int_be_unsigned : Buffer.t -> int -> int -> exn -> unit
  
+val construct_int_ne_unsigned : Buffer.t -> int -> int -> exn -> unit
+
+val construct_int_ee_unsigned : endian -> Buffer.t -> int -> int -> exn -> unit
+
  val construct_int32_be_unsigned : Buffer.t -> int32 -> int -> exn -> unit
  
+val construct_int32_ne_unsigned : Buffer.t -> int32 -> int -> exn -> unit
+
+val construct_int32_ee_unsigned : endian -> Buffer.t -> int32 -> int -> exn -> unit
+
  val construct_int64_be_unsigned : Buffer.t -> int64 -> int -> exn -> unit
  
+val construct_int64_ne_unsigned : Buffer.t -> int64 -> int -> exn -> unit
+
+val construct_int64_ee_unsigned : endian -> Buffer.t -> int64 -> int -> exn -> unit
+
  val construct_string : Buffer.t -> string -> unit
+
+val construct_bitstring : Buffer.t -> bitstring -> unit