Include CHANGES file in tarball.

[ocaml-bitstring.git] / bitmatch.ml
diff --git a/bitmatch.ml b/bitmatch.ml

index 0c9edc0..e382504 100644 (file)
--- a/bitmatch.ml
+++ b/bitmatch.ml
@@ -1,9 +1,28 @@
  (* Bitmatch library.
- * $Id: bitmatch.ml,v 1.5 2008-04-01 17:05:37 rjones Exp $
+ * Copyright (C) 2008 Red Hat Inc., Richard W.M. Jones
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * $Id$
   *)
  
  open Printf
  
+include Bitmatch_types
+include Bitmatch_config
+
  (* Enable runtime debug messages.  Must also have been enabled
   * in pa_bitmatch.ml.
   *)
@@ -25,6 +44,8 @@ let make_bitstring len c = String.make ((len+7) lsr 3) c, 0, len
  
  let create_bitstring len = make_bitstring len '\000'
  
+(* Wrap a plain string as a bitstring covering all of its bytes:
+ * offset 0, length = 8 * number of bytes.
+ *)
+let bitstring_of_string str =
+  let nbits = String.length str lsl 3 in
+  (str, 0, nbits)
+
  let bitstring_of_chan chan =
    let tmpsize = 16384 in
    let buf = Buffer.create tmpsize in
@@ -35,15 +56,258 @@ let bitstring_of_chan chan =
    done;
    Buffer.contents buf, 0, Buffer.length buf lsl 3
  
+(* Read at most [max] bytes from [chan] and return them as a
+ * bitstring starting at offset 0.  Reading stops early as soon as
+ * [input] returns 0 bytes.
+ *)
+let bitstring_of_chan_max chan max =
+  let chunk = 16384 in
+  let acc = Buffer.create chunk in
+  let scratch = String.create chunk in
+  let total = ref 0 in
+  let finished = ref false in
+  while not !finished && !total < max do
+    (* Never ask for more than is still allowed by [max]. *)
+    let want = min chunk (max - !total) in
+    let got = input chan scratch 0 want in
+    if got > 0 then (
+      Buffer.add_substring acc scratch 0 got;
+      total := !total + got
+    ) else
+      finished := true
+  done;
+  Buffer.contents acc, 0, !total lsl 3
+
+(* Read from the Unix file descriptor [fd] until [Unix.read] returns
+ * 0 bytes, and return everything read as a bitstring starting at
+ * offset 0.
+ *)
+let bitstring_of_file_descr fd =
+  let chunk = 16384 in
+  let acc = Buffer.create chunk in
+  let scratch = String.create chunk in
+  let rec drain () =
+    let got = Unix.read fd scratch 0 chunk in
+    if got > 0 then (
+      Buffer.add_substring acc scratch 0 got;
+      drain ()
+    )
+  in
+  drain ();
+  Buffer.contents acc, 0, Buffer.length acc lsl 3
+
+(* Read at most [max] bytes from the Unix file descriptor [fd] and
+ * return them as a bitstring starting at offset 0.  Reading stops
+ * early as soon as [Unix.read] returns 0 bytes.
+ *)
+let bitstring_of_file_descr_max fd max =
+  let chunk = 16384 in
+  let acc = Buffer.create chunk in
+  let scratch = String.create chunk in
+  let total = ref 0 in
+  let finished = ref false in
+  while not !finished && !total < max do
+    (* Never ask for more than is still allowed by [max]. *)
+    let want = min chunk (max - !total) in
+    let got = Unix.read fd scratch 0 want in
+    if got > 0 then (
+      Buffer.add_substring acc scratch 0 got;
+      total := !total + got
+    ) else
+      finished := true
+  done;
+  Buffer.contents acc, 0, !total lsl 3
+
  let bitstring_of_file fname =
    let chan = open_in_bin fname in
-  let bs = bitstring_of_chan chan in
-  close_in chan;
-  bs
+  try
+    let bs = bitstring_of_chan chan in
+    close_in chan;
+    bs
+  with exn ->
+    close_in chan;
+    raise exn
  
  let bitstring_length (_, _, len) = len
  
  (*----------------------------------------------------------------------*)
+(* Bitwise functions.
+ *
+ * We try to isolate all bitwise functions within these modules.
+ *)
+
+module I = struct
+  (* Bitwise operations on ints, for field widths of up to 31 bits.
+   * The native int may be wider than that (63 bits on 64 bit
+   * platforms), so masks are computed from the requested width and
+   * not from the extremes of the int type.
+   *)
+  let (<<) = (lsl)
+  let (>>) = (lsr)
+  external to_int : int -> int = "%identity"
+  let zero = 0
+  let one = 1
+  let minus_one = -1
+  let ff = 0xff
+
+  (* Create a mask so many bits wide (0 .. 31).
+   *
+   * An OCaml int holds Sys.word_size - 1 bits.  [one << bits] is only
+   * used while the shifted bit stays below the sign bit; the two
+   * widest masks of the native int are spelled out explicitly.  On a
+   * 32 bit platform those are the 30 and 31 bit masks.  On a 64 bit
+   * platform every width up to 31 takes the shift path, so [mask 30]
+   * is 0x3fffffff there and not the 62 bit wide max_int.
+   *
+   * Raises Invalid_argument if bits > 31.
+   *)
+  let mask bits =
+    let int_bits = Sys.word_size - 1 in
+    if bits > 31 then
+      invalid_arg "Bitmatch.I.mask"
+    else if bits < int_bits - 1 then
+      pred (one << bits)
+    else if bits = int_bits - 1 then
+      max_int
+    else
+      minus_one
+
+  (* Byte swap an int of a given size.
+   *
+   * [v] is treated as a [bits] wide big endian quantity.  The low
+   * bytes are moved to the top one byte at a time; when [bits] is not
+   * a multiple of 8 the odd group of [bits mod 8] bits is the one
+   * taken from the bottom of [v].  Widths of 8 bits or fewer are
+   * returned unchanged.
+   *)
+  let byteswap v bits =
+    if bits <= 8 then v
+    else if bits <= 16 then (
+      let shift = bits-8 in
+      let v1 = v >> shift in
+      let v2 = (v land (mask shift)) << 8 in
+      v2 lor v1
+    ) else if bits <= 24 then (
+      let shift = bits - 16 in
+      let v1 = v >> (8+shift) in
+      let v2 = ((v >> shift) land ff) << 8 in
+      let v3 = (v land (mask shift)) << 16 in
+      v3 lor v2 lor v1
+    ) else (
+      let shift = bits - 24 in
+      let v1 = v >> (16+shift) in
+      let v2 = ((v >> (8+shift)) land ff) << 8 in
+      let v3 = ((v >> shift) land ff) << 16 in
+      let v4 = (v land (mask shift)) << 24 in
+      v4 lor v3 lor v2 lor v1
+    )
+
+  (* Check a value is in range 0 .. 2^bits-1, i.e. that no bit above
+   * the low [bits] bits is set.
+   *)
+  let range_unsigned v bits =
+    let mask = lnot (mask bits) in
+    (v land mask) = zero
+
+  (* Call function g on the top bits, then f on each full byte
+   * (big endian - so start at top).
+   *
+   * g receives the partial group and its width in bits (1 .. 7);
+   * f receives each full byte.  Nothing is called when bits <= 0.
+   *)
+  let rec map_bytes_be g f v bits =
+    if bits >= 8 then (
+      (* Recurse first so the most significant part is emitted first. *)
+      map_bytes_be g f (v >> 8) (bits-8);
+      let lsb = v land ff in
+      f (to_int lsb)
+    ) else if bits > 0 then (
+      let lsb = v land (mask bits) in
+      g (to_int lsb) bits
+    )
+end
+
+module I32 = struct
+  (* Int32 flavour of the bitwise helpers.  The layout deliberately
+   * mirrors module I so that the two can be compared side by side
+   * when hunting for bugs.
+   *)
+  let (<<) = Int32.shift_left
+  let (>>) = Int32.shift_right_logical
+  let (land) = Int32.logand
+  let (lor) = Int32.logor
+  let lnot = Int32.lognot
+  let pred = Int32.pred
+  let max_int = Int32.max_int
+  let to_int = Int32.to_int
+  let zero = Int32.zero
+  let one = Int32.one
+  let minus_one = Int32.minus_one
+  let ff = 0xff_l
+
+  (* Mask with the low [bits] bits set.  The 31 and 32 bit masks are
+   * spelled out because [one << bits] cannot produce them.
+   * Raises Invalid_argument if bits > 32.
+   *)
+  let mask bits =
+    match bits with
+    | 32 -> minus_one
+    | 31 -> max_int
+    | n when n < 31 -> pred (one << n)
+    | _ -> invalid_arg "Bitmatch.I32.mask"
+
+  (* Byte swap a [bits] wide value.  Each branch handles one more
+   * full byte than the previous one; [top] is the width of the group
+   * taken from the bottom of [v] and moved to the top.
+   *)
+  let byteswap v bits =
+    if bits <= 8 then v
+    else if bits <= 16 then (
+      let top = bits - 8 in
+      ((v land mask top) << 8) lor (v >> top)
+    ) else if bits <= 24 then (
+      let top = bits - 16 in
+      ((v land mask top) << 16)
+      lor (((v >> top) land ff) << 8)
+      lor (v >> (8 + top))
+    ) else (
+      let top = bits - 24 in
+      ((v land mask top) << 24)
+      lor (((v >> top) land ff) << 16)
+      lor (((v >> (8 + top)) land ff) << 8)
+      lor (v >> (16 + top))
+    )
+
+  (* True when no bit above the low [bits] bits of [v] is set. *)
+  let range_unsigned v bits =
+    let high_bits = lnot (mask bits) in
+    v land high_bits = zero
+
+  (* Big endian traversal: g is called on the leading partial group
+   * (value and width in bits), then f on every full byte, most
+   * significant first.  Nothing is called when bits <= 0.
+   *)
+  let rec map_bytes_be g f v bits =
+    if bits <= 0 then ()
+    else if bits < 8 then
+      g (to_int (v land mask bits)) bits
+    else (
+      map_bytes_be g f (v >> 8) (bits - 8);
+      f (to_int (v land ff))
+    )
+end
+
+module I64 = struct
+  (* Int64 flavour of the bitwise helpers.  The layout deliberately
+   * mirrors modules I and I32 so that they can be compared side by
+   * side when hunting for bugs.
+   *)
+  let (<<) = Int64.shift_left
+  let (>>) = Int64.shift_right_logical
+  let (land) = Int64.logand
+  let (lor) = Int64.logor
+  let lnot = Int64.lognot
+  let pred = Int64.pred
+  let max_int = Int64.max_int
+  let to_int = Int64.to_int
+  let zero = Int64.zero
+  let one = Int64.one
+  let minus_one = Int64.minus_one
+  let ff = 0xff_L
+
+  (* Mask with the low [bits] bits set.  The 63 and 64 bit masks are
+   * spelled out because [one << bits] cannot produce them.
+   * Raises Invalid_argument if bits > 64.
+   *)
+  let mask bits =
+    match bits with
+    | 64 -> minus_one
+    | 63 -> max_int
+    | n when n < 63 -> pred (one << n)
+    | _ -> invalid_arg "Bitmatch.I64.mask"
+
+  (* Byte swap an int of a given size: not implemented for int64. *)
+  (* let byteswap v bits = *)
+
+  (* True when no bit above the low [bits] bits of [v] is set. *)
+  let range_unsigned v bits =
+    let high_bits = lnot (mask bits) in
+    v land high_bits = zero
+
+  (* Big endian traversal: g is called on the leading partial group
+   * (value and width in bits), then f on every full byte, most
+   * significant first.  Nothing is called when bits <= 0.
+   *)
+  let rec map_bytes_be g f v bits =
+    if bits <= 0 then ()
+    else if bits < 8 then
+      g (to_int (v land mask bits)) bits
+    else (
+      map_bytes_be g f (v >> 8) (bits - 8);
+      f (to_int (v land ff))
+    )
+end
+
+(*----------------------------------------------------------------------*)
  (* Extraction functions.
   *
   * NB: internal functions, called from the generated macros, and
@@ -146,6 +410,21 @@ let extract_int_be_unsigned data off len flen =
      ) in
    word, off+flen, len-flen
  
+(* Little endian extraction of an int field: extract it big endian,
+ * then byte swap the value.  The updated offset and remaining length
+ * are passed through unchanged.
+ *)
+let extract_int_le_unsigned data off len flen =
+  let be_value, next_off, rest_len =
+    extract_int_be_unsigned data off len flen in
+  I.byteswap be_value flen, next_off, rest_len
+
+(* Native endian extraction: the big or little endian extractor is
+ * picked once, when this module is initialised, from [nativeendian].
+ *)
+let extract_int_ne_unsigned =
+  match nativeendian with
+  | BigEndian -> extract_int_be_unsigned
+  | _ -> extract_int_le_unsigned
+
+(* Extraction with the endianness chosen by a runtime value. *)
+let extract_int_ee_unsigned endian =
+  match endian with
+  | NativeEndian -> extract_int_ne_unsigned
+  | LittleEndian -> extract_int_le_unsigned
+  | BigEndian -> extract_int_be_unsigned
+
  let _make_int32_be c0 c1 c2 c3 =
    Int32.logor
      (Int32.logor
@@ -155,6 +434,15 @@ let _make_int32_be c0 c1 c2 c3 =
         (Int32.shift_left c2 8))
      c3
  
+(* Assemble an int32 from four byte values given in little endian
+ * order: c0 is the least significant byte, c3 the most significant.
+ *)
+let _make_int32_le c0 c1 c2 c3 =
+  let shl = Int32.shift_left in
+  List.fold_left Int32.logor c0 [shl c1 8; shl c2 16; shl c3 24]
+
  (* Extract exactly 32 bits.  We have to consider endianness and signedness. *)
  let extract_int32_be_unsigned data off len flen =
    let byteoff = off lsr 3 in
@@ -187,6 +475,21 @@ let extract_int32_be_unsigned data off len flen =
      ) in
    word, off+flen, len-flen
  
+(* Little endian extraction of an int32 field: extract it big endian,
+ * then byte swap the value.  The updated offset and remaining length
+ * are passed through unchanged.
+ *)
+let extract_int32_le_unsigned data off len flen =
+  let be_value, next_off, rest_len =
+    extract_int32_be_unsigned data off len flen in
+  I32.byteswap be_value flen, next_off, rest_len
+
+(* Native endian extraction: the big or little endian extractor is
+ * picked once, when this module is initialised, from [nativeendian].
+ *)
+let extract_int32_ne_unsigned =
+  match nativeendian with
+  | BigEndian -> extract_int32_be_unsigned
+  | _ -> extract_int32_le_unsigned
+
+(* Extraction with the endianness chosen by a runtime value. *)
+let extract_int32_ee_unsigned endian =
+  match endian with
+  | NativeEndian -> extract_int32_ne_unsigned
+  | LittleEndian -> extract_int32_le_unsigned
+  | BigEndian -> extract_int32_be_unsigned
+
  let _make_int64_be c0 c1 c2 c3 c4 c5 c6 c7 =
    Int64.logor
      (Int64.logor
@@ -204,6 +507,9 @@ let _make_int64_be c0 c1 c2 c3 c4 c5 c6 c7 =
         (Int64.shift_left c6 8))
      c7
  
+(* Little endian counterpart of _make_int64_be: takes the eight byte
+ * values in the opposite order and hands them to the big endian
+ * builder reversed.
+ *)
+let _make_int64_le b0 b1 b2 b3 b4 b5 b6 b7 =
+  _make_int64_be
+    b7 b6 b5 b4
+    b3 b2 b1 b0
+
  (* Extract [1..64] bits.  We have to consider endianness and signedness. *)
  let extract_int64_be_unsigned data off len flen =
    let byteoff = off lsr 3 in
@@ -248,6 +554,59 @@ let extract_int64_be_unsigned data off len flen =
      ) in
    word, off+flen, len-flen
  
+(* Little endian extraction of an int64 field.
+ *
+ * Eight consecutive bytes starting at bit offset [off] are combined
+ * with _make_int64_le, and the result is then masked down to the low
+ * [flen] bits.  Returns the value together with the offset advanced
+ * by [flen] and the length reduced by [flen].
+ *)
+let extract_int64_le_unsigned data off len flen =
+  let raw =
+    if off land 7 <> 0 then (
+      (* Unaligned: pull the eight bytes out one at a time, threading
+       * the offset and length through each extraction.
+       *)
+      let b0, off, len = extract_char_unsigned data off len 8 in
+      let b1, off, len = extract_char_unsigned data off len 8 in
+      let b2, off, len = extract_char_unsigned data off len 8 in
+      let b3, off, len = extract_char_unsigned data off len 8 in
+      let b4, off, len = extract_char_unsigned data off len 8 in
+      let b5, off, len = extract_char_unsigned data off len 8 in
+      let b6, off, len = extract_char_unsigned data off len 8 in
+      let b7, _, _ = extract_char_unsigned data off len 8 in
+      _make_int64_le
+        (Int64.of_int b0) (Int64.of_int b1)
+        (Int64.of_int b2) (Int64.of_int b3)
+        (Int64.of_int b4) (Int64.of_int b5)
+        (Int64.of_int b6) (Int64.of_int b7)
+    ) else (
+      (* Byte aligned, the common case: index the string directly. *)
+      let base = off lsr 3 in
+      let strlen = String.length data in
+      let byte_at i = _get_byte64 data (base + i) strlen in
+      let b0 = byte_at 0 in
+      let b1 = byte_at 1 in
+      let b2 = byte_at 2 in
+      let b3 = byte_at 3 in
+      let b4 = byte_at 4 in
+      let b5 = byte_at 5 in
+      let b6 = byte_at 6 in
+      let b7 = byte_at 7 in
+      _make_int64_le b0 b1 b2 b3 b4 b5 b6 b7
+    ) in
+  let word = Int64.logand raw (I64.mask flen) in
+  word, off+flen, len-flen
+
+(* Native endian extraction: the big or little endian extractor is
+ * picked once, when this module is initialised, from [nativeendian].
+ *)
+let extract_int64_ne_unsigned =
+  match nativeendian with
+  | BigEndian -> extract_int64_be_unsigned
+  | _ -> extract_int64_le_unsigned
+
+(* Extraction with the endianness chosen by a runtime value. *)
+let extract_int64_ee_unsigned endian =
+  match endian with
+  | NativeEndian -> extract_int64_ne_unsigned
+  | LittleEndian -> extract_int64_le_unsigned
+  | BigEndian -> extract_int64_be_unsigned
+
  (*----------------------------------------------------------------------*)
  (* Constructor functions. *)
  
@@ -327,13 +686,15 @@ module Buffer = struct
            *)
           let slenbytes = slen lsr 3 in
           if slenbytes > 0 then Buffer.add_substring buf str 0 slenbytes;
-         t.last <- Char.code str.[slenbytes] lsl (8 - (slen land 7))
+         let last = Char.code str.[slenbytes] in (* last char *)
+         let mask = 0xff lsl (8 - (slen land 7)) in
+         t.last <- last land mask
         );
         t.len <- len + slen
        ) else (
         (* Target buffer is unaligned.  Copy whole bytes using
          * add_byte which knows how to deal with an unaligned
-        * target buffer, then call _add_bits for the remaining < 8 bits.
+        * target buffer, then call add_bit for the remaining < 8 bits.
          *
          * XXX This is going to be dog-slow.
          *)
@@ -342,13 +703,20 @@ module Buffer = struct
           let byte = Char.code str.[i] in
           add_byte t byte
         done;
-       _add_bits t (Char.code str.[slenbytes]) (slen - (slenbytes lsl 3))
+       let bitsleft = slen - (slenbytes lsl 3) in
+       if bitsleft > 0 then (
+         let c = Char.code str.[slenbytes] in
+         for i = 0 to bitsleft - 1 do
+           let bit = c land (0x80 lsr i) <> 0 in
+           add_bit t bit
+         done
+       )
        );
      )
  end
  
  (* Construct a single bit. *)
-let construct_bit buf b _ =
+let construct_bit buf b _ _ =
    Buffer.add_bit buf b
  
  (* Construct a field, flen = [2..8]. *)
@@ -360,31 +728,130 @@ let construct_char_unsigned buf v flen exn =
    else
      Buffer._add_bits buf v flen
  
-(* Generate a mask with the lower 'bits' bits set. *)
-let mask64 bits =
-  if bits < 63 then Int64.pred (Int64.shift_left 1L bits)
-  else if bits = 63 then Int64.max_int
-  else if bits = 64 then -1L
-  else invalid_arg "Bitmatch.mask64"
+(* Construct a big endian field of up to 31 bits.
+ * Raises [exn] when [v] does not fit in [flen] bits; otherwise the
+ * leading partial group goes through Buffer._add_bits and each full
+ * byte through Buffer.add_byte, most significant first.
+ *)
+let construct_int_be_unsigned buf v flen exn =
+  if I.range_unsigned v flen then
+    I.map_bytes_be (Buffer._add_bits buf) (Buffer.add_byte buf) v flen
+  else
+    raise exn
+
+(* Native endian construction, picked once from [nativeendian] when
+ * this module is initialised.  There is no little endian constructor
+ * for ints yet, so that case is a stub which fails when called.
+ *)
+let construct_int_ne_unsigned =
+  match nativeendian with
+  | BigEndian -> construct_int_be_unsigned
+  | _ ->
+      (* placeholder for construct_int_le_unsigned *)
+      (fun _ _ _ _ -> failwith "construct_int_le_unsigned")
+
+(* Construction with the endianness chosen by a runtime value. *)
+let construct_int_ee_unsigned endian =
+  match endian with
+  | NativeEndian -> construct_int_ne_unsigned
+  | LittleEndian ->
+      (* placeholder for construct_int_le_unsigned *)
+      (fun _ _ _ _ -> failwith "construct_int_le_unsigned")
+  | BigEndian -> construct_int_be_unsigned
+
+(* Construct a field of exactly 32 bits, most significant byte first.
+ * The [flen] and exception arguments are accepted but not used.
+ *)
+let construct_int32_be_unsigned buf v flen _ =
+  (* The byte of [v] found [shift] bits above the bottom. *)
+  let byte_at shift =
+    Int32.to_int
+      (Int32.logand (Int32.shift_right_logical v shift) 0xff_l) in
+  List.iter (fun shift -> Buffer.add_byte buf (byte_at shift)) [24; 16; 8; 0]
+
+(* Construct a field of exactly 32 bits, least significant byte first.
+ * The [flen] and exception arguments are accepted but not used.
+ *)
+let construct_int32_le_unsigned buf v flen _ =
+  (* The byte of [v] found [shift] bits above the bottom. *)
+  let byte_at shift =
+    Int32.to_int
+      (Int32.logand (Int32.shift_right_logical v shift) 0xff_l) in
+  List.iter (fun shift -> Buffer.add_byte buf (byte_at shift)) [0; 8; 16; 24]
+
+(* Native endian construction: the big or little endian constructor
+ * is picked once, when this module is initialised, from
+ * [nativeendian].
+ *)
+let construct_int32_ne_unsigned =
+  match nativeendian with
+  | BigEndian -> construct_int32_be_unsigned
+  | _ -> construct_int32_le_unsigned
+
+(* Construction with the endianness chosen by a runtime value. *)
+let construct_int32_ee_unsigned endian =
+  match endian with
+  | NativeEndian -> construct_int32_ne_unsigned
+  | LittleEndian -> construct_int32_le_unsigned
+  | BigEndian -> construct_int32_be_unsigned
  
  (* Construct a field of up to 64 bits. *)
  let construct_int64_be_unsigned buf v flen exn =
    (* Check value is within range. *)
-  let m = Int64.lognot (mask64 flen) in
-  if Int64.logand v m <> 0L then raise exn;
-
+  if not (I64.range_unsigned v flen) then raise exn;
    (* Add the bytes. *)
-  let rec loop v flen =
-    if flen > 8 then (
-      loop (Int64.shift_right_logical v 8) (flen-8);
-      let lsb = Int64.to_int (Int64.logand v 0xffL) in
-      Buffer.add_byte buf lsb
-    ) else if flen > 0 then (
-      let lsb = Int64.to_int (Int64.logand v (mask64 flen)) in
-      Buffer._add_bits buf lsb flen
-    )
-  in
-  loop v flen
+  I64.map_bytes_be (Buffer._add_bits buf) (Buffer.add_byte buf) v flen
+
+(* Native endian construction, picked once from [nativeendian] when
+ * this module is initialised.  There is no little endian constructor
+ * for int64 yet, so that case is a stub which fails when called.
+ *)
+let construct_int64_ne_unsigned =
+  match nativeendian with
+  | BigEndian -> construct_int64_be_unsigned
+  | _ ->
+      (* placeholder for construct_int64_le_unsigned *)
+      (fun _ _ _ _ -> failwith "construct_int64_le_unsigned")
+
+(* Construction with the endianness chosen by a runtime value. *)
+let construct_int64_ee_unsigned endian =
+  match endian with
+  | NativeEndian -> construct_int64_ne_unsigned
+  | LittleEndian ->
+      (* placeholder for construct_int64_le_unsigned *)
+      (fun _ _ _ _ -> failwith "construct_int64_le_unsigned")
+  | BigEndian -> construct_int64_be_unsigned
+
+(* Append every byte of [str] to the buffer; the number of bits
+ * added is therefore always a multiple of 8.
+ *)
+let construct_string buf str =
+  Buffer.add_bits buf str (String.length str lsl 3)
+
+(*----------------------------------------------------------------------*)
+(* Extract a string from a bitstring.
+ *
+ * A byte aligned bitstring whose length is a whole number of bytes
+ * is simply a substring of the data.  Otherwise the bits are
+ * re-packed byte by byte into a fresh string; a trailing group of
+ * fewer than 8 bits is stored in the top bits of the final byte,
+ * with the remaining low bits left as zero.
+ *)
+let string_of_bitstring (data, off, len) =
+  if off land 7 <> 0 || len land 7 <> 0 then (
+    (* Bit-twiddling case. *)
+    let out = String.make ((len + 7) lsr 3) '\000' in
+    let pos = ref off in
+    let remaining = ref len in
+    let idx = ref 0 in
+    while !remaining >= 8 do
+      let byte, pos', remaining' =
+        extract_char_unsigned data !pos !remaining 8 in
+      out.[!idx] <- Char.chr byte;
+      pos := pos';
+      remaining := remaining';
+      incr idx
+    done;
+    if !remaining > 0 then (
+      let bits, _, _ =
+        extract_char_unsigned data !pos !remaining !remaining in
+      (* Left-justify the final partial group within its byte. *)
+      out.[!idx] <- Char.chr (bits lsl (8 - !remaining))
+    );
+    out
+  ) else
+    (* Easy case: everything is byte-aligned. *)
+    String.sub data (off lsr 3) (len lsr 3)
+
+(* To channel. *)
+
+(* Write a bitstring to an output channel.
+ * Raises Invalid_argument if the length in bits is not a multiple
+ * of 8.
+ *)
+let bitstring_to_chan ((data, off, len) as bits) chan =
+  if len land 7 <> 0 then invalid_arg "bitstring_to_chan";
+
+  if off land 7 <> 0 then
+    (* Unaligned start: re-pack the bits into a string first. *)
+    output_string chan (string_of_bitstring bits)
+  else
+    (* Byte-aligned: write straight out of the underlying string. *)
+    output chan data (off lsr 3) (len lsr 3)
+
+(* Write a bitstring to the named file, opened in binary mode.
+ *
+ * The channel is closed on every path.  If writing or the final
+ * close fails, the channel is released with close_out_noerr, which
+ * cannot raise, so the exception seen by the caller is always the
+ * original failure and never an error from the cleanup itself.
+ *)
+let bitstring_to_file bits filename =
+  let chan = open_out_bin filename in
+  try
+    bitstring_to_chan bits chan;
+    close_out chan
+  with exn ->
+    close_out_noerr chan;
+    raise exn
  
  (*----------------------------------------------------------------------*)
  (* Display functions. *)
@@ -426,6 +893,6 @@ let hexdump_bitstring chan (data, off, len) =
    if !linelen > 0 then (
      let skip = (16 - !linelen) * 3 + if !linelen < 8 then 1 else 0 in
      for i = 0 to skip-1 do fprintf chan " " done;
-    fprintf chan " |%s|\n" linechars
+    fprintf chan " |%s|\n%!" linechars
    ) else
-    fprintf chan "\n"
+    fprintf chan "\n%!"