X-Git-Url: http://git.annexia.org/?a=blobdiff_plain;f=bitmatch.ml;h=e3825045a5a61494b6f1424c03ba5120fff54ad9;hb=5522213ceeb1a53555ab0d8354a9dc789e488784;hp=0c9edc0bf47fc0ac5d477ecfcf8ef48705927f16;hpb=277441c3a2a9118c5da99bac9246a912860fa210;p=ocaml-bitstring.git diff --git a/bitmatch.ml b/bitmatch.ml index 0c9edc0..e382504 100644 --- a/bitmatch.ml +++ b/bitmatch.ml @@ -1,9 +1,28 @@ (* Bitmatch library. - * $Id: bitmatch.ml,v 1.5 2008-04-01 17:05:37 rjones Exp $ + * Copyright (C) 2008 Red Hat Inc., Richard W.M. Jones + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * $Id$ *) open Printf +include Bitmatch_types +include Bitmatch_config + (* Enable runtime debug messages. Must also have been enabled * in pa_bitmatch.ml. *) @@ -25,6 +44,8 @@ let make_bitstring len c = String.make ((len+7) lsr 3) c, 0, len let create_bitstring len = make_bitstring len '\000' +let bitstring_of_string str = str, 0, String.length str lsl 3 + let bitstring_of_chan chan = let tmpsize = 16384 in let buf = Buffer.create tmpsize in @@ -35,15 +56,258 @@ let bitstring_of_chan chan = done; Buffer.contents buf, 0, Buffer.length buf lsl 3 +let bitstring_of_chan_max chan max = + let tmpsize = 16384 in + let buf = Buffer.create tmpsize in + let tmp = String.create tmpsize in + let len = ref 0 in + let rec loop () = + if !len < max then ( + let r = min tmpsize (max - !len) in + let n = input chan tmp 0 r in + if n > 0 then ( + Buffer.add_substring buf tmp 0 n; + len := !len + n; + loop () + ) + ) + in + loop (); + Buffer.contents buf, 0, !len lsl 3 + +let bitstring_of_file_descr fd = + let tmpsize = 16384 in + let buf = Buffer.create tmpsize in + let tmp = String.create tmpsize in + let n = ref 0 in + while n := Unix.read fd tmp 0 tmpsize; !n > 0 do + Buffer.add_substring buf tmp 0 !n; + done; + Buffer.contents buf, 0, Buffer.length buf lsl 3 + +let bitstring_of_file_descr_max fd max = + let tmpsize = 16384 in + let buf = Buffer.create tmpsize in + let tmp = String.create tmpsize in + let len = ref 0 in + let rec loop () = + if !len < max then ( + let r = min tmpsize (max - !len) in + let n = Unix.read fd tmp 0 r in + if n > 0 then ( + Buffer.add_substring buf tmp 0 n; + len := !len + n; + loop () + ) + ) + in + loop (); + Buffer.contents buf, 0, !len lsl 3 + let bitstring_of_file fname = let chan = open_in_bin fname in - let bs = bitstring_of_chan chan in - close_in chan; - bs + try + let bs = bitstring_of_chan chan in + close_in chan; + bs + with exn -> + close_in chan; + raise exn let bitstring_length (_, _, len) = len (*----------------------------------------------------------------------*) +(* Bitwise functions. + * + * We try to isolate all bitwise functions within these modules. + *) + +module I = struct + (* Bitwise operations on ints. Note that we assume int <= 31 bits. *) + let (<<) = (lsl) + let (>>) = (lsr) + external to_int : int -> int = "%identity" + let zero = 0 + let one = 1 + let minus_one = -1 + let ff = 0xff + + (* Create a mask so many bits wide. *) + let mask bits = + if bits < 30 then + pred (one << bits) + else if bits = 30 then + max_int + else if bits = 31 then + minus_one + else + invalid_arg "Bitmatch.I.mask" + + (* Byte swap an int of a given size. *) + let byteswap v bits = + if bits <= 8 then v + else if bits <= 16 then ( + let shift = bits-8 in + let v1 = v >> shift in + let v2 = (v land (mask shift)) << 8 in + v2 lor v1 + ) else if bits <= 24 then ( + let shift = bits - 16 in + let v1 = v >> (8+shift) in + let v2 = ((v >> shift) land ff) << 8 in + let v3 = (v land (mask shift)) << 16 in + v3 lor v2 lor v1 + ) else ( + let shift = bits - 24 in + let v1 = v >> (16+shift) in + let v2 = ((v >> (8+shift)) land ff) << 8 in + let v3 = ((v >> shift) land ff) << 16 in + let v4 = (v land (mask shift)) << 24 in + v4 lor v3 lor v2 lor v1 + ) + + (* Check a value is in range 0 .. 2^bits-1. *) + let range_unsigned v bits = + let mask = lnot (mask bits) in + (v land mask) = zero + + (* Call function g on the top bits, then f on each full byte + * (big endian - so start at top). + *) + let rec map_bytes_be g f v bits = + if bits >= 8 then ( + map_bytes_be g f (v >> 8) (bits-8); + let lsb = v land ff in + f (to_int lsb) + ) else if bits > 0 then ( + let lsb = v land (mask bits) in + g (to_int lsb) bits + ) +end + +module I32 = struct + (* Bitwise operations on int32s. Note we try to keep it as similar + * as possible to the I module above, to make it easier to track + * down bugs. + *) + let (<<) = Int32.shift_left + let (>>) = Int32.shift_right_logical + let (land) = Int32.logand + let (lor) = Int32.logor + let lnot = Int32.lognot + let pred = Int32.pred + let max_int = Int32.max_int + let to_int = Int32.to_int + let zero = Int32.zero + let one = Int32.one + let minus_one = Int32.minus_one + let ff = 0xff_l + + (* Create a mask so many bits wide. *) + let mask bits = + if bits < 31 then + pred (one << bits) + else if bits = 31 then + max_int + else if bits = 32 then + minus_one + else + invalid_arg "Bitmatch.I32.mask" + + (* Byte swap an int of a given size. *) + let byteswap v bits = + if bits <= 8 then v + else if bits <= 16 then ( + let shift = bits-8 in + let v1 = v >> shift in + let v2 = (v land (mask shift)) << 8 in + v2 lor v1 + ) else if bits <= 24 then ( + let shift = bits - 16 in + let v1 = v >> (8+shift) in + let v2 = ((v >> shift) land ff) << 8 in + let v3 = (v land (mask shift)) << 16 in + v3 lor v2 lor v1 + ) else ( + let shift = bits - 24 in + let v1 = v >> (16+shift) in + let v2 = ((v >> (8+shift)) land ff) << 8 in + let v3 = ((v >> shift) land ff) << 16 in + let v4 = (v land (mask shift)) << 24 in + v4 lor v3 lor v2 lor v1 + ) + + (* Check a value is in range 0 .. 2^bits-1. *) + let range_unsigned v bits = + let mask = lnot (mask bits) in + (v land mask) = zero + + (* Call function g on the top bits, then f on each full byte + * (big endian - so start at top). + *) + let rec map_bytes_be g f v bits = + if bits >= 8 then ( + map_bytes_be g f (v >> 8) (bits-8); + let lsb = v land ff in + f (to_int lsb) + ) else if bits > 0 then ( + let lsb = v land (mask bits) in + g (to_int lsb) bits + ) +end + +module I64 = struct + (* Bitwise operations on int64s. Note we try to keep it as similar + * as possible to the I/I32 modules above, to make it easier to track + * down bugs. + *) + let (<<) = Int64.shift_left + let (>>) = Int64.shift_right_logical + let (land) = Int64.logand + let (lor) = Int64.logor + let lnot = Int64.lognot + let pred = Int64.pred + let max_int = Int64.max_int + let to_int = Int64.to_int + let zero = Int64.zero + let one = Int64.one + let minus_one = Int64.minus_one + let ff = 0xff_L + + (* Create a mask so many bits wide. *) + let mask bits = + if bits < 63 then + pred (one << bits) + else if bits = 63 then + max_int + else if bits = 64 then + minus_one + else + invalid_arg "Bitmatch.I64.mask" + + (* Byte swap an int of a given size. *) + (* let byteswap v bits = *) + + (* Check a value is in range 0 .. 2^bits-1. *) + let range_unsigned v bits = + let mask = lnot (mask bits) in + (v land mask) = zero + + (* Call function g on the top bits, then f on each full byte + * (big endian - so start at top). + *) + let rec map_bytes_be g f v bits = + if bits >= 8 then ( + map_bytes_be g f (v >> 8) (bits-8); + let lsb = v land ff in + f (to_int lsb) + ) else if bits > 0 then ( + let lsb = v land (mask bits) in + g (to_int lsb) bits + ) +end + +(*----------------------------------------------------------------------*) (* Extraction functions. * * NB: internal functions, called from the generated macros, and @@ -146,6 +410,21 @@ let extract_int_be_unsigned data off len flen = ) in word, off+flen, len-flen +let extract_int_le_unsigned data off len flen = + let v, off, len = extract_int_be_unsigned data off len flen in + let v = I.byteswap v flen in + v, off, len + +let extract_int_ne_unsigned = + if nativeendian = BigEndian + then extract_int_be_unsigned + else extract_int_le_unsigned + +let extract_int_ee_unsigned = function + | BigEndian -> extract_int_be_unsigned + | LittleEndian -> extract_int_le_unsigned + | NativeEndian -> extract_int_ne_unsigned + let _make_int32_be c0 c1 c2 c3 = Int32.logor (Int32.logor @@ -155,6 +434,15 @@ let _make_int32_be c0 c1 c2 c3 = (Int32.shift_left c2 8)) c3 +let _make_int32_le c0 c1 c2 c3 = + Int32.logor + (Int32.logor + (Int32.logor + (Int32.shift_left c3 24) + (Int32.shift_left c2 16)) + (Int32.shift_left c1 8)) + c0 + (* Extract exactly 32 bits. We have to consider endianness and signedness. *) let extract_int32_be_unsigned data off len flen = let byteoff = off lsr 3 in @@ -187,6 +475,21 @@ let extract_int32_be_unsigned data off len flen = ) in word, off+flen, len-flen +let extract_int32_le_unsigned data off len flen = + let v, off, len = extract_int32_be_unsigned data off len flen in + let v = I32.byteswap v flen in + v, off, len + +let extract_int32_ne_unsigned = + if nativeendian = BigEndian + then extract_int32_be_unsigned + else extract_int32_le_unsigned + +let extract_int32_ee_unsigned = function + | BigEndian -> extract_int32_be_unsigned + | LittleEndian -> extract_int32_le_unsigned + | NativeEndian -> extract_int32_ne_unsigned + let _make_int64_be c0 c1 c2 c3 c4 c5 c6 c7 = Int64.logor (Int64.logor @@ -204,6 +507,9 @@ let _make_int64_be c0 c1 c2 c3 c4 c5 c6 c7 = (Int64.shift_left c6 8)) c7 +let _make_int64_le c0 c1 c2 c3 c4 c5 c6 c7 = + _make_int64_be c7 c6 c5 c4 c3 c2 c1 c0 + (* Extract [1..64] bits. We have to consider endianness and signedness. *) let extract_int64_be_unsigned data off len flen = let byteoff = off lsr 3 in @@ -248,6 +554,59 @@ let extract_int64_be_unsigned data off len flen = ) in word, off+flen, len-flen +let extract_int64_le_unsigned data off len flen = + let byteoff = off lsr 3 in + + let strlen = String.length data in + + let word = + (* Optimize the common (byte-aligned) case. *) + if off land 7 = 0 then ( + let word = + let c0 = _get_byte64 data byteoff strlen in + let c1 = _get_byte64 data (byteoff+1) strlen in + let c2 = _get_byte64 data (byteoff+2) strlen in + let c3 = _get_byte64 data (byteoff+3) strlen in + let c4 = _get_byte64 data (byteoff+4) strlen in + let c5 = _get_byte64 data (byteoff+5) strlen in + let c6 = _get_byte64 data (byteoff+6) strlen in + let c7 = _get_byte64 data (byteoff+7) strlen in + _make_int64_le c0 c1 c2 c3 c4 c5 c6 c7 in + Int64.logand word (I64.mask flen) + ) else ( + (* Extract the next 64 bits, slow method. *) + let word = + let c0, off, len = extract_char_unsigned data off len 8 in + let c1, off, len = extract_char_unsigned data off len 8 in + let c2, off, len = extract_char_unsigned data off len 8 in + let c3, off, len = extract_char_unsigned data off len 8 in + let c4, off, len = extract_char_unsigned data off len 8 in + let c5, off, len = extract_char_unsigned data off len 8 in + let c6, off, len = extract_char_unsigned data off len 8 in + let c7, _, _ = extract_char_unsigned data off len 8 in + let c0 = Int64.of_int c0 in + let c1 = Int64.of_int c1 in + let c2 = Int64.of_int c2 in + let c3 = Int64.of_int c3 in + let c4 = Int64.of_int c4 in + let c5 = Int64.of_int c5 in + let c6 = Int64.of_int c6 in + let c7 = Int64.of_int c7 in + _make_int64_le c0 c1 c2 c3 c4 c5 c6 c7 in + Int64.logand word (I64.mask flen) + ) in + word, off+flen, len-flen + +let extract_int64_ne_unsigned = + if nativeendian = BigEndian + then extract_int64_be_unsigned + else extract_int64_le_unsigned + +let extract_int64_ee_unsigned = function + | BigEndian -> extract_int64_be_unsigned + | LittleEndian -> extract_int64_le_unsigned + | NativeEndian -> extract_int64_ne_unsigned + (*----------------------------------------------------------------------*) (* Constructor functions. *) @@ -327,13 +686,15 @@ module Buffer = struct *) let slenbytes = slen lsr 3 in if slenbytes > 0 then Buffer.add_substring buf str 0 slenbytes; - t.last <- Char.code str.[slenbytes] lsl (8 - (slen land 7)) + let last = Char.code str.[slenbytes] in (* last char *) + let mask = 0xff lsl (8 - (slen land 7)) in + t.last <- last land mask ); t.len <- len + slen ) else ( (* Target buffer is unaligned. Copy whole bytes using * add_byte which knows how to deal with an unaligned - * target buffer, then call _add_bits for the remaining < 8 bits. + * target buffer, then call add_bit for the remaining < 8 bits. * * XXX This is going to be dog-slow. *) @@ -342,13 +703,20 @@ module Buffer = struct let byte = Char.code str.[i] in add_byte t byte done; - _add_bits t (Char.code str.[slenbytes]) (slen - (slenbytes lsl 3)) + let bitsleft = slen - (slenbytes lsl 3) in + if bitsleft > 0 then ( + let c = Char.code str.[slenbytes] in + for i = 0 to bitsleft - 1 do + let bit = c land (0x80 lsr i) <> 0 in + add_bit t bit + done + ) ); ) end (* Construct a single bit. *) -let construct_bit buf b _ = +let construct_bit buf b _ _ = Buffer.add_bit buf b (* Construct a field, flen = [2..8]. *) @@ -360,31 +728,130 @@ let construct_char_unsigned buf v flen exn = else Buffer._add_bits buf v flen -(* Generate a mask with the lower 'bits' bits set. *) -let mask64 bits = - if bits < 63 then Int64.pred (Int64.shift_left 1L bits) - else if bits = 63 then Int64.max_int - else if bits = 64 then -1L - else invalid_arg "Bitmatch.mask64" +(* Construct a field of up to 31 bits. *) +let construct_int_be_unsigned buf v flen exn = + (* Check value is within range. *) + if not (I.range_unsigned v flen) then raise exn; + (* Add the bytes. *) + I.map_bytes_be (Buffer._add_bits buf) (Buffer.add_byte buf) v flen + +let construct_int_ne_unsigned = + if nativeendian = BigEndian + then construct_int_be_unsigned + else (*construct_int_le_unsigned*) + fun _ _ _ _ -> failwith "construct_int_le_unsigned" + +let construct_int_ee_unsigned = function + | BigEndian -> construct_int_be_unsigned + | LittleEndian -> (*construct_int_le_unsigned*) + (fun _ _ _ _ -> failwith "construct_int_le_unsigned") + | NativeEndian -> construct_int_ne_unsigned + +(* Construct a field of exactly 32 bits. *) +let construct_int32_be_unsigned buf v flen _ = + Buffer.add_byte buf + (Int32.to_int (Int32.shift_right_logical v 24)); + Buffer.add_byte buf + (Int32.to_int ((Int32.logand (Int32.shift_right_logical v 16) 0xff_l))); + Buffer.add_byte buf + (Int32.to_int ((Int32.logand (Int32.shift_right_logical v 8) 0xff_l))); + Buffer.add_byte buf + (Int32.to_int (Int32.logand v 0xff_l)) + +let construct_int32_le_unsigned buf v flen _ = + Buffer.add_byte buf + (Int32.to_int (Int32.logand v 0xff_l)); + Buffer.add_byte buf + (Int32.to_int ((Int32.logand (Int32.shift_right_logical v 8) 0xff_l))); + Buffer.add_byte buf + (Int32.to_int ((Int32.logand (Int32.shift_right_logical v 16) 0xff_l))); + Buffer.add_byte buf + (Int32.to_int (Int32.shift_right_logical v 24)) + +let construct_int32_ne_unsigned = + if nativeendian = BigEndian + then construct_int32_be_unsigned + else construct_int32_le_unsigned + +let construct_int32_ee_unsigned = function + | BigEndian -> construct_int32_be_unsigned + | LittleEndian -> construct_int32_le_unsigned + | NativeEndian -> construct_int32_ne_unsigned (* Construct a field of up to 64 bits. *) let construct_int64_be_unsigned buf v flen exn = (* Check value is within range. *) - let m = Int64.lognot (mask64 flen) in - if Int64.logand v m <> 0L then raise exn; - + if not (I64.range_unsigned v flen) then raise exn; (* Add the bytes. *) - let rec loop v flen = - if flen > 8 then ( - loop (Int64.shift_right_logical v 8) (flen-8); - let lsb = Int64.to_int (Int64.logand v 0xffL) in - Buffer.add_byte buf lsb - ) else if flen > 0 then ( - let lsb = Int64.to_int (Int64.logand v (mask64 flen)) in - Buffer._add_bits buf lsb flen - ) - in - loop v flen + I64.map_bytes_be (Buffer._add_bits buf) (Buffer.add_byte buf) v flen + +let construct_int64_ne_unsigned = + if nativeendian = BigEndian + then construct_int64_be_unsigned + else (*construct_int64_le_unsigned*) + fun _ _ _ _ -> failwith "construct_int64_le_unsigned" + +let construct_int64_ee_unsigned = function + | BigEndian -> construct_int64_be_unsigned + | LittleEndian -> (*construct_int64_le_unsigned*) + (fun _ _ _ _ -> failwith "construct_int64_le_unsigned") + | NativeEndian -> construct_int64_ne_unsigned + +(* Construct from a string of bytes, exact multiple of 8 bits + * in length of course. + *) +let construct_string buf str = + let len = String.length str in + Buffer.add_bits buf str (len lsl 3) + +(*----------------------------------------------------------------------*) +(* Extract a string from a bitstring. *) + +let string_of_bitstring (data, off, len) = + if off land 7 = 0 && len land 7 = 0 then + (* Easy case: everything is byte-aligned. *) + String.sub data (off lsr 3) (len lsr 3) + else ( + (* Bit-twiddling case. *) + let strlen = (len + 7) lsr 3 in + let str = String.make strlen '\000' in + let rec loop data off len i = + if len >= 8 then ( + let c, off, len = extract_char_unsigned data off len 8 in + str.[i] <- Char.chr c; + loop data off len (i+1) + ) else if len > 0 then ( + let c, _, _ = extract_char_unsigned data off len len in + str.[i] <- Char.chr (c lsl (8-len)) + ) + in + loop data off len 0; + str + ) + +(* To channel. *) + +let bitstring_to_chan ((data, off, len) as bits) chan = + (* Fail if the bitstring length isn't a multiple of 8. *) + if len land 7 <> 0 then invalid_arg "bitstring_to_chan"; + + if off land 7 = 0 then + (* Easy case: string is byte-aligned. *) + output chan data (off lsr 3) (len lsr 3) + else ( + (* Bit-twiddling case: reuse string_of_bitstring *) + let str = string_of_bitstring bits in + output_string chan str + ) + +let bitstring_to_file bits filename = + let chan = open_out_bin filename in + try + bitstring_to_chan bits chan; + close_out chan + with exn -> + close_out chan; + raise exn (*----------------------------------------------------------------------*) (* Display functions. *) @@ -426,6 +893,6 @@ let hexdump_bitstring chan (data, off, len) = if !linelen > 0 then ( let skip = (16 - !linelen) * 3 + if !linelen < 8 then 1 else 0 in for i = 0 to skip-1 do fprintf chan " " done; - fprintf chan " |%s|\n" linechars + fprintf chan " |%s|\n%!" linechars ) else - fprintf chan "\n" + fprintf chan "\n%!"