2 * Copyright (C) 2008 Red Hat Inc., Richard W.M. Jones
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 * $Id: bitmatch.ml,v 1.12 2008-05-07 14:56:53 rjones Exp $
23 (* Enable runtime debug messages. Must also have been enabled
29 exception Construct_failure of string * string * int * int
(* A bitstring is simply the data itself (as a string), together with
 * the bit offset and the bit length within that string.  Note that
 * offset and length are counted in bits, not bytes.
 *)
35 type bitstring = string * int * int
37 (* Functions to create and load bitstrings. *)
(* The empty bitstring: empty data, bit offset 0, bit length 0. *)
let empty_bitstring = ("", 0, 0)
(* [make_bitstring len c] creates a bitstring of [len] bits whose backing
 * string is filled with the byte [c].  The backing string is rounded up
 * to a whole number of bytes and the bit offset is 0.
 *
 * Raises [Invalid_argument] if [len] is negative.  Without this check a
 * length in -7 .. -1 rounds up to zero bytes and silently produces a
 * bitstring with a negative bit length.
 *)
let make_bitstring len c =
  if len < 0 then invalid_arg "Bitmatch.make_bitstring";
  (* Round the bit count up to whole bytes. *)
  let nbytes = (len + 7) lsr 3 in
  (String.make nbytes c, 0, len)
(* [create_bitstring len] creates a bitstring of [len] bits by calling
 * [make_bitstring] with a zero fill byte.
 *)
let create_bitstring len =
  let fill = Char.chr 0 in
  make_bitstring len fill
(* [bitstring_of_string str] views the whole of [str] as a bitstring:
 * the data is [str] itself, the bit offset is 0 and the bit length is
 * eight times the byte length.
 *)
let bitstring_of_string str =
  let nbits = 8 * String.length str in
  (str, 0, nbits)
46 let bitstring_of_chan chan =
47 let tmpsize = 16384 in
48 let buf = Buffer.create tmpsize in
49 let tmp = String.create tmpsize in
51 while n := input chan tmp 0 tmpsize; !n > 0 do
52 Buffer.add_substring buf tmp 0 !n;
54 Buffer.contents buf, 0, Buffer.length buf lsl 3
56 let bitstring_of_chan_max chan max =
57 let tmpsize = 16384 in
58 let buf = Buffer.create tmpsize in
59 let tmp = String.create tmpsize in
63 let r = min tmpsize (max - !len) in
64 let n = input chan tmp 0 r in
66 Buffer.add_substring buf tmp 0 n;
73 Buffer.contents buf, 0, !len lsl 3
75 let bitstring_of_file_descr fd =
76 let tmpsize = 16384 in
77 let buf = Buffer.create tmpsize in
78 let tmp = String.create tmpsize in
80 while n := Unix.read fd tmp 0 tmpsize; !n > 0 do
81 Buffer.add_substring buf tmp 0 !n;
83 Buffer.contents buf, 0, Buffer.length buf lsl 3
85 let bitstring_of_file_descr_max fd max =
86 let tmpsize = 16384 in
87 let buf = Buffer.create tmpsize in
88 let tmp = String.create tmpsize in
92 let r = min tmpsize (max - !len) in
93 let n = Unix.read fd tmp 0 r in
95 Buffer.add_substring buf tmp 0 n;
102 Buffer.contents buf, 0, !len lsl 3
104 let bitstring_of_file fname =
105 let chan = open_in_bin fname in
106 let bs = bitstring_of_chan chan in
(* [bitstring_length bs] returns the third component of the bitstring
 * triple, i.e. its length in bits.
 *)
let bitstring_length bs =
  let (_data, _off, len) = bs in
  len
112 (*----------------------------------------------------------------------*)
(* Bitwise functions.
 *
 * We try to isolate all bitwise functions within these modules.
 *)
119 (* Bitwise operations on ints. Note that we assume int <= 31 bits. *)
122 external to_int : int -> int = "%identity"
128 (* Create a mask so many bits wide. *)
132 else if bits = 30 then
134 else if bits = 31 then
137 invalid_arg "Bitmatch.I.mask"
139 (* Byte swap an int of a given size. *)
140 let byteswap v bits =
142 else if bits <= 16 then (
143 let shift = bits-8 in
144 let v1 = v >> shift in
145 let v2 = (v land (mask shift)) << 8 in
147 ) else if bits <= 24 then (
148 let shift = bits - 16 in
149 let v1 = v >> (8+shift) in
150 let v2 = ((v >> shift) land ff) << 8 in
151 let v3 = (v land (mask shift)) << 16 in
154 let shift = bits - 24 in
155 let v1 = v >> (16+shift) in
156 let v2 = ((v >> (8+shift)) land ff) << 8 in
157 let v3 = ((v >> shift) land ff) << 16 in
158 let v4 = (v land (mask shift)) << 24 in
159 v4 lor v3 lor v2 lor v1
162 (* Check a value is in range 0 .. 2^bits-1. *)
163 let range_unsigned v bits =
164 let mask = lnot (mask bits) in
(* Call function g on the top bits, then f on each full byte
 * (big endian - so start at the top).
 *)
170 let rec map_bytes_be g f v bits =
172 map_bytes_be g f (v >> 8) (bits-8);
173 let lsb = v land ff in
175 ) else if bits > 0 then (
176 let lsb = v land (mask bits) in
182 (* Bitwise operations on int32s. Note we try to keep it as similar
183 * as possible to the I module above, to make it easier to track
186 let (<<) = Int32.shift_left
187 let (>>) = Int32.shift_right_logical
188 let (land) = Int32.logand
189 let (lor) = Int32.logor
190 let lnot = Int32.lognot
191 let pred = Int32.pred
192 let max_int = Int32.max_int
193 let to_int = Int32.to_int
194 let zero = Int32.zero
196 let minus_one = Int32.minus_one
199 (* Create a mask so many bits wide. *)
203 else if bits = 31 then
205 else if bits = 32 then
208 invalid_arg "Bitmatch.I32.mask"
210 (* Byte swap an int of a given size. *)
211 let byteswap v bits =
213 else if bits <= 16 then (
214 let shift = bits-8 in
215 let v1 = v >> shift in
216 let v2 = (v land (mask shift)) << 8 in
218 ) else if bits <= 24 then (
219 let shift = bits - 16 in
220 let v1 = v >> (8+shift) in
221 let v2 = ((v >> shift) land ff) << 8 in
222 let v3 = (v land (mask shift)) << 16 in
225 let shift = bits - 24 in
226 let v1 = v >> (16+shift) in
227 let v2 = ((v >> (8+shift)) land ff) << 8 in
228 let v3 = ((v >> shift) land ff) << 16 in
229 let v4 = (v land (mask shift)) << 24 in
230 v4 lor v3 lor v2 lor v1
233 (* Check a value is in range 0 .. 2^bits-1. *)
234 let range_unsigned v bits =
235 let mask = lnot (mask bits) in
(* Call function g on the top bits, then f on each full byte
 * (big endian - so start at the top).
 *)
241 let rec map_bytes_be g f v bits =
243 map_bytes_be g f (v >> 8) (bits-8);
244 let lsb = v land ff in
246 ) else if bits > 0 then (
247 let lsb = v land (mask bits) in
253 (* Bitwise operations on int64s. Note we try to keep it as similar
254 * as possible to the I/I32 modules above, to make it easier to track
257 let (<<) = Int64.shift_left
258 let (>>) = Int64.shift_right_logical
259 let (land) = Int64.logand
260 let (lor) = Int64.logor
261 let lnot = Int64.lognot
262 let pred = Int64.pred
263 let max_int = Int64.max_int
264 let to_int = Int64.to_int
265 let zero = Int64.zero
267 let minus_one = Int64.minus_one
270 (* Create a mask so many bits wide. *)
274 else if bits = 63 then
276 else if bits = 64 then
279 invalid_arg "Bitmatch.I64.mask"
281 (* Byte swap an int of a given size. *)
282 (* let byteswap v bits = *)
284 (* Check a value is in range 0 .. 2^bits-1. *)
285 let range_unsigned v bits =
286 let mask = lnot (mask bits) in
(* Call function g on the top bits, then f on each full byte
 * (big endian - so start at the top).
 *)
292 let rec map_bytes_be g f v bits =
294 map_bytes_be g f (v >> 8) (bits-8);
295 let lsb = v land ff in
297 ) else if bits > 0 then (
298 let lsb = v land (mask bits) in
303 (*----------------------------------------------------------------------*)
(* Extraction functions.
 *
 * NB: these are internal functions, called from the generated macros;
 * the parameters should already have been checked for sanity.
 *)
(* [extract_bitstring data off len flen] takes a field of [flen] bits
 * starting at bit offset [off].  Returns a triple of: the field as a
 * bitstring sharing [data], the offset advanced by [flen], and the
 * length reduced by [flen].  No bounds checking is done here.
 *)
let extract_bitstring data off len flen =
  let field = (data, off, flen) in
  let next_off = off + flen in
  let remaining = len - flen in
  (field, next_off, remaining)
(* [extract_remainder data off len] takes everything that is left.
 * Returns a triple of: the bitstring [(data, off, len)], the offset
 * advanced past it, and a remaining length of 0.
 *)
let extract_remainder data off len =
  let rest = (data, off, len) in
  let end_off = off + len in
  (rest, end_off, 0)
(* Extract and convert to numeric.  A single bit is returned as
 * a boolean.  There are no endianness or signedness considerations.
 *)
320 let extract_bit data off len _ = (* final param is always 1 *)
321 let byteoff = off lsr 3 in
322 let bitmask = 1 lsl (7 - (off land 7)) in
323 let b = Char.code data.[byteoff] land bitmask <> 0 in
(* Returns 8 bit unsigned aligned bytes from the string.
 * [strlen] is the caller-supplied length of [data]; when [byteoff]
 * lies at or beyond it the result is 0 instead of a read past the end.
 *)
let _get_byte data byteoff strlen =
  if byteoff >= strlen then 0
  else Char.code (String.get data byteoff)
(* Int32 variant of the aligned byte fetch: the byte at [byteoff] as an
 * int32, or 0l when [byteoff] is at or beyond [strlen].
 *)
let _get_byte32 data byteoff strlen =
  if byteoff >= strlen then Int32.zero
  else Int32.of_int (Char.code (String.get data byteoff))
(* Int64 variant of the aligned byte fetch: the byte at [byteoff] as an
 * int64, or 0L when [byteoff] is at or beyond [strlen].
 *)
let _get_byte64 data byteoff strlen =
  if byteoff >= strlen then Int64.zero
  else Int64.of_int (Char.code (String.get data byteoff))
(* Extract [2..8] bits.  Because the result fits into a single
 * byte we do not have to worry about endianness, only signedness.
 *)
339 let extract_char_unsigned data off len flen =
340 let byteoff = off lsr 3 in
342 (* Optimize the common (byte-aligned) case. *)
343 if off land 7 = 0 then (
344 let byte = Char.code data.[byteoff] in
345 byte lsr (8 - flen), off+flen, len-flen
347 (* Extract the 16 bits at byteoff and byteoff+1 (note that the
348 * second byte might not exist in the original string).
350 let strlen = String.length data in
353 (_get_byte data byteoff strlen lsl 8) +
354 _get_byte data (byteoff+1) strlen in
356 (* Mask off the top bits. *)
357 let bitmask = (1 lsl (16 - (off land 7))) - 1 in
358 let word = word land bitmask in
359 (* Shift right to get rid of the bottom bits. *)
360 let shift = 16 - ((off land 7) + flen) in
361 let word = word lsr shift in
363 word, off+flen, len-flen
366 (* Extract [9..31] bits. We have to consider endianness and signedness. *)
367 let extract_int_be_unsigned data off len flen =
368 let byteoff = off lsr 3 in
370 let strlen = String.length data in
373 (* Optimize the common (byte-aligned) case. *)
374 if off land 7 = 0 then (
376 (_get_byte data byteoff strlen lsl 23) +
377 (_get_byte data (byteoff+1) strlen lsl 15) +
378 (_get_byte data (byteoff+2) strlen lsl 7) +
379 (_get_byte data (byteoff+3) strlen lsr 1) in
381 ) else if flen <= 24 then (
382 (* Extract the 31 bits at byteoff .. byteoff+3. *)
384 (_get_byte data byteoff strlen lsl 23) +
385 (_get_byte data (byteoff+1) strlen lsl 15) +
386 (_get_byte data (byteoff+2) strlen lsl 7) +
387 (_get_byte data (byteoff+3) strlen lsr 1) in
388 (* Mask off the top bits. *)
389 let bitmask = (1 lsl (31 - (off land 7))) - 1 in
390 let word = word land bitmask in
391 (* Shift right to get rid of the bottom bits. *)
392 let shift = 31 - ((off land 7) + flen) in
395 (* Extract the next 31 bits, slow method. *)
397 let c0, off, len = extract_char_unsigned data off len 8 in
398 let c1, off, len = extract_char_unsigned data off len 8 in
399 let c2, off, len = extract_char_unsigned data off len 8 in
400 let c3, off, len = extract_char_unsigned data off len 7 in
401 (c0 lsl 23) + (c1 lsl 15) + (c2 lsl 7) + c3 in
404 word, off+flen, len-flen
406 let extract_int_le_unsigned data off len flen =
407 let v, off, len = extract_int_be_unsigned data off len flen in
408 let v = I.byteswap v flen in
411 let _make_int32_be c0 c1 c2 c3 =
415 (Int32.shift_left c0 24)
416 (Int32.shift_left c1 16))
417 (Int32.shift_left c2 8))
420 let _make_int32_le c0 c1 c2 c3 =
424 (Int32.shift_left c3 24)
425 (Int32.shift_left c2 16))
426 (Int32.shift_left c1 8))
429 (* Extract exactly 32 bits. We have to consider endianness and signedness. *)
430 let extract_int32_be_unsigned data off len flen =
431 let byteoff = off lsr 3 in
433 let strlen = String.length data in
436 (* Optimize the common (byte-aligned) case. *)
437 if off land 7 = 0 then (
439 let c0 = _get_byte32 data byteoff strlen in
440 let c1 = _get_byte32 data (byteoff+1) strlen in
441 let c2 = _get_byte32 data (byteoff+2) strlen in
442 let c3 = _get_byte32 data (byteoff+3) strlen in
443 _make_int32_be c0 c1 c2 c3 in
444 Int32.shift_right_logical word (32 - flen)
446 (* Extract the next 32 bits, slow method. *)
448 let c0, off, len = extract_char_unsigned data off len 8 in
449 let c1, off, len = extract_char_unsigned data off len 8 in
450 let c2, off, len = extract_char_unsigned data off len 8 in
451 let c3, _, _ = extract_char_unsigned data off len 8 in
452 let c0 = Int32.of_int c0 in
453 let c1 = Int32.of_int c1 in
454 let c2 = Int32.of_int c2 in
455 let c3 = Int32.of_int c3 in
456 _make_int32_be c0 c1 c2 c3 in
457 Int32.shift_right_logical word (32 - flen)
459 word, off+flen, len-flen
461 let extract_int32_le_unsigned data off len flen =
462 let v, off, len = extract_int32_be_unsigned data off len flen in
463 let v = I32.byteswap v flen in
466 let _make_int64_be c0 c1 c2 c3 c4 c5 c6 c7 =
474 (Int64.shift_left c0 56)
475 (Int64.shift_left c1 48))
476 (Int64.shift_left c2 40))
477 (Int64.shift_left c3 32))
478 (Int64.shift_left c4 24))
479 (Int64.shift_left c5 16))
480 (Int64.shift_left c6 8))
483 (* Extract [1..64] bits. We have to consider endianness and signedness. *)
484 let extract_int64_be_unsigned data off len flen =
485 let byteoff = off lsr 3 in
487 let strlen = String.length data in
490 (* Optimize the common (byte-aligned) case. *)
491 if off land 7 = 0 then (
493 let c0 = _get_byte64 data byteoff strlen in
494 let c1 = _get_byte64 data (byteoff+1) strlen in
495 let c2 = _get_byte64 data (byteoff+2) strlen in
496 let c3 = _get_byte64 data (byteoff+3) strlen in
497 let c4 = _get_byte64 data (byteoff+4) strlen in
498 let c5 = _get_byte64 data (byteoff+5) strlen in
499 let c6 = _get_byte64 data (byteoff+6) strlen in
500 let c7 = _get_byte64 data (byteoff+7) strlen in
501 _make_int64_be c0 c1 c2 c3 c4 c5 c6 c7 in
502 Int64.shift_right_logical word (64 - flen)
504 (* Extract the next 64 bits, slow method. *)
506 let c0, off, len = extract_char_unsigned data off len 8 in
507 let c1, off, len = extract_char_unsigned data off len 8 in
508 let c2, off, len = extract_char_unsigned data off len 8 in
509 let c3, off, len = extract_char_unsigned data off len 8 in
510 let c4, off, len = extract_char_unsigned data off len 8 in
511 let c5, off, len = extract_char_unsigned data off len 8 in
512 let c6, off, len = extract_char_unsigned data off len 8 in
513 let c7, _, _ = extract_char_unsigned data off len 8 in
514 let c0 = Int64.of_int c0 in
515 let c1 = Int64.of_int c1 in
516 let c2 = Int64.of_int c2 in
517 let c3 = Int64.of_int c3 in
518 let c4 = Int64.of_int c4 in
519 let c5 = Int64.of_int c5 in
520 let c6 = Int64.of_int c6 in
521 let c7 = Int64.of_int c7 in
522 _make_int64_be c0 c1 c2 c3 c4 c5 c6 c7 in
523 Int64.shift_right_logical word (64 - flen)
525 word, off+flen, len-flen
527 (*----------------------------------------------------------------------*)
528 (* Constructor functions. *)
530 module Buffer = struct
533 mutable len : int; (* Length in bits. *)
534 (* Last byte in the buffer (if len is not aligned). We store
535 * it outside the buffer because buffers aren't mutable.
541 (* XXX We have almost enough information in the generator to
542 * choose a good initial size.
544 { buf = Buffer.create 128; len = 0; last = 0 }
546 let contents { buf = buf; len = len; last = last } =
548 if len land 7 = 0 then
551 Buffer.contents buf ^ (String.make 1 (Char.chr last)) in
554 (* Add exactly 8 bits. *)
555 let add_byte ({ buf = buf; len = len; last = last } as t) byte =
556 if byte < 0 || byte > 255 then invalid_arg "Bitmatch.Buffer.add_byte";
557 let shift = len land 7 in
559 (* Target buffer is byte-aligned. *)
560 Buffer.add_char buf (Char.chr byte)
562 (* Target buffer is unaligned. 'last' is meaningful. *)
563 let first = byte lsr shift in
564 let second = (byte lsl (8 - shift)) land 0xff in
565 Buffer.add_char buf (Char.chr (last lor first));
570 (* Add exactly 1 bit. *)
571 let add_bit ({ buf = buf; len = len; last = last } as t) bit =
572 let shift = 7 - (len land 7) in
574 (* Somewhere in the middle of 'last'. *)
575 t.last <- last lor ((if bit then 1 else 0) lsl shift)
577 (* Just a single spare bit in 'last'. *)
578 let last = last lor if bit then 1 else 0 in
579 Buffer.add_char buf (Char.chr last);
584 (* Add a small number of bits (definitely < 8). This uses a loop
585 * to call add_bit so it's slow.
587 let _add_bits t c slen =
588 if slen < 1 || slen >= 8 then invalid_arg "Bitmatch.Buffer._add_bits";
589 for i = slen-1 downto 0 do
590 let bit = c land (1 lsl i) <> 0 in
594 let add_bits ({ buf = buf; len = len } as t) str slen =
596 if len land 7 = 0 then (
597 if slen land 7 = 0 then
598 (* Common case - everything is byte-aligned. *)
599 Buffer.add_substring buf str 0 (slen lsr 3)
601 (* Target buffer is aligned. Copy whole bytes then leave the
602 * remaining bits in last.
604 let slenbytes = slen lsr 3 in
605 if slenbytes > 0 then Buffer.add_substring buf str 0 slenbytes;
606 t.last <- Char.code str.[slenbytes] lsl (8 - (slen land 7))
610 (* Target buffer is unaligned. Copy whole bytes using
611 * add_byte which knows how to deal with an unaligned
612 * target buffer, then call _add_bits for the remaining < 8 bits.
614 * XXX This is going to be dog-slow.
616 let slenbytes = slen lsr 3 in
617 for i = 0 to slenbytes-1 do
618 let byte = Char.code str.[i] in
621 _add_bits t (Char.code str.[slenbytes]) (slen - (slenbytes lsl 3))
626 (* Construct a single bit. *)
627 let construct_bit buf b _ _ =
630 (* Construct a field, flen = [2..8]. *)
631 let construct_char_unsigned buf v flen exn =
632 let max_val = 1 lsl flen in
633 if v < 0 || v >= max_val then raise exn;
635 Buffer.add_byte buf v
637 Buffer._add_bits buf v flen
(* Construct a field of up to 31 bits, big endian.
 *
 * [v] is first checked with [I.range_unsigned] against the field width
 * [flen]; if the check fails, [exn] is raised and nothing is written.
 * Otherwise [I.map_bytes_be] walks the value, passing the top bits to
 * [Buffer._add_bits buf] and each full byte to [Buffer.add_byte buf].
 *)
let construct_int_be_unsigned buf v flen exn =
  if I.range_unsigned v flen then
    I.map_bytes_be (Buffer._add_bits buf) (Buffer.add_byte buf) v flen
  else
    raise exn
646 (* Construct a field of exactly 32 bits. *)
647 let construct_int32_be_unsigned buf v flen _ =
649 (Int32.to_int (Int32.shift_right_logical v 24));
651 (Int32.to_int ((Int32.logand (Int32.shift_right_logical v 16) 0xff_l)));
653 (Int32.to_int ((Int32.logand (Int32.shift_right_logical v 8) 0xff_l)));
655 (Int32.to_int (Int32.logand v 0xff_l))
(* Construct a field of up to 64 bits, big endian.
 *
 * [v] is first checked with [I64.range_unsigned] against the field
 * width [flen]; if the check fails, [exn] is raised and nothing is
 * written.  Otherwise [I64.map_bytes_be] walks the value, passing the
 * top bits to [Buffer._add_bits buf] and each full byte to
 * [Buffer.add_byte buf].
 *)
let construct_int64_be_unsigned buf v flen exn =
  if I64.range_unsigned v flen then
    I64.map_bytes_be (Buffer._add_bits buf) (Buffer.add_byte buf) v flen
  else
    raise exn
(* Construct from a string of bytes: the whole of [str] is appended to
 * [buf] via [Buffer.add_bits], with the bit count given as eight times
 * the byte length of [str].
 *)
let construct_string buf str =
  let nbits = String.length str lsl 3 in
  Buffer.add_bits buf str nbits
671 (*----------------------------------------------------------------------*)
672 (* Extract a string from a bitstring. *)
674 let string_of_bitstring (data, off, len) =
675 if off land 7 = 0 && len land 7 = 0 then
676 (* Easy case: everything is byte-aligned. *)
677 String.sub data (off lsr 3) (len lsr 3)
679 (* Bit-twiddling case. *)
680 let strlen = (len + 7) lsr 3 in
681 let str = String.make strlen '\000' in
682 let rec loop data off len i =
684 let c, off, len = extract_char_unsigned data off len 8 in
685 str.[i] <- Char.chr c;
686 loop data off len (i+1)
687 ) else if len > 0 then (
688 let c, off, len = extract_char_unsigned data off len len in
689 str.[i] <- Char.chr c
696 (*----------------------------------------------------------------------*)
697 (* Display functions. *)
700 let c = Char.code c in
703 let hexdump_bitstring chan (data, off, len) =
707 let linelen = ref 0 in
708 let linechars = String.make 16 ' ' in
710 fprintf chan "00000000 ";
713 let bits = min !len 8 in
714 let byte, off', len' = extract_char_unsigned data !off !len bits in
715 off := off'; len := len';
717 let byte = byte lsl (8-bits) in
718 fprintf chan "%02x " byte;
721 linechars.[!linelen] <-
722 (let c = Char.chr byte in
723 if isprint c then c else '.');
725 if !linelen = 8 then fprintf chan " ";
726 if !linelen = 16 then (
727 fprintf chan " |%s|\n%08x " linechars !count;
729 for i = 0 to 15 do linechars.[i] <- ' ' done
733 if !linelen > 0 then (
734 let skip = (16 - !linelen) * 3 + if !linelen < 8 then 1 else 0 in
735 for i = 0 to skip-1 do fprintf chan " " done;
736 fprintf chan " |%s|\n%!" linechars