(*
 * Copyright (C) 2008 Red Hat Inc., Richard W.M. Jones
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * $Id: bitmatch.ml,v 1.9 2008-04-15 13:40:51 rjones Exp $
 *)
23 (* Enable runtime debug messages. Must also have been enabled
(* Exception whose payload is two strings followed by two ints.
 * NOTE(review): judging by the name it reports a failed bitstring
 * construction, and the payload is presumably (message, source file,
 * line, column) -- confirm against the code that raises it.
 *)
exception Construct_failure of string * string * int * int
(* A bitstring is the underlying data (held in a string) together with
 * a bit offset and a bit length into that string.  Note that both the
 * offset and the length are counted in bits, not bytes.
 *)
type bitstring = string * int * int
(* Functions to create and load bitstrings. *)

(* The bitstring of length zero: empty data, bit offset 0, bit length 0. *)
let empty_bitstring = ("", 0, 0)
(* [make_bitstring len c] builds a bitstring of [len] bits starting at
 * bit offset 0.  The backing string holds [len] rounded up to a whole
 * number of bytes, every byte being initialised to [c].
 *)
let make_bitstring len c =
  let nbytes = (len + 7) lsr 3 in
  (String.make nbytes c, 0, len)
(* [create_bitstring len] builds a zero-filled bitstring of [len] bits:
 * it is [make_bitstring] with a NUL fill byte.
 *)
let create_bitstring len =
  make_bitstring len '\000'
(* [bitstring_of_chan chan] reads [chan] until [input] reports end of
 * file (a read of 0 bytes) and returns everything read as a bitstring
 * with bit offset 0 and a bit length of 8 times the byte count.
 *
 * The data is read in chunks of [tmpsize] bytes into a scratch buffer
 * and accumulated in a [Buffer].  The scratch buffer is a [Bytes]
 * value because [input] writes into it.  The channel is not closed.
 *)
let bitstring_of_chan chan =
  let tmpsize = 16384 in
  let buf = Buffer.create tmpsize in
  let tmp = Bytes.create tmpsize in
  let rec slurp () =
    let n = input chan tmp 0 tmpsize in
    if n > 0 then begin
      Buffer.add_subbytes buf tmp 0 n;
      slurp ()
    end
  in
  slurp ();
  (* Buffer.length is in bytes; the bitstring length is in bits. *)
  (Buffer.contents buf, 0, Buffer.length buf lsl 3)
(* [bitstring_of_file fname] opens [fname] in binary mode, reads the
 * whole file with [bitstring_of_chan] and returns the resulting
 * bitstring.  The channel is closed before returning, and also when
 * reading raises (the exception is then re-raised unchanged).
 *)
let bitstring_of_file fname =
  let chan = open_in_bin fname in
  let bs =
    try bitstring_of_chan chan
    with exn ->
      (* Don't leak the descriptor if the read fails. *)
      close_in_noerr chan;
      raise exn in
  close_in chan;
  bs
(* Length of a bitstring in bits, i.e. the third component of the
 * (data, offset, length) triple.
 *)
let bitstring_length bs =
  let (_, _, len) = bs in
  len
62 (*----------------------------------------------------------------------*)
65 * We try to isolate all bitwise functions within these modules.
69 (* Bitwise operations on ints. Note that we assume int <= 31 bits. *)
72 external to_int : int -> int = "%identity"
78 (* Create a mask so many bits wide. *)
82 else if bits = 30 then
84 else if bits = 31 then
87 invalid_arg "Bitmatch.I.mask"
89 (* Byte swap an int of a given size. *)
92 else if bits <= 16 then (
94 let v1 = v >> shift in
95 let v2 = (v land (mask shift)) << 8 in
97 ) else if bits <= 24 then (
98 let shift = bits - 16 in
99 let v1 = v >> (8+shift) in
100 let v2 = ((v >> shift) land ff) << 8 in
101 let v3 = (v land (mask shift)) << 16 in
104 let shift = bits - 24 in
105 let v1 = v >> (16+shift) in
106 let v2 = ((v >> (8+shift)) land ff) << 8 in
107 let v3 = ((v >> shift) land ff) << 16 in
108 let v4 = (v land (mask shift)) << 24 in
109 v4 lor v3 lor v2 lor v1
112 (* Check a value is in range 0 .. 2^bits-1. *)
113 let range_unsigned v bits =
114 let mask = lnot (mask bits) in
117 (* Call function g on the top bits, then f on each full byte
118 * (big endian - so start at top).
120 let rec map_bytes_be g f v bits =
122 map_bytes_be g f (v >> 8) (bits-8);
123 let lsb = v land ff in
125 ) else if bits > 0 then (
126 let lsb = v land (mask bits) in
132 (* Bitwise operations on int32s. Note we try to keep it as similar
133 * as possible to the I module above, to make it easier to track
136 let (<<) = Int32.shift_left
137 let (>>) = Int32.shift_right_logical
138 let (land) = Int32.logand
139 let (lor) = Int32.logor
140 let lnot = Int32.lognot
141 let pred = Int32.pred
142 let max_int = Int32.max_int
143 let to_int = Int32.to_int
144 let zero = Int32.zero
146 let minus_one = Int32.minus_one
149 (* Create a mask so many bits wide. *)
153 else if bits = 31 then
155 else if bits = 32 then
158 invalid_arg "Bitmatch.I32.mask"
160 (* Byte swap an int of a given size. *)
161 let byteswap v bits =
163 else if bits <= 16 then (
164 let shift = bits-8 in
165 let v1 = v >> shift in
166 let v2 = (v land (mask shift)) << 8 in
168 ) else if bits <= 24 then (
169 let shift = bits - 16 in
170 let v1 = v >> (8+shift) in
171 let v2 = ((v >> shift) land ff) << 8 in
172 let v3 = (v land (mask shift)) << 16 in
175 let shift = bits - 24 in
176 let v1 = v >> (16+shift) in
177 let v2 = ((v >> (8+shift)) land ff) << 8 in
178 let v3 = ((v >> shift) land ff) << 16 in
179 let v4 = (v land (mask shift)) << 24 in
180 v4 lor v3 lor v2 lor v1
183 (* Check a value is in range 0 .. 2^bits-1. *)
184 let range_unsigned v bits =
185 let mask = lnot (mask bits) in
188 (* Call function g on the top bits, then f on each full byte
189 * (big endian - so start at top).
191 let rec map_bytes_be g f v bits =
193 map_bytes_be g f (v >> 8) (bits-8);
194 let lsb = v land ff in
196 ) else if bits > 0 then (
197 let lsb = v land (mask bits) in
203 (* Bitwise operations on int64s. Note we try to keep it as similar
204 * as possible to the I/I32 modules above, to make it easier to track
207 let (<<) = Int64.shift_left
208 let (>>) = Int64.shift_right_logical
209 let (land) = Int64.logand
210 let (lor) = Int64.logor
211 let lnot = Int64.lognot
212 let pred = Int64.pred
213 let max_int = Int64.max_int
214 let to_int = Int64.to_int
215 let zero = Int64.zero
217 let minus_one = Int64.minus_one
220 (* Create a mask so many bits wide. *)
224 else if bits = 63 then
226 else if bits = 64 then
229 invalid_arg "Bitmatch.I64.mask"
231 (* Byte swap an int of a given size. *)
232 (* let byteswap v bits = *)
234 (* Check a value is in range 0 .. 2^bits-1. *)
235 let range_unsigned v bits =
236 let mask = lnot (mask bits) in
239 (* Call function g on the top bits, then f on each full byte
240 * (big endian - so start at top).
242 let rec map_bytes_be g f v bits =
244 map_bytes_be g f (v >> 8) (bits-8);
245 let lsb = v land ff in
247 ) else if bits > 0 then (
248 let lsb = v land (mask bits) in
(*----------------------------------------------------------------------*)
(* Extraction functions.
 *
 * NB: these are internal functions, called from the generated macros;
 * the parameters should already have been checked for sanity.
 *)
(* [extract_bitstring data off len flen] takes the [flen]-bit field
 * starting at bit offset [off] as a sub-bitstring over the same [data]
 * string (no bytes are copied).  Returns the field, the bit offset
 * just past it, and the number of bits left ([len - flen]).
 * No range checks are performed here.
 *)
let extract_bitstring data off len flen =
  let field = (data, off, flen) in
  let next_off = off + flen in
  let remaining = len - flen in
  (field, next_off, remaining)
(* [extract_remainder data off len] takes all [len] remaining bits,
 * starting at bit offset [off], as a bitstring over the same [data].
 * Returns that bitstring, the offset just past it, and 0 bits left.
 *)
let extract_remainder data off len =
  let rest = (data, off, len) in
  (rest, off + len, 0)
(* Extract a single bit and convert it to numeric form: the bit is
 * returned as a boolean.  There are no endianness or signedness
 * considerations.
 *
 * Bits are numbered from the most significant end of each byte, so
 * bit offset 0 is the 0x80 bit of the first byte.  Returns the bit,
 * the offset of the next bit, and the remaining length less one.
 * Raises [Invalid_argument] (from the string access) if the byte
 * holding the bit lies outside [data].
 *)
let extract_bit data off len _ =        (* final param is always 1 *)
  let byte = Char.code data.[off lsr 3] in
  let bitmask = 0x80 lsr (off land 7) in
  let b = byte land bitmask <> 0 in
  (b, off + 1, len - 1)
(* Returns the byte at byte offset [byteoff] of [data] as an unsigned
 * 8 bit int.  [strlen] is the caller-supplied length of the string:
 * reading at or beyond it returns 0 instead of raising, so callers may
 * read past the end of the data and see zeroes.
 *)
let _get_byte data byteoff strlen =
  if byteoff < strlen then Char.code (String.get data byteoff) else 0
(* Int32 flavour of the byte fetch: the byte at [byteoff] of [data] as
 * an unsigned value widened to int32, or 0l when [byteoff] is at or
 * beyond the caller-supplied length [strlen].
 *)
let _get_byte32 data byteoff strlen =
  if byteoff >= strlen then 0l
  else Int32.of_int (Char.code (String.get data byteoff))
(* Int64 flavour of the byte fetch: the byte at [byteoff] of [data] as
 * an unsigned value widened to int64, or 0L when [byteoff] is at or
 * beyond the caller-supplied length [strlen].
 *)
let _get_byte64 data byteoff strlen =
  if byteoff >= strlen then 0L
  else Int64.of_int (Char.code (String.get data byteoff))
(* Extract [2..8] bits.  Because the result fits into a single byte we
 * don't have to worry about endianness, only signedness.
 *
 * Returns the field as an unsigned int, the bit offset just past the
 * field ([off + flen]) and the remaining length ([len - flen]).
 *)
let extract_char_unsigned data off len flen =
  let byteoff = off lsr 3 in
  let bitoff = off land 7 in
  let value =
    if bitoff = 0 then
      (* Optimize the common (byte-aligned) case: the field is the top
       * [flen] bits of one byte. *)
      Char.code data.[byteoff] lsr (8 - flen)
    else begin
      (* Extract the 16 bits at byteoff and byteoff+1 (note that the
       * second byte might not exist in the original string, in which
       * case _get_byte supplies 0). *)
      let strlen = String.length data in
      let word =
        (_get_byte data byteoff strlen lsl 8)
        lor _get_byte data (byteoff + 1) strlen in
      (* Mask off the top bits, i.e. those preceding the field. *)
      let word = word land ((1 lsl (16 - bitoff)) - 1) in
      (* Shift right to get rid of the bottom bits following it. *)
      word lsr (16 - (bitoff + flen))
    end in
  (value, off + flen, len - flen)
316 (* Extract [9..31] bits. We have to consider endianness and signedness. *)
317 let extract_int_be_unsigned data off len flen =
318 let byteoff = off lsr 3 in
320 let strlen = String.length data in
323 (* Optimize the common (byte-aligned) case. *)
324 if off land 7 = 0 then (
326 (_get_byte data byteoff strlen lsl 23) +
327 (_get_byte data (byteoff+1) strlen lsl 15) +
328 (_get_byte data (byteoff+2) strlen lsl 7) +
329 (_get_byte data (byteoff+3) strlen lsr 1) in
331 ) else if flen <= 24 then (
332 (* Extract the 31 bits at byteoff .. byteoff+3. *)
334 (_get_byte data byteoff strlen lsl 23) +
335 (_get_byte data (byteoff+1) strlen lsl 15) +
336 (_get_byte data (byteoff+2) strlen lsl 7) +
337 (_get_byte data (byteoff+3) strlen lsr 1) in
338 (* Mask off the top bits. *)
339 let bitmask = (1 lsl (31 - (off land 7))) - 1 in
340 let word = word land bitmask in
341 (* Shift right to get rid of the bottom bits. *)
342 let shift = 31 - ((off land 7) + flen) in
345 (* Extract the next 31 bits, slow method. *)
347 let c0, off, len = extract_char_unsigned data off len 8 in
348 let c1, off, len = extract_char_unsigned data off len 8 in
349 let c2, off, len = extract_char_unsigned data off len 8 in
350 let c3, off, len = extract_char_unsigned data off len 7 in
351 (c0 lsl 23) + (c1 lsl 15) + (c2 lsl 7) + c3 in
354 word, off+flen, len-flen
(* Little-endian counterpart of [extract_int_be_unsigned]: the field is
 * first extracted big-endian, then the [flen]-bit value is byte-swapped
 * with [I.byteswap].  The offset and remaining length are passed on
 * exactly as the big-endian extractor returned them.
 *)
let extract_int_le_unsigned data off len flen =
  let be, off, len = extract_int_be_unsigned data off len flen in
  (I.byteswap be flen, off, len)
361 let _make_int32_be c0 c1 c2 c3 =
365 (Int32.shift_left c0 24)
366 (Int32.shift_left c1 16))
367 (Int32.shift_left c2 8))
370 let _make_int32_le c0 c1 c2 c3 =
374 (Int32.shift_left c3 24)
375 (Int32.shift_left c2 16))
376 (Int32.shift_left c1 8))
379 (* Extract exactly 32 bits. We have to consider endianness and signedness. *)
380 let extract_int32_be_unsigned data off len flen =
381 let byteoff = off lsr 3 in
383 let strlen = String.length data in
386 (* Optimize the common (byte-aligned) case. *)
387 if off land 7 = 0 then (
389 let c0 = _get_byte32 data byteoff strlen in
390 let c1 = _get_byte32 data (byteoff+1) strlen in
391 let c2 = _get_byte32 data (byteoff+2) strlen in
392 let c3 = _get_byte32 data (byteoff+3) strlen in
393 _make_int32_be c0 c1 c2 c3 in
394 Int32.shift_right_logical word (32 - flen)
396 (* Extract the next 32 bits, slow method. *)
398 let c0, off, len = extract_char_unsigned data off len 8 in
399 let c1, off, len = extract_char_unsigned data off len 8 in
400 let c2, off, len = extract_char_unsigned data off len 8 in
401 let c3, _, _ = extract_char_unsigned data off len 8 in
402 let c0 = Int32.of_int c0 in
403 let c1 = Int32.of_int c1 in
404 let c2 = Int32.of_int c2 in
405 let c3 = Int32.of_int c3 in
406 _make_int32_be c0 c1 c2 c3 in
407 Int32.shift_right_logical word (32 - flen)
409 word, off+flen, len-flen
(* Little-endian counterpart of [extract_int32_be_unsigned]: the field
 * is first extracted big-endian, then the [flen]-bit int32 value is
 * byte-swapped with [I32.byteswap].  The offset and remaining length
 * are passed on exactly as the big-endian extractor returned them.
 *)
let extract_int32_le_unsigned data off len flen =
  let be, off, len = extract_int32_be_unsigned data off len flen in
  (I32.byteswap be flen, off, len)
416 let _make_int64_be c0 c1 c2 c3 c4 c5 c6 c7 =
424 (Int64.shift_left c0 56)
425 (Int64.shift_left c1 48))
426 (Int64.shift_left c2 40))
427 (Int64.shift_left c3 32))
428 (Int64.shift_left c4 24))
429 (Int64.shift_left c5 16))
430 (Int64.shift_left c6 8))
433 (* Extract [1..64] bits. We have to consider endianness and signedness. *)
434 let extract_int64_be_unsigned data off len flen =
435 let byteoff = off lsr 3 in
437 let strlen = String.length data in
440 (* Optimize the common (byte-aligned) case. *)
441 if off land 7 = 0 then (
443 let c0 = _get_byte64 data byteoff strlen in
444 let c1 = _get_byte64 data (byteoff+1) strlen in
445 let c2 = _get_byte64 data (byteoff+2) strlen in
446 let c3 = _get_byte64 data (byteoff+3) strlen in
447 let c4 = _get_byte64 data (byteoff+4) strlen in
448 let c5 = _get_byte64 data (byteoff+5) strlen in
449 let c6 = _get_byte64 data (byteoff+6) strlen in
450 let c7 = _get_byte64 data (byteoff+7) strlen in
451 _make_int64_be c0 c1 c2 c3 c4 c5 c6 c7 in
452 Int64.shift_right_logical word (64 - flen)
454 (* Extract the next 64 bits, slow method. *)
456 let c0, off, len = extract_char_unsigned data off len 8 in
457 let c1, off, len = extract_char_unsigned data off len 8 in
458 let c2, off, len = extract_char_unsigned data off len 8 in
459 let c3, off, len = extract_char_unsigned data off len 8 in
460 let c4, off, len = extract_char_unsigned data off len 8 in
461 let c5, off, len = extract_char_unsigned data off len 8 in
462 let c6, off, len = extract_char_unsigned data off len 8 in
463 let c7, _, _ = extract_char_unsigned data off len 8 in
464 let c0 = Int64.of_int c0 in
465 let c1 = Int64.of_int c1 in
466 let c2 = Int64.of_int c2 in
467 let c3 = Int64.of_int c3 in
468 let c4 = Int64.of_int c4 in
469 let c5 = Int64.of_int c5 in
470 let c6 = Int64.of_int c6 in
471 let c7 = Int64.of_int c7 in
472 _make_int64_be c0 c1 c2 c3 c4 c5 c6 c7 in
473 Int64.shift_right_logical word (64 - flen)
475 word, off+flen, len-flen
477 (*----------------------------------------------------------------------*)
478 (* Constructor functions. *)
480 module Buffer = struct
483 mutable len : int; (* Length in bits. *)
484 (* Last byte in the buffer (if len is not aligned). We store
485 * it outside the buffer because buffers aren't mutable.
491 (* XXX We have almost enough information in the generator to
492 * choose a good initial size.
494 { buf = Buffer.create 128; len = 0; last = 0 }
496 let contents { buf = buf; len = len; last = last } =
498 if len land 7 = 0 then
501 Buffer.contents buf ^ (String.make 1 (Char.chr last)) in
504 (* Add exactly 8 bits. *)
505 let add_byte ({ buf = buf; len = len; last = last } as t) byte =
506 if byte < 0 || byte > 255 then invalid_arg "Bitmatch.Buffer.add_byte";
507 let shift = len land 7 in
509 (* Target buffer is byte-aligned. *)
510 Buffer.add_char buf (Char.chr byte)
512 (* Target buffer is unaligned. 'last' is meaningful. *)
513 let first = byte lsr shift in
514 let second = (byte lsl (8 - shift)) land 0xff in
515 Buffer.add_char buf (Char.chr (last lor first));
520 (* Add exactly 1 bit. *)
521 let add_bit ({ buf = buf; len = len; last = last } as t) bit =
522 let shift = 7 - (len land 7) in
524 (* Somewhere in the middle of 'last'. *)
525 t.last <- last lor ((if bit then 1 else 0) lsl shift)
527 (* Just a single spare bit in 'last'. *)
528 let last = last lor if bit then 1 else 0 in
529 Buffer.add_char buf (Char.chr last);
534 (* Add a small number of bits (definitely < 8). This uses a loop
535 * to call add_bit so it's slow.
537 let _add_bits t c slen =
538 if slen < 1 || slen >= 8 then invalid_arg "Bitmatch.Buffer._add_bits";
539 for i = slen-1 downto 0 do
540 let bit = c land (1 lsl i) <> 0 in
544 let add_bits ({ buf = buf; len = len } as t) str slen =
546 if len land 7 = 0 then (
547 if slen land 7 = 0 then
548 (* Common case - everything is byte-aligned. *)
549 Buffer.add_substring buf str 0 (slen lsr 3)
551 (* Target buffer is aligned. Copy whole bytes then leave the
552 * remaining bits in last.
554 let slenbytes = slen lsr 3 in
555 if slenbytes > 0 then Buffer.add_substring buf str 0 slenbytes;
556 t.last <- Char.code str.[slenbytes] lsl (8 - (slen land 7))
560 (* Target buffer is unaligned. Copy whole bytes using
561 * add_byte which knows how to deal with an unaligned
562 * target buffer, then call _add_bits for the remaining < 8 bits.
564 * XXX This is going to be dog-slow.
566 let slenbytes = slen lsr 3 in
567 for i = 0 to slenbytes-1 do
568 let byte = Char.code str.[i] in
571 _add_bits t (Char.code str.[slenbytes]) (slen - (slenbytes lsl 3))
(* Construct a single bit: append the boolean [b] to [buf] as one bit.
 * The last parameter is ignored.
 *)
let construct_bit buf b _ =
  Buffer.add_bit buf b
(* Construct a field, flen = [2..8].
 *
 * Appends the low [flen] bits of [v] to [buf].  Raises the supplied
 * exception [exn] unless 0 <= v < 2^flen.
 *)
let construct_char_unsigned buf v flen exn =
  let max_val = 1 lsl flen in
  if not (0 <= v && v < max_val) then raise exn;
  (* Buffer.add_byte appends exactly 8 bits, whereas Buffer._add_bits
   * only accepts 1..7 bits, so a full byte must take the first path. *)
  if flen = 8 then
    Buffer.add_byte buf v
  else
    Buffer._add_bits buf v flen
(* Construct a field of up to 31 bits, big endian.
 *
 * Raises the supplied exception [exn] when [I.range_unsigned] rejects
 * [v] for a width of [flen] bits.  Otherwise the value is written to
 * [buf] by [I.map_bytes_be], which hands the top (partial) bits to
 * [Buffer._add_bits] and each full byte to [Buffer.add_byte].
 *)
let construct_int_be_unsigned buf v flen exn =
  if I.range_unsigned v flen then
    (* Value fits: add the bytes. *)
    I.map_bytes_be (Buffer._add_bits buf) (Buffer.add_byte buf) v flen
  else
    raise exn
(* Construct a field of up to 64 bits, big endian.
 *
 * Raises the supplied exception [exn] when [I64.range_unsigned] rejects
 * [v] for a width of [flen] bits.  Otherwise the value is written to
 * [buf] by [I64.map_bytes_be], which hands the top (partial) bits to
 * [Buffer._add_bits] and each full byte to [Buffer.add_byte].
 *)
let construct_int64_be_unsigned buf v flen exn =
  if I64.range_unsigned v flen then
    (* Value fits: add the bytes. *)
    I64.map_bytes_be (Buffer._add_bits buf) (Buffer.add_byte buf) v flen
  else
    raise exn
603 (*----------------------------------------------------------------------*)
604 (* Extract a string from a bitstring. *)
606 let string_of_bitstring (data, off, len) =
607 if off land 7 = 0 && len land 7 = 0 then
608 (* Easy case: everything is byte-aligned. *)
609 String.sub data (off lsr 3) (len lsr 3)
611 (* Bit-twiddling case. *)
612 let strlen = (len + 7) lsr 3 in
613 let str = String.make strlen '\000' in
614 let rec loop data off len i =
616 let c, off, len = extract_char_unsigned data off len 8 in
617 str.[i] <- Char.chr c;
618 loop data off len (i+1)
619 ) else if len > 0 then (
620 let c, off, len = extract_char_unsigned data off len len in
621 str.[i] <- Char.chr c
628 (*----------------------------------------------------------------------*)
629 (* Display functions. *)
632 let c = Char.code c in
635 let hexdump_bitstring chan (data, off, len) =
639 let linelen = ref 0 in
640 let linechars = String.make 16 ' ' in
642 fprintf chan "00000000 ";
645 let bits = min !len 8 in
646 let byte, off', len' = extract_char_unsigned data !off !len bits in
647 off := off'; len := len';
649 let byte = byte lsl (8-bits) in
650 fprintf chan "%02x " byte;
653 linechars.[!linelen] <-
654 (let c = Char.chr byte in
655 if isprint c then c else '.');
657 if !linelen = 8 then fprintf chan " ";
658 if !linelen = 16 then (
659 fprintf chan " |%s|\n%08x " linechars !count;
661 for i = 0 to 15 do linechars.[i] <- ' ' done
665 if !linelen > 0 then (
666 let skip = (16 - !linelen) * 3 + if !linelen < 8 then 1 else 0 in
667 for i = 0 to skip-1 do fprintf chan " " done;
668 fprintf chan " |%s|\n%!" linechars