2 * Copyright (C) 2008 Red Hat Inc., Richard W.M. Jones
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 * $Id: bitmatch.ml,v 1.7 2008-04-02 08:05:58 rjones Exp $
23 (* Enable runtime debug messages. Must also have been enabled
29 exception Construct_failure of string * string * int * int
31 (* A bitstring is simply the data itself (as a string), and the
32 * bitoffset and the bitlength within the string. Note offset/length
33 * are counted in bits, not bytes.
35 type bitstring = string * int * int
37 (* Functions to create and load bitstrings. *)
(* The empty bitstring: an empty backing string with bit offset 0 and
 * bit length 0.
 *)
let empty_bitstring : string * int * int = ("", 0, 0)
(* [make_bitstring len c] builds a bitstring of [len] bits whose backing
 * string is filled with the character [c].  The result is the triple
 * (data, bit offset, bit length) with the offset set to 0.
 *
 * Raises [Invalid_argument] if [len] is negative.  Without this check a
 * length in the range -7..-1 rounds up to zero bytes and would silently
 * yield a bitstring whose bit length is negative.
 *)
let make_bitstring len c =
  if len < 0 then invalid_arg "Bitmatch.make_bitstring";
  (* Round the bit count up to a whole number of bytes. *)
  let nbytes = (len + 7) lsr 3 in
  String.make nbytes c, 0, len
(* [create_bitstring len] is a bitstring of [len] bits with every byte of
 * the backing string set to zero; it delegates to [make_bitstring].
 *)
let create_bitstring len =
  let zero = Char.chr 0 in
  make_bitstring len zero
44 let bitstring_of_chan chan =
45 let tmpsize = 16384 in
46 let buf = Buffer.create tmpsize in
47 let tmp = String.create tmpsize in
49 while n := input chan tmp 0 tmpsize; !n > 0 do
50 Buffer.add_substring buf tmp 0 !n;
52 Buffer.contents buf, 0, Buffer.length buf lsl 3
54 let bitstring_of_file fname =
55 let chan = open_in_bin fname in
56 let bs = bitstring_of_chan chan in
(* Return the third component of a bitstring triple, i.e. its length
 * counted in bits.
 *)
let bitstring_length = function
  | (_data, _off, len) -> len
62 (*----------------------------------------------------------------------*)
65 * We try to isolate all bitwise functions within these modules.
69 (* Bitwise operations on ints. Note that we assume int <= 31 bits. *)
76 (* Create a mask so many bits wide. *)
80 else if bits = 30 then
85 (* Byte swap an int of a given size. *)
88 else if bits <= 16 then (
90 let v1 = v >> shift in
91 let v2 = (v land (mask shift)) << 8 in
93 ) else if bits <= 24 then (
94 let shift = bits - 16 in
95 let v1 = v >> (8+shift) in
96 let v2 = ((v >> shift) land ff) << 8 in
97 let v3 = (v land (mask shift)) << 16 in
100 let shift = bits - 24 in
101 let v1 = v >> (16+shift) in
102 let v2 = ((v >> (8+shift)) land ff) << 8 in
103 let v3 = ((v >> shift) land ff) << 16 in
104 let v4 = (v land (mask shift)) << 24 in
105 v4 lor v3 lor v2 lor v1
110 (* Bitwise operations on int32s. Note we try to keep it as similar
111 * as possible to the I module above, to make it easier to track
(* Int32 counterparts of the int operators, so that code written against
 * these names reads the same for both integer widths.  Note that (>>)
 * is the logical (zero-filling) right shift.
 *)
let (<<) v n = Int32.shift_left v n
let (>>) v n = Int32.shift_right_logical v n
let (land) a b = Int32.logand a b
let (lor) a b = Int32.logor a b
let pred v = Int32.pred v
let max_int = Int32.max_int
let minus_one = Int32.minus_one
124 (* Create a mask so many bits wide. *)
128 else if bits = 31 then
133 (* Byte swap an int of a given size. *)
134 let byteswap v bits =
136 else if bits <= 16 then (
137 let shift = bits-8 in
138 let v1 = v >> shift in
139 let v2 = (v land (mask shift)) << 8 in
141 ) else if bits <= 24 then (
142 let shift = bits - 16 in
143 let v1 = v >> (8+shift) in
144 let v2 = ((v >> shift) land ff) << 8 in
145 let v3 = (v land (mask shift)) << 16 in
148 let shift = bits - 24 in
149 let v1 = v >> (16+shift) in
150 let v2 = ((v >> (8+shift)) land ff) << 8 in
151 let v3 = ((v >> shift) land ff) << 16 in
152 let v4 = (v land (mask shift)) << 24 in
153 v4 lor v3 lor v2 lor v1
157 (*----------------------------------------------------------------------*)
158 (* Extraction functions.
160 * NB: internal functions, called from the generated macros, and
161 * the parameters should have been checked for sanity already.
(* Take a field of [flen] bits starting at bit offset [off] of [data].
 * Returns the field as a bitstring triple sharing [data], followed by
 * the advanced offset and the number of bits left out of [len].
 * No range checking is performed here.
 *)
let extract_bitstring data off len flen =
  let field = (data, off, flen) in
  let next_off = off + flen in
  let remaining = len - flen in
  field, next_off, remaining
(* Take everything that is left: the [len] bits starting at bit offset
 * [off] of [data].  Returns that bitstring triple, the offset just past
 * it, and 0 as the number of bits remaining.
 *)
let extract_remainder data off len =
  let rest = (data, off, len) in
  let end_off = off + len in
  rest, end_off, 0
171 (* Extract and convert to numeric. A single bit is returned as
172 * a boolean. There are no endianness or signedness considerations.
174 let extract_bit data off len _ = (* final param is always 1 *)
175 let byteoff = off lsr 3 in
176 let bitmask = 1 lsl (7 - (off land 7)) in
177 let b = Char.code data.[byteoff] land bitmask <> 0 in
180 (* Returns 8 bit unsigned aligned bytes from the string.
181 * If the string ends then this returns 0's.
(* Read the byte at index [byteoff] of [data] as an int in 0..255.
 * Any index at or beyond [strlen] reads as 0 instead of failing.
 *)
let _get_byte data byteoff strlen =
  if byteoff >= strlen then 0
  else Char.code (String.get data byteoff)
(* Read the byte at index [byteoff] of [data], widened to an int32.
 * Any index at or beyond [strlen] reads as 0l instead of failing.
 *)
let _get_byte32 data byteoff strlen =
  if byteoff >= strlen then 0l
  else
    let code = Char.code (String.get data byteoff) in
    Int32.of_int code
(* Read the byte at index [byteoff] of [data], widened to an int64.
 * Any index at or beyond [strlen] reads as 0L instead of failing.
 *)
let _get_byte64 data byteoff strlen =
  if byteoff >= strlen then 0L
  else
    let code = Char.code (String.get data byteoff) in
    Int64.of_int code
190 (* Extract [2..8] bits. Because the result fits into a single
191 * byte we don't have to worry about endianness, only signedness.
193 let extract_char_unsigned data off len flen =
194 let byteoff = off lsr 3 in
196 (* Optimize the common (byte-aligned) case. *)
197 if off land 7 = 0 then (
198 let byte = Char.code data.[byteoff] in
199 byte lsr (8 - flen), off+flen, len-flen
201 (* Extract the 16 bits at byteoff and byteoff+1 (note that the
202 * second byte might not exist in the original string).
204 let strlen = String.length data in
207 (_get_byte data byteoff strlen lsl 8) +
208 _get_byte data (byteoff+1) strlen in
210 (* Mask off the top bits. *)
211 let bitmask = (1 lsl (16 - (off land 7))) - 1 in
212 let word = word land bitmask in
213 (* Shift right to get rid of the bottom bits. *)
214 let shift = 16 - ((off land 7) + flen) in
215 let word = word lsr shift in
217 word, off+flen, len-flen
220 (* Extract [9..31] bits. We have to consider endianness and signedness. *)
221 let extract_int_be_unsigned data off len flen =
222 let byteoff = off lsr 3 in
224 let strlen = String.length data in
227 (* Optimize the common (byte-aligned) case. *)
228 if off land 7 = 0 then (
230 (_get_byte data byteoff strlen lsl 23) +
231 (_get_byte data (byteoff+1) strlen lsl 15) +
232 (_get_byte data (byteoff+2) strlen lsl 7) +
233 (_get_byte data (byteoff+3) strlen lsr 1) in
235 ) else if flen <= 24 then (
236 (* Extract the 31 bits at byteoff .. byteoff+3. *)
238 (_get_byte data byteoff strlen lsl 23) +
239 (_get_byte data (byteoff+1) strlen lsl 15) +
240 (_get_byte data (byteoff+2) strlen lsl 7) +
241 (_get_byte data (byteoff+3) strlen lsr 1) in
242 (* Mask off the top bits. *)
243 let bitmask = (1 lsl (31 - (off land 7))) - 1 in
244 let word = word land bitmask in
245 (* Shift right to get rid of the bottom bits. *)
246 let shift = 31 - ((off land 7) + flen) in
249 (* Extract the next 31 bits, slow method. *)
251 let c0, off, len = extract_char_unsigned data off len 8 in
252 let c1, off, len = extract_char_unsigned data off len 8 in
253 let c2, off, len = extract_char_unsigned data off len 8 in
254 let c3, off, len = extract_char_unsigned data off len 7 in
255 (c0 lsl 23) + (c1 lsl 15) + (c2 lsl 7) + c3 in
258 word, off+flen, len-flen
260 let extract_int_le_unsigned data off len flen =
261 let v, off, len = extract_int_be_unsigned data off len flen in
262 let v = I.byteswap v flen in
265 let _make_int32_be c0 c1 c2 c3 =
269 (Int32.shift_left c0 24)
270 (Int32.shift_left c1 16))
271 (Int32.shift_left c2 8))
274 let _make_int32_le c0 c1 c2 c3 =
278 (Int32.shift_left c3 24)
279 (Int32.shift_left c2 16))
280 (Int32.shift_left c1 8))
283 (* Extract exactly 32 bits. We have to consider endianness and signedness. *)
284 let extract_int32_be_unsigned data off len flen =
285 let byteoff = off lsr 3 in
287 let strlen = String.length data in
290 (* Optimize the common (byte-aligned) case. *)
291 if off land 7 = 0 then (
293 let c0 = _get_byte32 data byteoff strlen in
294 let c1 = _get_byte32 data (byteoff+1) strlen in
295 let c2 = _get_byte32 data (byteoff+2) strlen in
296 let c3 = _get_byte32 data (byteoff+3) strlen in
297 _make_int32_be c0 c1 c2 c3 in
298 Int32.shift_right_logical word (32 - flen)
300 (* Extract the next 32 bits, slow method. *)
302 let c0, off, len = extract_char_unsigned data off len 8 in
303 let c1, off, len = extract_char_unsigned data off len 8 in
304 let c2, off, len = extract_char_unsigned data off len 8 in
305 let c3, _, _ = extract_char_unsigned data off len 8 in
306 let c0 = Int32.of_int c0 in
307 let c1 = Int32.of_int c1 in
308 let c2 = Int32.of_int c2 in
309 let c3 = Int32.of_int c3 in
310 _make_int32_be c0 c1 c2 c3 in
311 Int32.shift_right_logical word (32 - flen)
313 word, off+flen, len-flen
315 let extract_int32_le_unsigned data off len flen =
316 let v, off, len = extract_int32_be_unsigned data off len flen in
317 let v = I32.byteswap v flen in
320 let _make_int64_be c0 c1 c2 c3 c4 c5 c6 c7 =
328 (Int64.shift_left c0 56)
329 (Int64.shift_left c1 48))
330 (Int64.shift_left c2 40))
331 (Int64.shift_left c3 32))
332 (Int64.shift_left c4 24))
333 (Int64.shift_left c5 16))
334 (Int64.shift_left c6 8))
337 (* Extract [1..64] bits. We have to consider endianness and signedness. *)
338 let extract_int64_be_unsigned data off len flen =
339 let byteoff = off lsr 3 in
341 let strlen = String.length data in
344 (* Optimize the common (byte-aligned) case. *)
345 if off land 7 = 0 then (
347 let c0 = _get_byte64 data byteoff strlen in
348 let c1 = _get_byte64 data (byteoff+1) strlen in
349 let c2 = _get_byte64 data (byteoff+2) strlen in
350 let c3 = _get_byte64 data (byteoff+3) strlen in
351 let c4 = _get_byte64 data (byteoff+4) strlen in
352 let c5 = _get_byte64 data (byteoff+5) strlen in
353 let c6 = _get_byte64 data (byteoff+6) strlen in
354 let c7 = _get_byte64 data (byteoff+7) strlen in
355 _make_int64_be c0 c1 c2 c3 c4 c5 c6 c7 in
356 Int64.shift_right_logical word (64 - flen)
358 (* Extract the next 64 bits, slow method. *)
360 let c0, off, len = extract_char_unsigned data off len 8 in
361 let c1, off, len = extract_char_unsigned data off len 8 in
362 let c2, off, len = extract_char_unsigned data off len 8 in
363 let c3, off, len = extract_char_unsigned data off len 8 in
364 let c4, off, len = extract_char_unsigned data off len 8 in
365 let c5, off, len = extract_char_unsigned data off len 8 in
366 let c6, off, len = extract_char_unsigned data off len 8 in
367 let c7, _, _ = extract_char_unsigned data off len 8 in
368 let c0 = Int64.of_int c0 in
369 let c1 = Int64.of_int c1 in
370 let c2 = Int64.of_int c2 in
371 let c3 = Int64.of_int c3 in
372 let c4 = Int64.of_int c4 in
373 let c5 = Int64.of_int c5 in
374 let c6 = Int64.of_int c6 in
375 let c7 = Int64.of_int c7 in
376 _make_int64_be c0 c1 c2 c3 c4 c5 c6 c7 in
377 Int64.shift_right_logical word (64 - flen)
379 word, off+flen, len-flen
381 (*----------------------------------------------------------------------*)
382 (* Constructor functions. *)
384 module Buffer = struct
387 mutable len : int; (* Length in bits. *)
388 (* Last byte in the buffer (if len is not aligned). We store
389 * it outside the buffer because buffers aren't mutable.
395 (* XXX We have almost enough information in the generator to
396 * choose a good initial size.
398 { buf = Buffer.create 128; len = 0; last = 0 }
400 let contents { buf = buf; len = len; last = last } =
402 if len land 7 = 0 then
405 Buffer.contents buf ^ (String.make 1 (Char.chr last)) in
408 (* Add exactly 8 bits. *)
409 let add_byte ({ buf = buf; len = len; last = last } as t) byte =
410 if byte < 0 || byte > 255 then invalid_arg "Bitmatch.Buffer.add_byte";
411 let shift = len land 7 in
413 (* Target buffer is byte-aligned. *)
414 Buffer.add_char buf (Char.chr byte)
416 (* Target buffer is unaligned. 'last' is meaningful. *)
417 let first = byte lsr shift in
418 let second = (byte lsl (8 - shift)) land 0xff in
419 Buffer.add_char buf (Char.chr (last lor first));
424 (* Add exactly 1 bit. *)
425 let add_bit ({ buf = buf; len = len; last = last } as t) bit =
426 let shift = 7 - (len land 7) in
428 (* Somewhere in the middle of 'last'. *)
429 t.last <- last lor ((if bit then 1 else 0) lsl shift)
431 (* Just a single spare bit in 'last'. *)
432 let last = last lor if bit then 1 else 0 in
433 Buffer.add_char buf (Char.chr last);
438 (* Add a small number of bits (definitely < 8). This uses a loop
439 * to call add_bit so it's slow.
441 let _add_bits t c slen =
442 if slen < 1 || slen >= 8 then invalid_arg "Bitmatch.Buffer._add_bits";
443 for i = slen-1 downto 0 do
444 let bit = c land (1 lsl i) <> 0 in
448 let add_bits ({ buf = buf; len = len } as t) str slen =
450 if len land 7 = 0 then (
451 if slen land 7 = 0 then
452 (* Common case - everything is byte-aligned. *)
453 Buffer.add_substring buf str 0 (slen lsr 3)
455 (* Target buffer is aligned. Copy whole bytes then leave the
456 * remaining bits in last.
458 let slenbytes = slen lsr 3 in
459 if slenbytes > 0 then Buffer.add_substring buf str 0 slenbytes;
460 t.last <- Char.code str.[slenbytes] lsl (8 - (slen land 7))
464 (* Target buffer is unaligned. Copy whole bytes using
465 * add_byte which knows how to deal with an unaligned
466 * target buffer, then call _add_bits for the remaining < 8 bits.
468 * XXX This is going to be dog-slow.
470 let slenbytes = slen lsr 3 in
471 for i = 0 to slenbytes-1 do
472 let byte = Char.code str.[i] in
475 _add_bits t (Char.code str.[slenbytes]) (slen - (slenbytes lsl 3))
480 (* Construct a single bit. *)
481 let construct_bit buf b _ =
484 (* Construct a field, flen = [2..8]. *)
485 let construct_char_unsigned buf v flen exn =
486 let max_val = 1 lsl flen in
487 if v < 0 || v >= max_val then raise exn;
489 Buffer.add_byte buf v
491 Buffer._add_bits buf v flen
493 (* Generate a mask with the lower 'bits' bits set. *)
495 if bits < 63 then Int64.pred (Int64.shift_left 1L bits)
496 else if bits = 63 then Int64.max_int
497 else if bits = 64 then -1L
498 else invalid_arg "Bitmatch.mask64"
500 (* Construct a field of up to 64 bits. *)
501 let construct_int64_be_unsigned buf v flen exn =
502 (* Check value is within range. *)
503 let m = Int64.lognot (mask64 flen) in
504 if Int64.logand v m <> 0L then raise exn;
507 let rec loop v flen =
509 loop (Int64.shift_right_logical v 8) (flen-8);
510 let lsb = Int64.to_int (Int64.logand v 0xffL) in
511 Buffer.add_byte buf lsb
512 ) else if flen > 0 then (
513 let lsb = Int64.to_int (Int64.logand v (mask64 flen)) in
514 Buffer._add_bits buf lsb flen
519 (*----------------------------------------------------------------------*)
520 (* Display functions. *)
523 let c = Char.code c in
526 let hexdump_bitstring chan (data, off, len) =
530 let linelen = ref 0 in
531 let linechars = String.make 16 ' ' in
533 fprintf chan "00000000 ";
536 let bits = min !len 8 in
537 let byte, off', len' = extract_char_unsigned data !off !len bits in
538 off := off'; len := len';
540 let byte = byte lsl (8-bits) in
541 fprintf chan "%02x " byte;
544 linechars.[!linelen] <-
545 (let c = Char.chr byte in
546 if isprint c then c else '.');
548 if !linelen = 8 then fprintf chan " ";
549 if !linelen = 16 then (
550 fprintf chan " |%s|\n%08x " linechars !count;
552 for i = 0 to 15 do linechars.[i] <- ' ' done
556 if !linelen > 0 then (
557 let skip = (16 - !linelen) * 3 + if !linelen < 8 then 1 else 0 in
558 for i = 0 to skip-1 do fprintf chan " " done;
559 fprintf chan " |%s|\n%!" linechars