2 * $Id: bitmatch.ml,v 1.6 2008-04-01 19:10:45 rjones Exp $
7 (* Enable runtime debug messages. Must also have been enabled
(* Exception carrying two strings followed by two ints.
 *
 * NOTE(review): judging by the name this reports a failure while
 * constructing a bitstring, and the payload looks like a message plus a
 * source location (file, line, column) - nothing visible here raises it,
 * so confirm against the code that does.
 *)
exception Construct_failure of string * string * int * int
(* A bitstring is a triple (data, bitoffset, bitlength): the data itself
 * (as a string), plus the offset and the length of the region of interest
 * within that string.  Both the offset and the length are counted in
 * bits, not bytes.
 *)
type bitstring = string * int * int
21 (* Functions to create and load bitstrings. *)
(* The bitstring of zero bits: empty data, bit offset 0, bit length 0. *)
let empty_bitstring = ("", 0, 0)
(* [make_bitstring len c] builds a bitstring of [len] bits starting at bit
 * offset 0.  The backing string holds (len + 7) / 8 bytes - enough to
 * cover [len] bits, rounded up to a whole byte - each set to [c].
 *
 * Raises Invalid_argument if [len] is negative.
 *)
let make_bitstring len c =
  (* Without this guard a [len] in -7 .. -1 rounds up to zero bytes and
   * silently yields a bitstring whose bit length is negative, while
   * anything smaller only fails inside String.make with an unrelated
   * message.
   *)
  if len < 0 then invalid_arg "Bitmatch.make_bitstring";
  String.make ((len + 7) lsr 3) c, 0, len
(* [create_bitstring len] is a bitstring of [len] bits whose backing bytes
 * are all zero; it delegates to make_bitstring with the NUL character.
 *)
let create_bitstring len =
  let zero_byte = Char.chr 0 in
  make_bitstring len zero_byte
28 let bitstring_of_chan chan =
29 let tmpsize = 16384 in
30 let buf = Buffer.create tmpsize in
31 let tmp = String.create tmpsize in
33 while n := input chan tmp 0 tmpsize; !n > 0 do
34 Buffer.add_substring buf tmp 0 !n;
36 Buffer.contents buf, 0, Buffer.length buf lsl 3
38 let bitstring_of_file fname =
39 let chan = open_in_bin fname in
40 let bs = bitstring_of_chan chan in
(* [bitstring_length bs] is the third component of the triple [bs], i.e.
 * its length in bits.
 *)
let bitstring_length bs =
  let _, _, bit_len = bs in
  bit_len
46 (*----------------------------------------------------------------------*)
49 * We try to isolate all bitwise functions within these modules.
53 (* Bitwise operations on ints. Note that we assume int <= 31 bits. *)
60 (* Create a mask so many bits wide. *)
64 else if bits = 30 then
69 (* Byte swap an int of a given size. *)
72 else if bits <= 16 then (
74 let v1 = v >> shift in
75 let v2 = (v land (mask shift)) << 8 in
77 ) else if bits <= 24 then (
78 let shift = bits - 16 in
79 let v1 = v >> (8+shift) in
80 let v2 = ((v >> shift) land ff) << 8 in
81 let v3 = (v land (mask shift)) << 16 in
84 let shift = bits - 24 in
85 let v1 = v >> (16+shift) in
86 let v2 = ((v >> (8+shift)) land ff) << 8 in
87 let v3 = ((v >> shift) land ff) << 16 in
88 let v4 = (v land (mask shift)) << 24 in
89 v4 lor v3 lor v2 lor v1
94 (* Bitwise operations on int32s. Note we try to keep it as similar
95 * as possible to the I module above, to make it easier to track
(* Int32 versions of the operators and constants used by the bitwise
 * helpers, so the same formulas can be written for int32 values.
 * NOTE(review): presumably these live inside the I32 module - its header
 * is not visible here.
 *)

(* Left shift. *)
let (<<) word count = Int32.shift_left word count

(* Logical (zero-filling) right shift: the sign bit is not propagated. *)
let (>>) word count = Int32.shift_right_logical word count

(* Bitwise and / or. *)
let (land) a b = Int32.logand a b
let (lor) a b = Int32.logor a b

(* Predecessor, i.e. [word - 1]. *)
let pred word = Int32.pred word

(* Largest int32, and the int32 with every bit set. *)
let max_int = Int32.max_int
let minus_one = Int32.minus_one
108 (* Create a mask so many bits wide. *)
112 else if bits = 31 then
117 (* Byte swap an int of a given size. *)
118 let byteswap v bits =
120 else if bits <= 16 then (
121 let shift = bits-8 in
122 let v1 = v >> shift in
123 let v2 = (v land (mask shift)) << 8 in
125 ) else if bits <= 24 then (
126 let shift = bits - 16 in
127 let v1 = v >> (8+shift) in
128 let v2 = ((v >> shift) land ff) << 8 in
129 let v3 = (v land (mask shift)) << 16 in
132 let shift = bits - 24 in
133 let v1 = v >> (16+shift) in
134 let v2 = ((v >> (8+shift)) land ff) << 8 in
135 let v3 = ((v >> shift) land ff) << 16 in
136 let v4 = (v land (mask shift)) << 24 in
137 v4 lor v3 lor v2 lor v1
141 (*----------------------------------------------------------------------*)
(* Extraction functions.
 *
 * NB: these are internal functions, called from the generated macros, and
 * the parameters should have been checked for sanity already.
 *)
(* [extract_bitstring data off len flen] takes a field of [flen] bits
 * starting at bit offset [off] in [data].
 *
 * Returns (field, off', len') where field is (data, off, flen) - it
 * shares [data], nothing is copied - off' is off + flen and len' is
 * len - flen.  No bounds checking is done here.
 *)
let extract_bitstring data off len flen =
  let field = (data, off, flen) in
  let next_off = off + flen in
  let remaining = len - flen in
  field, next_off, remaining
(* [extract_remainder data off len] takes everything that is left: the
 * field is (data, off, len), the new offset is off + len and the new
 * remaining length is always 0.
 *)
let extract_remainder data off len =
  let rest = (data, off, len) in
  let end_off = off + len in
  rest, end_off, 0
(* Extract and convert to numeric.  A single bit is returned as
 * a boolean.  There are no endianness or signedness considerations.
 *)
158 let extract_bit data off len _ = (* final param is always 1 *)
159 let byteoff = off lsr 3 in
160 let bitmask = 1 lsl (7 - (off land 7)) in
161 let b = Char.code data.[byteoff] land bitmask <> 0 in
(* Return the byte at index [byteoff] of [data] as an unsigned 8 bit int.
 * [strlen] is the length the caller takes [data] to have: any index at or
 * beyond it reads as 0 rather than raising, so a caller can fetch a fixed
 * number of bytes even when the string ends early.
 *)
let _get_byte data byteoff strlen =
  if byteoff >= strlen then 0
  else Char.code data.[byteoff]
(* Return the byte at index [byteoff] of [data] as an int32 in 0 .. 255,
 * or 0l when [byteoff] is at or beyond [strlen].
 *)
let _get_byte32 data byteoff strlen =
  if byteoff >= strlen then Int32.zero
  else
    let code = Char.code data.[byteoff] in
    Int32.of_int code
(* Return the byte at index [byteoff] of [data] as an int64 in 0 .. 255,
 * or 0L when [byteoff] is at or beyond [strlen].
 *)
let _get_byte64 data byteoff strlen =
  if byteoff >= strlen then Int64.zero
  else
    let code = Char.code data.[byteoff] in
    Int64.of_int code
(* Extract [2..8] bits.  Because the result fits into a single
 * byte we don't have to worry about endianness, only signedness.
 *)
177 let extract_char_unsigned data off len flen =
178 let byteoff = off lsr 3 in
180 (* Optimize the common (byte-aligned) case. *)
181 if off land 7 = 0 then (
182 let byte = Char.code data.[byteoff] in
183 byte lsr (8 - flen), off+flen, len-flen
185 (* Extract the 16 bits at byteoff and byteoff+1 (note that the
186 * second byte might not exist in the original string).
188 let strlen = String.length data in
191 (_get_byte data byteoff strlen lsl 8) +
192 _get_byte data (byteoff+1) strlen in
194 (* Mask off the top bits. *)
195 let bitmask = (1 lsl (16 - (off land 7))) - 1 in
196 let word = word land bitmask in
197 (* Shift right to get rid of the bottom bits. *)
198 let shift = 16 - ((off land 7) + flen) in
199 let word = word lsr shift in
201 word, off+flen, len-flen
204 (* Extract [9..31] bits. We have to consider endianness and signedness. *)
205 let extract_int_be_unsigned data off len flen =
206 let byteoff = off lsr 3 in
208 let strlen = String.length data in
211 (* Optimize the common (byte-aligned) case. *)
212 if off land 7 = 0 then (
214 (_get_byte data byteoff strlen lsl 23) +
215 (_get_byte data (byteoff+1) strlen lsl 15) +
216 (_get_byte data (byteoff+2) strlen lsl 7) +
217 (_get_byte data (byteoff+3) strlen lsr 1) in
219 ) else if flen <= 24 then (
220 (* Extract the 31 bits at byteoff .. byteoff+3. *)
222 (_get_byte data byteoff strlen lsl 23) +
223 (_get_byte data (byteoff+1) strlen lsl 15) +
224 (_get_byte data (byteoff+2) strlen lsl 7) +
225 (_get_byte data (byteoff+3) strlen lsr 1) in
226 (* Mask off the top bits. *)
227 let bitmask = (1 lsl (31 - (off land 7))) - 1 in
228 let word = word land bitmask in
229 (* Shift right to get rid of the bottom bits. *)
230 let shift = 31 - ((off land 7) + flen) in
233 (* Extract the next 31 bits, slow method. *)
235 let c0, off, len = extract_char_unsigned data off len 8 in
236 let c1, off, len = extract_char_unsigned data off len 8 in
237 let c2, off, len = extract_char_unsigned data off len 8 in
238 let c3, off, len = extract_char_unsigned data off len 7 in
239 (c0 lsl 23) + (c1 lsl 15) + (c2 lsl 7) + c3 in
242 word, off+flen, len-flen
244 let extract_int_le_unsigned data off len flen =
245 let v, off, len = extract_int_be_unsigned data off len flen in
246 let v = I.byteswap v flen in
249 let _make_int32_be c0 c1 c2 c3 =
253 (Int32.shift_left c0 24)
254 (Int32.shift_left c1 16))
255 (Int32.shift_left c2 8))
258 let _make_int32_le c0 c1 c2 c3 =
262 (Int32.shift_left c3 24)
263 (Int32.shift_left c2 16))
264 (Int32.shift_left c1 8))
267 (* Extract exactly 32 bits. We have to consider endianness and signedness. *)
268 let extract_int32_be_unsigned data off len flen =
269 let byteoff = off lsr 3 in
271 let strlen = String.length data in
274 (* Optimize the common (byte-aligned) case. *)
275 if off land 7 = 0 then (
277 let c0 = _get_byte32 data byteoff strlen in
278 let c1 = _get_byte32 data (byteoff+1) strlen in
279 let c2 = _get_byte32 data (byteoff+2) strlen in
280 let c3 = _get_byte32 data (byteoff+3) strlen in
281 _make_int32_be c0 c1 c2 c3 in
282 Int32.shift_right_logical word (32 - flen)
284 (* Extract the next 32 bits, slow method. *)
286 let c0, off, len = extract_char_unsigned data off len 8 in
287 let c1, off, len = extract_char_unsigned data off len 8 in
288 let c2, off, len = extract_char_unsigned data off len 8 in
289 let c3, _, _ = extract_char_unsigned data off len 8 in
290 let c0 = Int32.of_int c0 in
291 let c1 = Int32.of_int c1 in
292 let c2 = Int32.of_int c2 in
293 let c3 = Int32.of_int c3 in
294 _make_int32_be c0 c1 c2 c3 in
295 Int32.shift_right_logical word (32 - flen)
297 word, off+flen, len-flen
299 let extract_int32_le_unsigned data off len flen =
300 let v, off, len = extract_int32_be_unsigned data off len flen in
301 let v = I32.byteswap v flen in
304 let _make_int64_be c0 c1 c2 c3 c4 c5 c6 c7 =
312 (Int64.shift_left c0 56)
313 (Int64.shift_left c1 48))
314 (Int64.shift_left c2 40))
315 (Int64.shift_left c3 32))
316 (Int64.shift_left c4 24))
317 (Int64.shift_left c5 16))
318 (Int64.shift_left c6 8))
321 (* Extract [1..64] bits. We have to consider endianness and signedness. *)
322 let extract_int64_be_unsigned data off len flen =
323 let byteoff = off lsr 3 in
325 let strlen = String.length data in
328 (* Optimize the common (byte-aligned) case. *)
329 if off land 7 = 0 then (
331 let c0 = _get_byte64 data byteoff strlen in
332 let c1 = _get_byte64 data (byteoff+1) strlen in
333 let c2 = _get_byte64 data (byteoff+2) strlen in
334 let c3 = _get_byte64 data (byteoff+3) strlen in
335 let c4 = _get_byte64 data (byteoff+4) strlen in
336 let c5 = _get_byte64 data (byteoff+5) strlen in
337 let c6 = _get_byte64 data (byteoff+6) strlen in
338 let c7 = _get_byte64 data (byteoff+7) strlen in
339 _make_int64_be c0 c1 c2 c3 c4 c5 c6 c7 in
340 Int64.shift_right_logical word (64 - flen)
342 (* Extract the next 64 bits, slow method. *)
344 let c0, off, len = extract_char_unsigned data off len 8 in
345 let c1, off, len = extract_char_unsigned data off len 8 in
346 let c2, off, len = extract_char_unsigned data off len 8 in
347 let c3, off, len = extract_char_unsigned data off len 8 in
348 let c4, off, len = extract_char_unsigned data off len 8 in
349 let c5, off, len = extract_char_unsigned data off len 8 in
350 let c6, off, len = extract_char_unsigned data off len 8 in
351 let c7, _, _ = extract_char_unsigned data off len 8 in
352 let c0 = Int64.of_int c0 in
353 let c1 = Int64.of_int c1 in
354 let c2 = Int64.of_int c2 in
355 let c3 = Int64.of_int c3 in
356 let c4 = Int64.of_int c4 in
357 let c5 = Int64.of_int c5 in
358 let c6 = Int64.of_int c6 in
359 let c7 = Int64.of_int c7 in
360 _make_int64_be c0 c1 c2 c3 c4 c5 c6 c7 in
361 Int64.shift_right_logical word (64 - flen)
363 word, off+flen, len-flen
365 (*----------------------------------------------------------------------*)
366 (* Constructor functions. *)
368 module Buffer = struct
371 mutable len : int; (* Length in bits. *)
372 (* Last byte in the buffer (if len is not aligned). We store
373 * it outside the buffer because buffers aren't mutable.
379 (* XXX We have almost enough information in the generator to
380 * choose a good initial size.
382 { buf = Buffer.create 128; len = 0; last = 0 }
384 let contents { buf = buf; len = len; last = last } =
386 if len land 7 = 0 then
389 Buffer.contents buf ^ (String.make 1 (Char.chr last)) in
392 (* Add exactly 8 bits. *)
393 let add_byte ({ buf = buf; len = len; last = last } as t) byte =
394 if byte < 0 || byte > 255 then invalid_arg "Bitmatch.Buffer.add_byte";
395 let shift = len land 7 in
397 (* Target buffer is byte-aligned. *)
398 Buffer.add_char buf (Char.chr byte)
400 (* Target buffer is unaligned. 'last' is meaningful. *)
401 let first = byte lsr shift in
402 let second = (byte lsl (8 - shift)) land 0xff in
403 Buffer.add_char buf (Char.chr (last lor first));
408 (* Add exactly 1 bit. *)
409 let add_bit ({ buf = buf; len = len; last = last } as t) bit =
410 let shift = 7 - (len land 7) in
412 (* Somewhere in the middle of 'last'. *)
413 t.last <- last lor ((if bit then 1 else 0) lsl shift)
415 (* Just a single spare bit in 'last'. *)
416 let last = last lor if bit then 1 else 0 in
417 Buffer.add_char buf (Char.chr last);
422 (* Add a small number of bits (definitely < 8). This uses a loop
423 * to call add_bit so it's slow.
425 let _add_bits t c slen =
426 if slen < 1 || slen >= 8 then invalid_arg "Bitmatch.Buffer._add_bits";
427 for i = slen-1 downto 0 do
428 let bit = c land (1 lsl i) <> 0 in
432 let add_bits ({ buf = buf; len = len } as t) str slen =
434 if len land 7 = 0 then (
435 if slen land 7 = 0 then
436 (* Common case - everything is byte-aligned. *)
437 Buffer.add_substring buf str 0 (slen lsr 3)
439 (* Target buffer is aligned. Copy whole bytes then leave the
440 * remaining bits in last.
442 let slenbytes = slen lsr 3 in
443 if slenbytes > 0 then Buffer.add_substring buf str 0 slenbytes;
444 t.last <- Char.code str.[slenbytes] lsl (8 - (slen land 7))
448 (* Target buffer is unaligned. Copy whole bytes using
449 * add_byte which knows how to deal with an unaligned
450 * target buffer, then call _add_bits for the remaining < 8 bits.
452 * XXX This is going to be dog-slow.
454 let slenbytes = slen lsr 3 in
455 for i = 0 to slenbytes-1 do
456 let byte = Char.code str.[i] in
459 _add_bits t (Char.code str.[slenbytes]) (slen - (slenbytes lsl 3))
464 (* Construct a single bit. *)
465 let construct_bit buf b _ =
468 (* Construct a field, flen = [2..8]. *)
469 let construct_char_unsigned buf v flen exn =
470 let max_val = 1 lsl flen in
471 if v < 0 || v >= max_val then raise exn;
473 Buffer.add_byte buf v
475 Buffer._add_bits buf v flen
477 (* Generate a mask with the lower 'bits' bits set. *)
479 if bits < 63 then Int64.pred (Int64.shift_left 1L bits)
480 else if bits = 63 then Int64.max_int
481 else if bits = 64 then -1L
482 else invalid_arg "Bitmatch.mask64"
484 (* Construct a field of up to 64 bits. *)
485 let construct_int64_be_unsigned buf v flen exn =
486 (* Check value is within range. *)
487 let m = Int64.lognot (mask64 flen) in
488 if Int64.logand v m <> 0L then raise exn;
491 let rec loop v flen =
493 loop (Int64.shift_right_logical v 8) (flen-8);
494 let lsb = Int64.to_int (Int64.logand v 0xffL) in
495 Buffer.add_byte buf lsb
496 ) else if flen > 0 then (
497 let lsb = Int64.to_int (Int64.logand v (mask64 flen)) in
498 Buffer._add_bits buf lsb flen
503 (*----------------------------------------------------------------------*)
504 (* Display functions. *)
507 let c = Char.code c in
510 let hexdump_bitstring chan (data, off, len) =
514 let linelen = ref 0 in
515 let linechars = String.make 16 ' ' in
517 fprintf chan "00000000 ";
520 let bits = min !len 8 in
521 let byte, off', len' = extract_char_unsigned data !off !len bits in
522 off := off'; len := len';
524 let byte = byte lsl (8-bits) in
525 fprintf chan "%02x " byte;
528 linechars.[!linelen] <-
529 (let c = Char.chr byte in
530 if isprint c then c else '.');
532 if !linelen = 8 then fprintf chan " ";
533 if !linelen = 16 then (
534 fprintf chan " |%s|\n%08x " linechars !count;
536 for i = 0 to 15 do linechars.[i] <- ' ' done
540 if !linelen > 0 then (
541 let skip = (16 - !linelen) * 3 + if !linelen < 8 then 1 else 0 in
542 for i = 0 to skip-1 do fprintf chan " " done;
543 fprintf chan " |%s|\n%!" linechars