1 (* Bitstring syntax extension.
2 * Copyright (C) 2008 Red Hat Inc., Richard W.M. Jones
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version,
8 * with the OCaml linking exception described in COPYING.LIB.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29 module P = Bitstring_persistent
31 (* If this is true then we emit some debugging code which can
32 * be useful to tell what is happening during matches. You
33 * also need to do 'Bitstring.debug := true' in your main program.
35 * If this is false then no extra debugging code is emitted.
39 (* Hashtable storing named persistent patterns. *)
40 let pattern_hash : (string, P.pattern) Hashtbl.t = Hashtbl.create 13
42 let locfail _loc msg = Loc.raise _loc (Failure msg)
44 (* Work out if an expression is an integer constant.
46 * Returns [Some i] if so (where i is the integer value), else [None].
48 * Fairly simplistic algorithm: we can only detect simple constant
49 * expressions such as [k], [k+c], [k-c] etc.
51 let rec expr_is_constant = function
52 | <:expr< $int:i$ >> -> (* Literal integer constant. *)
53 Some (int_of_string i)
54 | <:expr< $lid:op$ $a$ $b$ >> ->
55 (match expr_is_constant a, expr_is_constant b with
56 | Some a, Some b -> (* Integer binary operations. *)
57 let ops = ["+", (+); "-", (-); "*", ( * ); "/", (/);
58 (* NB: explicit fun .. -> is necessary here to work
59 * around a camlp4 bug in OCaml 3.10.0.
61 "land", (fun a b -> a land b);
62 "lor", (fun a b -> a lor b);
63 "lxor", (fun a b -> a lxor b);
64 "lsl", (fun a b -> a lsl b);
65 "lsr", (fun a b -> a lsr b);
66 "asr", (fun a b -> a asr b);
67 "mod", (fun a b -> a mod b)] in
68 (try Some ((List.assoc op ops) a b) with Not_found -> None)
72 (* Generate a fresh, unique symbol each time called. *)
77 sprintf "__pabitstring_%s_%d" name i
79 (* Used to keep track of which qualifiers we've seen in parse_field. *)
81 endian_set : bool; signed_set : bool; type_set : bool;
82 offset_set : bool; check_set : bool; bind_set : bool;
83 save_offset_to_set : bool;
86 endian_set = false; signed_set = false; type_set = false;
87 offset_set = false; check_set = false; bind_set = false;
88 save_offset_to_set = false
91 (* Deal with the qualifiers which appear for a field of both types. *)
92 let parse_field _loc field qs =
93 let fail = locfail _loc in
97 | None -> noneset, field
99 let check already_set msg = if already_set then fail msg in
100 let apply_qualifier (whatset, field) =
102 | "endian", Some expr ->
103 check whatset.endian_set "an endian flag has been set already";
104 let field = P.set_endian_expr field expr in
105 { whatset with endian_set = true }, field
107 fail "qualifier 'endian' should be followed by an expression"
108 | "offset", Some expr ->
109 check whatset.offset_set "an offset has been set already";
110 let field = P.set_offset field expr in
111 { whatset with offset_set = true }, field
113 fail "qualifier 'offset' should be followed by an expression"
114 | "check", Some expr ->
115 check whatset.check_set "a check-qualifier has been set already";
116 let field = P.set_check field expr in
117 { whatset with check_set = true }, field
119 fail "qualifier 'check' should be followed by an expression"
120 | "bind", Some expr ->
121 check whatset.bind_set "a bind expression has been set already";
122 let field = P.set_bind field expr in
123 { whatset with bind_set = true }, field
125 fail "qualifier 'bind' should be followed by an expression"
126 | "save_offset_to", Some expr (* XXX should be a pattern *) ->
127 check whatset.save_offset_to_set
128 "a save_offset_to-qualifier has been set already";
131 | <:expr< $lid:id$ >> -> id
133 failwith "pa_bitstring: internal error: save_offset_to only supports simple identifiers at the moment. In future we should support full patterns." in
134 let field = P.set_save_offset_to_lident field id in
135 { whatset with save_offset_to_set = true }, field
136 | "save_offset_to", None ->
137 fail "qualifier 'save_offset_to' should be followed by a binding expression"
139 fail (s ^ ": unknown qualifier, or qualifier should not be followed by an expression")
141 let endian_quals = ["bigendian", BigEndian;
142 "littleendian", LittleEndian;
143 "nativeendian", NativeEndian] in
144 let sign_quals = ["signed", true; "unsigned", false] in
145 let type_quals = ["int", P.set_type_int;
146 "string", P.set_type_string;
147 "bitstring", P.set_type_bitstring] in
148 if List.mem_assoc qual endian_quals then (
149 check whatset.endian_set "an endian flag has been set already";
150 let field = P.set_endian field (List.assoc qual endian_quals) in
151 { whatset with endian_set = true }, field
152 ) else if List.mem_assoc qual sign_quals then (
153 check whatset.signed_set "a signed flag has been set already";
154 let field = P.set_signed field (List.assoc qual sign_quals) in
155 { whatset with signed_set = true }, field
156 ) else if List.mem_assoc qual type_quals then (
157 check whatset.type_set "a type flag has been set already";
158 let field = (List.assoc qual type_quals) field in
159 { whatset with type_set = true }, field
161 fail (qual ^ ": unknown qualifier, or qualifier should be followed by an expression") in
162 List.fold_left apply_qualifier (noneset, field) qs in
164 (* If type is set to string or bitstring then endianness and
165 * signedness qualifiers are meaningless and must not be set.
168 let t = P.get_type field in
169 if (t = P.Bitstring || t = P.String) &&
170 (whatset.endian_set || whatset.signed_set) then
171 fail "string types and endian or signed qualifiers cannot be mixed" in
173 (* Default endianness, signedness, type if not set already. *)
175 if whatset.endian_set then field else P.set_endian field BigEndian in
177 if whatset.signed_set then field else P.set_signed field false in
179 if whatset.type_set then field else P.set_type_int field in
183 type functype = ExtractFunc | ConstructFunc
185 (* Choose the right constructor function. *)
186 let build_bitstring_call _loc functype length endian signed =
187 match functype, length, endian, signed with
188 (* XXX The meaning of signed/unsigned breaks down at
189 * 31, 32, 63 and 64 bits.
191 | (ExtractFunc, Some 1, _, _) -> <:expr< Bitstring.extract_bit >>
192 | (ConstructFunc, Some 1, _, _) -> <:expr< Bitstring.construct_bit >>
193 | (functype, Some (2|3|4|5|6|7|8), _, signed) ->
194 let funcname = match functype with
195 | ExtractFunc -> "extract"
196 | ConstructFunc -> "construct" in
197 let sign = if signed then "signed" else "unsigned" in
198 let call = sprintf "%s_char_%s" funcname sign in
199 <:expr< Bitstring.$lid:call$ >>
200 | (functype, len, endian, signed) ->
201 let funcname = match functype with
202 | ExtractFunc -> "extract"
203 | ConstructFunc -> "construct" in
204 let t = match len with
205 | Some i when i <= 31 -> "int"
208 let sign = if signed then "signed" else "unsigned" in
210 | P.ConstantEndian constant ->
211 let endianness = match constant with
213 | LittleEndian -> "le"
214 | NativeEndian -> "ne" in
215 let call = sprintf "%s_%s_%s_%s" funcname t endianness sign in
216 <:expr< Bitstring.$lid:call$ >>
217 | P.EndianExpr expr ->
218 let call = sprintf "%s_%s_%s_%s" funcname t "ee" sign in
219 <:expr< Bitstring.$lid:call$ $expr$ >>
221 (* Generate the code for a constructor, ie. 'BITSTRING ...'. *)
222 let output_constructor _loc fields =
223 (* This function makes code to raise a Bitstring.Construct_failure exception
224 * containing a message and the current _loc context.
225 * (Thanks to Bluestorm for suggesting this).
227 let construct_failure _loc msg =
229 Bitstring.Construct_failure
231 $`str:Loc.file_name _loc$,
232 $`int:Loc.start_line _loc$,
233 $`int:Loc.start_off _loc - Loc.start_bol _loc$)
236 let raise_construct_failure _loc msg =
237 <:expr< raise $construct_failure _loc msg$ >>
240 (* Bitstrings are created like the 'Buffer' module (in fact, using
241 * the Buffer module), by appending snippets to a growing buffer.
242 * This is reasonably efficient and avoids a lot of garbage.
244 let buffer = gensym "buffer" in
246 (* General exception which is raised inside the constructor functions
247 * when an int expression is out of range at runtime.
249 let exn = gensym "exn" in
250 let exn_used = ref false in
252 (* Convert each field to a simple bitstring-generating expression. *)
253 let fields = List.map (
255 let fexpr = P.get_expr field in
256 let flen = P.get_length field in
257 let endian = P.get_endian field in
258 let signed = P.get_signed field in
259 let t = P.get_type field in
260 let _loc = P.get_location field in
262 let fail = locfail _loc in
264 (* offset(), check(), bind(), save_offset_to() not supported in
267 * Implementation of forward-only offsets is fairly
268 * straightforward: we would need to just calculate the length of
269 * padding here and add it to what has been constructed. For
270 * general offsets, including going backwards, that would require
271 * a rethink in how we construct bitstrings.
273 if P.get_offset field <> None then
274 fail "offset expressions are not supported in BITSTRING constructors";
275 if P.get_check field <> None then
276 fail "check expressions are not supported in BITSTRING constructors";
277 if P.get_bind field <> None then
278 fail "bind expressions are not supported in BITSTRING constructors";
279 if P.get_save_offset_to field <> None then
280 fail "save_offset_to is not supported in BITSTRING constructors";
282 (* Is flen an integer constant? If so, what is it? This
283 * is very simple-minded and only detects simple constants.
285 let flen_is_const = expr_is_constant flen in
287 let int_construct_const (i, endian, signed) =
288 build_bitstring_call _loc ConstructFunc (Some i) endian signed in
289 let int_construct (endian, signed) =
290 build_bitstring_call _loc ConstructFunc None endian signed in
293 match t, flen_is_const with
294 (* Common case: int field, constant flen.
296 * Range checks are done inside the construction function
297 * because that's a lot simpler w.r.t. types. It might
298 * be better to move them here. XXX
300 | P.Int, Some i when i > 0 && i <= 64 ->
301 let construct_fn = int_construct_const (i,endian,signed) in
305 $construct_fn$ $lid:buffer$ $fexpr$ $`int:i$ $lid:exn$
309 fail "length of int field must be [1..64]"
311 (* Int field, non-constant length. We need to perform a runtime
312 * test to ensure the length is [1..64].
314 * Range checks are done inside the construction function
315 * because that's a lot simpler w.r.t. types. It might
316 * be better to move them here. XXX
319 let construct_fn = int_construct (endian,signed) in
323 if $flen$ >= 1 && $flen$ <= 64 then
324 $construct_fn$ $lid:buffer$ $fexpr$ $flen$ $lid:exn$
326 $raise_construct_failure _loc "length of int field must be [1..64]"$
329 (* String, constant length > 0, must be a multiple of 8. *)
330 | P.String, Some i when i > 0 && i land 7 = 0 ->
331 let bs = gensym "bs" in
334 let $lid:bs$ = $fexpr$ in
335 if String.length $lid:bs$ = $`int:j$ then
336 Bitstring.construct_string $lid:buffer$ $lid:bs$
338 $raise_construct_failure _loc "length of string does not match declaration"$
341 (* String, constant length -1, means variable length string
344 | P.String, Some (-1) ->
345 <:expr< Bitstring.construct_string $lid:buffer$ $fexpr$ >>
347 (* String, constant length = 0 is probably an error, and so is
350 | P.String, Some _ ->
351 fail "length of string must be > 0 and a multiple of 8, or the special value -1"
353 (* String, non-constant length.
354 * We check at runtime that the length is > 0, a multiple of 8,
355 * and matches the declared length.
358 let bslen = gensym "bslen" in
359 let bs = gensym "bs" in
361 let $lid:bslen$ = $flen$ in
362 if $lid:bslen$ > 0 then (
363 if $lid:bslen$ land 7 = 0 then (
364 let $lid:bs$ = $fexpr$ in
365 if String.length $lid:bs$ = ($lid:bslen$ lsr 3) then
366 Bitstring.construct_string $lid:buffer$ $lid:bs$
368 $raise_construct_failure _loc "length of string does not match declaration"$
370 $raise_construct_failure _loc "length of string must be a multiple of 8"$
372 $raise_construct_failure _loc "length of string must be > 0"$
375 (* Bitstring, constant length >= 0. *)
376 | P.Bitstring, Some i when i >= 0 ->
377 let bs = gensym "bs" in
379 let $lid:bs$ = $fexpr$ in
380 if Bitstring.bitstring_length $lid:bs$ = $`int:i$ then
381 Bitstring.construct_bitstring $lid:buffer$ $lid:bs$
383 $raise_construct_failure _loc "length of bitstring does not match declaration"$
386 (* Bitstring, constant length -1, means variable length bitstring
389 | P.Bitstring, Some (-1) ->
390 <:expr< Bitstring.construct_bitstring $lid:buffer$ $fexpr$ >>
392 (* Bitstring, constant length < -1 is an error. *)
393 | P.Bitstring, Some _ ->
394 fail "length of bitstring must be >= 0 or the special value -1"
396 (* Bitstring, non-constant length.
397 * We check at runtime that the length is >= 0 and matches
398 * the declared length.
400 | P.Bitstring, None ->
401 let bslen = gensym "bslen" in
402 let bs = gensym "bs" in
404 let $lid:bslen$ = $flen$ in
405 if $lid:bslen$ >= 0 then (
406 let $lid:bs$ = $fexpr$ in
407 if Bitstring.bitstring_length $lid:bs$ = $lid:bslen$ then
408 Bitstring.construct_bitstring $lid:buffer$ $lid:bs$
410 $raise_construct_failure _loc "length of bitstring does not match declaration"$
412 $raise_construct_failure _loc "length of bitstring must be > 0"$
417 (* Create the final bitstring. Start by creating an empty buffer
418 * and then evaluate each expression above in turn which will
419 * append some more to the bitstring buffer. Finally extract
422 * XXX We almost have enough information to be able to guess
423 * a good initial size for the buffer.
427 | [] -> <:expr< [] >>
428 | h::t -> List.fold_left (fun h t -> <:expr< $h$; $t$ >>) h t in
432 let $lid:buffer$ = Bitstring.Buffer.create () in
434 Bitstring.Buffer.contents $lid:buffer$
439 let $lid:exn$ = $construct_failure _loc "value out of range"$ in
445 (* Generate the code for a bitmatch statement. '_loc' is the
446 * location, 'bs' is the bitstring parameter, 'cases' are
447 * the list of cases to test against.
449 let output_bitmatch _loc bs cases =
450 (* These symbols are used through the generated code to record our
451 * current position within the bitstring:
453 * data - original bitstring data (string, never changes)
454 * off - current offset within data (int, increments as we move through
456 * len - current remaining length within data (int, decrements as
457 * we move through the bitstring)
461 * original_off - saved offset at the start of the match (never changes)
462 * original_len - saved length at the start of the match (never changes)
463 * off_aligned - true if the original offset is byte-aligned (allows
464 * us to make some common optimizations)
466 let data = gensym "data"
467 and off = gensym "off"
468 and len = gensym "len"
469 and original_off = gensym "original_off"
470 and original_len = gensym "original_len"
471 and off_aligned = gensym "off_aligned"
473 (* This is where the result will be stored (a reference). *)
474 and result = gensym "result" in
476 (* This generates the field extraction code for each
477 * field in a single case. There must be enough remaining data
478 * in the bitstring to satisfy the field.
480 * As we go through the fields, symbols 'data', 'off' and 'len'
481 * track our position and remaining length in the bitstring.
483 * The whole thing is a lot of nested 'if'/'match' statements.
484 * Code is generated from the inner-most (last) field outwards.
486 let rec output_field_extraction inner = function
489 let fpatt = P.get_patt field in
490 let flen = P.get_length field in
491 let endian = P.get_endian field in
492 let signed = P.get_signed field in
493 let t = P.get_type field in
494 let _loc = P.get_location field in
496 let fail = locfail _loc in
498 (* Is flen (field len) an integer constant? If so, what is it?
499 * This will be [Some i] if it's a constant or [None] if it's
500 * non-constant or we couldn't determine.
502 let flen_is_const = expr_is_constant flen in
504 (* Surround the inner expression by check and bind clauses, so:
508 * where the check and bind are switched on only if they are
509 * present in the field. (In the common case when neither
510 * clause is present, expr = inner). Note the order of the
511 * check & bind is visible to the user and defined in the
512 * documentation, so it must not change.
516 match P.get_bind field with
519 <:expr< let $fpatt$ = $bind_expr$ in $expr$ >> in
521 match P.get_check field with
524 <:expr< if $check_expr$ then $expr$ >> in
526 (* Compute the offset of this field within the match, if it
527 * can be known at compile time.
529 * Actually, we'll compute two things: the 'natural_field_offset'
530 * is the offset assuming this field had no offset() qualifier
531 * (in other words, its position, immediately following the
532 * preceding field). 'field_offset' is the real field offset
533 * taking into account any offset() qualifier.
535 * This will be [Some i] if our current offset is known
536 * at compile time, or [None] if we can't determine it.
538 let natural_field_offset, field_offset =
539 let has_constant_offset field =
540 match P.get_offset field with
543 match expr_is_constant expr with
547 let get_constant_offset field =
548 match P.get_offset field with
549 | None -> assert false
551 match expr_is_constant expr with
552 | None -> assert false
556 let has_constant_len field =
557 match expr_is_constant (P.get_length field) with
559 | Some i when i > 0 -> true
562 let get_constant_len field =
563 match expr_is_constant (P.get_length field) with
564 | None -> assert false
565 | Some i when i > 0 -> i
566 | Some _ -> assert false
569 (* NB: We are looping over the PRECEDING fields in reverse order. *)
570 let rec loop = function
571 (* first field has constant offset 0 *)
573 (* preceding field with constant offset & length *)
575 when has_constant_offset f && has_constant_len f ->
576 Some (get_constant_offset f + get_constant_len f)
577 (* preceding field with no offset & constant length *)
579 when P.get_offset f = None && has_constant_len f ->
582 | Some offset -> Some (offset + get_constant_len f))
583 (* else, can't work out the offset *)
587 let natural_field_offset = loop fields in
590 match P.get_offset field with
591 | None -> natural_field_offset
592 | Some expr -> (* has an offset() clause *)
593 match expr_is_constant expr with
597 natural_field_offset, field_offset in
599 (* Also compute if the field_offset is known to be byte-aligned at
600 * compile time, which is usually both the common and best possible
601 * case for generating optimized code.
603 * This is None if not aligned / don't know.
604 * Or Some byte_offset if we can work it out.
606 let field_offset_aligned =
607 match field_offset with
608 | None -> None (* unknown, assume no *)
609 | Some off when off land 7 = 0 -> Some (off lsr 3)
610 | Some _ -> None in (* definitely no *)
612 (* Now build the code which matches a single field. *)
613 let int_extract_const i endian signed =
614 build_bitstring_call _loc ExtractFunc (Some i) endian signed in
615 let int_extract endian signed =
616 build_bitstring_call _loc ExtractFunc None endian signed in
619 match t, flen_is_const, field_offset_aligned, endian, signed with
620 (* Very common cases: int field, constant 8/16/32/64 bit
621 * length, aligned to the match at a known offset. We
622 * still have to check if the bitstring is aligned (can only
623 * be known at runtime) but we may be able to directly access
624 * the bytes in the string.
626 | P.Int, Some 8, Some field_byte_offset, _, _ ->
627 let extract_fn = int_extract_const 8 endian signed in
629 (* The fast-path code when everything is aligned. *)
633 ($lid:original_off$ lsr 3) + $`int:field_byte_offset$ in
634 Char.code (String.unsafe_get $lid:data$ o)
638 if $lid:len$ >= 8 then (
640 if $lid:off_aligned$ then
643 $extract_fn$ $lid:data$ $lid:off$ $lid:len$ 8 in
644 let $lid:off$ = $lid:off$ + 8
645 and $lid:len$ = $lid:len$ - 8 in
646 match v with $fpatt$ when true -> $expr$ | _ -> ()
650 | P.Int, Some ((16|32|64) as i),
651 Some field_byte_offset, (P.ConstantEndian _ as endian), signed ->
652 let extract_fn = int_extract_const i endian signed in
654 (* The fast-path code when everything is aligned. *)
657 let endian = match endian with
658 | P.ConstantEndian BigEndian -> "be"
659 | P.ConstantEndian LittleEndian -> "le"
660 | P.ConstantEndian NativeEndian -> "ne"
661 | P.EndianExpr _ -> assert false in
662 let signed = if signed then "signed" else "unsigned" in
664 sprintf "extract_fastpath_int%d_%s_%s" i endian signed in
667 <:expr< Bitstring.$lid:name$ $lid:data$ o >>
670 (* must allocate a new zero each time *)
671 let zero = Int32.of_int 0 in
672 Bitstring.$lid:name$ $lid:data$ o zero
676 (* must allocate a new zero each time *)
677 let zero = Int64.of_int 0 in
678 Bitstring.$lid:name$ $lid:data$ o zero
680 | _ -> assert false in
682 (* Starting offset within the string. *)
684 ($lid:original_off$ lsr 3) + $`int:field_byte_offset$ in
690 $extract_fn$ $lid:data$ $lid:off$ $lid:len$ $`int:i$
694 if $lid:len$ >= $`int:i$ then (
696 if $lid:off_aligned$ then $fastpath$ else $slowpath$ in
697 let $lid:off$ = $lid:off$ + $`int:i$
698 and $lid:len$ = $lid:len$ - $`int:i$ in
699 match v with $fpatt$ when true -> $expr$ | _ -> ()
703 (* Common case: int field, constant flen *)
704 | P.Int, Some i, _, _, _ when i > 0 && i <= 64 ->
705 let extract_fn = int_extract_const i endian signed in
706 let v = gensym "val" in
708 if $lid:len$ >= $`int:i$ then (
710 $extract_fn$ $lid:data$ $lid:off$ $lid:len$ $`int:i$ in
711 let $lid:off$ = $lid:off$ + $`int:i$
712 and $lid:len$ = $lid:len$ - $`int:i$ in
713 match $lid:v$ with $fpatt$ when true -> $expr$ | _ -> ()
717 | P.Int, Some _, _, _, _ ->
718 fail "length of int field must be [1..64]"
720 (* Int field, non-const flen. We have to test the range of
721 * the field at runtime. If outside the range it's a no-match
724 | P.Int, None, _, _, _ ->
725 let extract_fn = int_extract endian signed in
726 let v = gensym "val" in
728 if $flen$ >= 1 && $flen$ <= 64 && $flen$ <= $lid:len$ then (
730 $extract_fn$ $lid:data$ $lid:off$ $lid:len$ $flen$ in
731 let $lid:off$ = $lid:off$ + $flen$
732 and $lid:len$ = $lid:len$ - $flen$ in
733 match $lid:v$ with $fpatt$ when true -> $expr$ | _ -> ()
737 (* String, constant flen > 0.
738 * The field is at a known byte-aligned offset so we may
739 * be able to optimize the substring extraction.
741 | P.String, Some i, Some field_byte_offset, _, _
742 when i > 0 && i land 7 = 0 ->
745 (* Starting offset within the string. *)
747 ($lid:original_off$ lsr 3) + $`int:field_byte_offset$ in
748 String.sub $lid:data$ o $`int:(i lsr 3)$
753 Bitstring.string_of_bitstring
754 ($lid:data$, $lid:off$, $`int:i$)
759 if $lid:off_aligned$ then $fastpath$ else $slowpath$
763 if $lid:len$ >= $`int:i$ then (
765 let $lid:off$ = $lid:off$ + $`int:i$
766 and $lid:len$ = $lid:len$ - $`int:i$ in
768 | $fpatt$ when true -> $expr$
773 (* String, constant flen > 0. *)
774 | P.String, Some i, None, _, _ when i > 0 && i land 7 = 0 ->
776 if $lid:len$ >= $`int:i$ then (
778 Bitstring.string_of_bitstring
779 ($lid:data$, $lid:off$, $`int:i$) in
780 let $lid:off$ = $lid:off$ + $`int:i$
781 and $lid:len$ = $lid:len$ - $`int:i$ in
783 | $fpatt$ when true -> $expr$
788 (* String, constant flen = -1, means consume all the
790 * XXX It should be possible to optimize this for known byte
791 * offset, but the optimization is tricky because the end/length
792 * of the string may not be byte-aligned.
794 | P.String, Some i, _, _, _ when i = -1 ->
795 let str = gensym "str" in
799 Bitstring.string_of_bitstring
800 ($lid:data$, $lid:off$, $lid:len$) in
801 let $lid:off$ = $lid:off$ + $lid:len$ in
804 | $fpatt$ when true -> $expr$
808 | P.String, Some _, _, _, _ ->
809 fail "length of string must be > 0 and a multiple of 8, or the special value -1"
811 (* String field, non-const flen. We check the flen is > 0
812 * and a multiple of 8 (-1 is not allowed here), at runtime.
814 | P.String, None, _, _, _ ->
815 let bs = gensym "bs" in
817 if $flen$ >= 0 && $flen$ <= $lid:len$
818 && $flen$ land 7 = 0 then (
819 let $lid:bs$ = ($lid:data$, $lid:off$, $flen$) in
820 let $lid:off$ = $lid:off$ + $flen$
821 and $lid:len$ = $lid:len$ - $flen$ in
822 match Bitstring.string_of_bitstring $lid:bs$ with
823 | $fpatt$ when true -> $expr$
828 (* Bitstring, constant flen >= 0.
829 * At the moment all we can do is assign the bitstring to an
832 | P.Bitstring, Some i, _, _, _ when i >= 0 ->
835 | <:patt< $lid:ident$ >> -> ident
836 | <:patt< _ >> -> "_"
838 fail "cannot compare a bitstring to a constant" in
840 if $lid:len$ >= $`int:i$ then (
841 let $lid:ident$ = ($lid:data$, $lid:off$, $`int:i$) in
842 let $lid:off$ = $lid:off$ + $`int:i$
843 and $lid:len$ = $lid:len$ - $`int:i$ in
848 (* Bitstring, constant flen = -1, means consume all the
851 | P.Bitstring, Some i, _, _, _ when i = -1 ->
854 | <:patt< $lid:ident$ >> -> ident
855 | <:patt< _ >> -> "_"
857 fail "cannot compare a bitstring to a constant" in
859 let $lid:ident$ = ($lid:data$, $lid:off$, $lid:len$) in
860 let $lid:off$ = $lid:off$ + $lid:len$ in
865 | P.Bitstring, Some _, _, _, _ ->
866 fail "length of bitstring must be >= 0 or the special value -1"
868 (* Bitstring field, non-const flen. We check the flen is >= 0
869 * (-1 is not allowed here) at runtime.
871 | P.Bitstring, None, _, _, _ ->
874 | <:patt< $lid:ident$ >> -> ident
875 | <:patt< _ >> -> "_"
877 fail "cannot compare a bitstring to a constant" in
879 if $flen$ >= 0 && $flen$ <= $lid:len$ then (
880 let $lid:ident$ = ($lid:data$, $lid:off$, $flen$) in
881 let $lid:off$ = $lid:off$ + $flen$
882 and $lid:len$ = $lid:len$ - $flen$ in
888 (* Computed offset: only offsets forward are supported.
890 * We try hard to optimize this based on what we know. Are
891 * we at a predictable offset now? (Look at the outer 'fields'
892 * list and see if they all have constant field length starting
893 * at some constant offset). Is this offset constant?
895 * Based on this we can do a lot of the computation at
896 * compile time, or defer it to runtime only if necessary.
898 * In all cases, the off and len fields get updated.
901 match P.get_offset field with
902 | None -> expr (* common case: there was no offset expression *)
903 | Some offset_expr ->
904 (* This will be [Some i] if offset is a constant expression
905 * or [None] if it's a non-constant.
907 let requested_offset = expr_is_constant offset_expr in
909 (* Look at the field offset (if known) and requested offset
910 * cases and determine what code to generate.
912 match natural_field_offset, requested_offset with
913 (* This is the good case: both the field offset and
914 * the requested offset are constant, so we can remove
915 * almost all the runtime checks.
917 | Some natural_field_offset, Some requested_offset ->
918 let move = requested_offset - natural_field_offset in
920 fail (sprintf "requested offset is less than the field offset (%d < %d)" requested_offset natural_field_offset);
921 (* Add some code to move the offset and length by a
922 * constant amount, and a runtime test that len >= 0
923 * (XXX possibly the runtime test is unnecessary?)
926 let $lid:off$ = $lid:off$ + $`int:move$ in
927 let $lid:len$ = $lid:len$ - $`int:move$ in
928 if $lid:len$ >= 0 then $expr$
930 (* In any other case, we need to use runtime checks.
932 * XXX It's not clear if a backwards move detected at runtime
933 * is merely a match failure, or a runtime error. At the
934 * moment it's just a match failure since bitmatch generally
935 * doesn't raise runtime errors.
938 let move = gensym "move" in
941 $offset_expr$ - ($lid:off$ - $lid:original_off$) in
942 if $lid:move$ >= 0 then (
943 let $lid:off$ = $lid:off$ + $lid:move$ in
944 let $lid:len$ = $lid:len$ - $lid:move$ in
945 if $lid:len$ >= 0 then $expr$
947 >> in (* end of computed offset code *)
949 (* save_offset_to(patt) saves the current offset into a variable. *)
951 match P.get_save_offset_to field with
952 | None -> expr (* no save_offset_to *)
955 let $patt$ = $lid:off$ - $lid:original_off$ in
959 (* Emit extra debugging code. *)
961 if not debug then expr else (
962 let field = P.string_of_pattern_field field in
965 if !Bitstring.debug then (
966 Printf.eprintf "PA_BITSTRING: TEST:\n";
967 Printf.eprintf " %s\n" $str:field$;
968 Printf.eprintf " off %d len %d\n%!" $lid:off$ $lid:len$;
969 (*Bitstring.hexdump_bitstring stderr
970 ($lid:data$,$lid:off$,$lid:len$);*)
976 output_field_extraction expr fields
979 (* Convert each case in the match. *)
980 let cases = List.map (
981 fun (fields, bind, whenclause, code) ->
982 let inner = <:expr< $lid:result$ := Some ($code$); raise Exit >> in
984 match whenclause with
986 <:expr< if $whenclause$ then $inner$ >>
992 let $lid:name$ = ($lid:data$, $lid:off$, $lid:len$) in
996 output_field_extraction inner (List.rev fields)
999 (* Join them into a single expression.
1001 * Don't do it with a normal fold_right because that leaves
1002 * 'raise Exit; ()' at the end which causes a compiler warning.
1003 * Hence a bit of complexity here.
1005 * Note that the number of cases is always >= 1 so List.hd is safe.
1007 let cases = List.rev cases in
1009 List.fold_left (fun base case -> <:expr< $case$ ; $base$ >>)
1010 (List.hd cases) (List.tl cases) in
1012 (* The final code just wraps the list of cases in a
1013 * try/with construct so that each case is tried in
1014 * turn until one case matches (that case sets 'result'
1015 * and raises 'Exit' to leave the whole statement).
1016 * If result isn't set by the end then we will raise
1017 * Match_failure with the location of the bitmatch
1018 * statement in the original code.
1020 let loc_fname = Loc.file_name _loc in
1021 let loc_line = string_of_int (Loc.start_line _loc) in
1022 let loc_char = string_of_int (Loc.start_off _loc - Loc.start_bol _loc) in
1025 (* Note we save the original offset/length at the start of the match
1026 * in 'original_off'/'original_len' symbols. 'data' never changes.
1027 * This code also ensures that if original_off/original_len/off_aligned
1028 * aren't actually used, we don't get a warning.
1030 let ($lid:data$, $lid:original_off$, $lid:original_len$) = $bs$ in
1031 let $lid:off$ = $lid:original_off$ and $lid:len$ = $lid:original_len$ in
1032 let $lid:off_aligned$ = $lid:off$ land 7 = 0 in
1033 ignore $lid:off_aligned$;
1034 let $lid:result$ = ref None in
1038 match ! $lid:result$ with
1040 | None -> raise (Match_failure ($str:loc_fname$,
1041 $int:loc_line$, $int:loc_char$))
1044 (* Add a named pattern. *)
1045 let add_named_pattern _loc name pattern =
1046 Hashtbl.add pattern_hash name pattern
1048 (* Expand a named pattern from the pattern_hash. *)
1049 let expand_named_pattern _loc name =
1050 try Hashtbl.find pattern_hash name
1052 locfail _loc (sprintf "named pattern not found: %s" name)
1054 (* Add named patterns from a file. See the documentation on the
1055 * directory search path in bitstring_persistent.mli
1057 let load_patterns_from_file _loc filename =
1059 if Filename.is_relative filename && Filename.is_implicit filename then (
1060 (* Try current directory. *)
1061 try open_in filename
1063 (* Try OCaml library directory. *)
1064 try open_in (Filename.concat Bitstring_config.ocamllibdir filename)
1065 with exn -> Loc.raise _loc exn
1067 try open_in filename
1068 with exn -> Loc.raise _loc exn
1070 let names = ref [] in
1073 let name = P.named_from_channel chan in
1074 names := name :: !names
1077 with End_of_file -> ()
1080 let names = List.rev !names in
1083 | name, P.Pattern patt ->
1085 locfail _loc (sprintf "pattern %s: no fields" name);
1086 add_named_pattern _loc name patt
1087 | _, P.Constructor _ -> () (* just ignore these for now *)
1091 GLOBAL: expr str_item;
1093 (* Qualifiers are a list of identifiers ("string", "bigendian", etc.)
1094 * followed by an optional expression (used in certain cases). Note
1095 * that we are careful not to declare any explicit reserved words.
1100 e = OPT [ "("; e = expr; ")" -> e ] -> (q, e) ]
1104 (* Field used in the bitmatch operator (a pattern). This can actually
1105 * return multiple fields, in the case where the 'field' is a named
1106 * persitent pattern.
1109 [ fpatt = patt; ":"; len = expr LEVEL "top";
1110 qs = OPT [ ":"; qs = qualifiers -> qs ] ->
1111 let field = P.create_pattern_field _loc in
1112 let field = P.set_patt field fpatt in
1113 let field = P.set_length field len in
1114 [parse_field _loc field qs] (* Normal, single field. *)
1115 | ":"; name = LIDENT ->
1116 expand_named_pattern _loc name (* Named -> list of fields. *)
1120 (* Case inside bitmatch operator. *)
1123 fields = LIST0 patt_field SEP ";";
1134 [ fields = patt_fields;
1135 bind = OPT [ "as"; name = LIDENT -> name ];
1136 whenclause = OPT [ "when"; e = expr -> e ]; "->";
1138 (fields, bind, whenclause, code)
1142 (* Field used in the BITSTRING constructor (an expression). *)
1144 [ fexpr = expr LEVEL "top"; ":"; len = expr LEVEL "top";
1145 qs = OPT [ ":"; qs = qualifiers -> qs ] ->
1146 let field = P.create_constructor_field _loc in
1147 let field = P.set_expr field fexpr in
1148 let field = P.set_length field len in
1149 parse_field _loc field qs
1155 fields = LIST0 constr_field SEP ";";
1161 (* 'bitmatch' expressions. *)
1164 bs = expr; "with"; OPT "|";
1165 cases = LIST1 patt_case SEP "|" ->
1166 output_bitmatch _loc bs cases
1171 fields = constr_fields ->
1172 output_constructor _loc fields
1176 (* Named persistent patterns.
1178 * NB: Currently only allowed at the top level. We can probably lift
1179 * this restriction later if necessary. We only deal with patterns
1180 * at the moment, not constructors, but the infrastructure to do
1181 * constructors is in place.
1183 str_item: LEVEL "top" [
1184 [ "let"; "bitmatch";
1185 name = LIDENT; "="; fields = patt_fields ->
1186 add_named_pattern _loc name fields;
1187 (* The statement disappears, but we still need a str_item so ... *)
1189 | "open"; "bitmatch"; filename = STRING ->
1190 load_patterns_from_file _loc filename;