1 (* Bitmatch syntax extension.
2 * Copyright (C) 2008 Red Hat Inc., Richard W.M. Jones
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 * $Id: pa_bitmatch.ml,v 1.6 2008-04-02 08:05:58 rjones Exp $
27 (* If this is true then we emit some debugging code which can
28 * be useful to tell what is happening during matches. You
29 * also need to do 'Bitmatch.debug := true' in your main program.
31 * If this is false then no extra debugging code is emitted.
35 type m = Fields of f list (* field ; field -> ... *)
36 | Bind of string option (* _ -> ... *)
38 (* XXX fval should be a patt, not an expr *)
39 fval : expr; (* field binding or value *)
40 flen : expr; (* length in bits, may be non-const *)
41 endian : endian; (* endianness *)
42 signed : bool; (* true if signed, false if unsigned *)
44 _loc : Loc.t; (* location in source code *)
46 and endian = BigEndian | LittleEndian | NativeEndian
47 and t = Int | Bitstring
49 (* Generate a fresh, unique symbol each time called. *)
54 sprintf "__pabitmatch_%s_%d" name i
56 (* Deal with the qualifiers which appear for a field. *)
57 let parse_field _loc fval flen qs =
58 let endian, signed, t =
60 | None -> (None, None, None)
63 fun (endian, signed, t) q ->
66 if endian <> None then
67 Loc.raise _loc (Failure "an endian flag has been set already")
69 let endian = Some BigEndian in
73 if endian <> None then
74 Loc.raise _loc (Failure "an endian flag has been set already")
76 let endian = Some LittleEndian in
80 if endian <> None then
81 Loc.raise _loc (Failure "an endian flag has been set already")
83 let endian = Some NativeEndian in
87 if signed <> None then
88 Loc.raise _loc (Failure "a signed flag has been set already")
90 let signed = Some true in
94 if signed <> None then
95 Loc.raise _loc (Failure "a signed flag has been set already")
97 let signed = Some false in
102 Loc.raise _loc (Failure "a type flag has been set already")
109 Loc.raise _loc (Failure "a type flag has been set already")
111 let t = Some Bitstring in
115 Loc.raise _loc (Failure (s ^ ": unknown qualifier"))
116 ) (None, None, None) qs in
118 (* If type is set to bitstring then endianness and signedness
119 * qualifiers are meaningless and must not be set.
121 if t = Some Bitstring && (endian <> None || signed <> None) then
123 Failure "bitstring type and endian or signed qualifiers cannot be mixed"
126 (* Default endianness, signedness, type. *)
127 let endian = match endian with None -> BigEndian | Some e -> e in
128 let signed = match signed with None -> false | Some s -> s in
129 let t = match t with None -> Int | Some t -> t in
140 let string_of_endian = function
141 | BigEndian -> "bigendian"
142 | LittleEndian -> "littleendian"
143 | NativeEndian -> "nativeendian"
145 let string_of_t = function
147 | Bitstring -> "bitstring"
149 let string_of_field { fval = fval; flen = flen;
150 endian = endian; signed = signed; t = t;
154 | <:expr< $lid:id$ >> -> id
155 | _ -> "[expression]" in
158 | <:expr< $int:i$ >> -> i
159 | _ -> "[non-const-len]" in
160 let endian = string_of_endian endian in
161 let signed = if signed then "signed" else "unsigned" in
162 let t = string_of_t t in
163 let loc_fname = Loc.file_name _loc in
164 let loc_line = Loc.start_line _loc in
165 let loc_char = Loc.start_off _loc - Loc.start_bol _loc in
167 sprintf "%s : %s : %s, %s, %s @ (%S, %d, %d)"
168 fval flen t endian signed loc_fname loc_line loc_char
170 (* Generate the code for a constructor, ie. 'BITSTRING ...'. *)
171 let output_constructor _loc fields =
172 let loc_fname = Loc.file_name _loc in
173 let loc_line = string_of_int (Loc.start_line _loc) in
174 let loc_char = string_of_int (Loc.start_off _loc - Loc.start_bol _loc) in
176 (* Bitstrings are created like the 'Buffer' module (in fact, using
177 * the Buffer module), by appending snippets to a growing buffer.
178 * This is reasonable efficient and avoids a lot of garbage.
180 let buffer = gensym "buffer" in
182 (* General exception which is raised inside the constructor functions
183 * when an int expression is out of range at runtime.
185 let exn = gensym "exn" in
186 let exn_used = ref false in
188 (* Convert each field to a simple bitstring-generating expression. *)
189 let fields = List.map (
190 fun {fval=fval; flen=flen; endian=endian; signed=signed; t=t} ->
191 (* Is flen an integer constant? If so, what is it? This
192 * is very simple-minded and only detects simple constants.
196 | <:expr< $int:i$ >> -> Some (int_of_string i)
199 let name_of_int_construct_const = function
200 (* XXX As an enhancement we should allow a 64-bit-only
201 * mode which lets us use 'int' up to 63 bits and won't
202 * compile on 32-bit platforms.
204 (* XXX The meaning of signed/unsigned breaks down at
205 * 31, 32, 63 and 64 bits.
207 | (1, _, _) -> "construct_bit"
208 | ((2|3|4|5|6|7|8), _, false) -> "construct_char_unsigned"
209 | ((2|3|4|5|6|7|8), _, true) -> "construct_char_signed"
210 | (i, BigEndian, false) when i <= 31 -> "construct_int_be_unsigned"
211 | (i, BigEndian, true) when i <= 31 -> "construct_int_be_signed"
212 | (i, LittleEndian, false) when i <= 31 -> "construct_int_le_unsigned"
213 | (i, LittleEndian, true) when i <= 31 -> "construct_int_le_signed"
214 | (i, NativeEndian, false) when i <= 31 -> "construct_int_ne_unsigned"
215 | (i, NativeEndian, true) when i <= 31 -> "construct_int_ne_signed"
216 | (32, BigEndian, false) -> "construct_int32_be_unsigned"
217 | (32, BigEndian, true) -> "construct_int32_be_signed"
218 | (32, LittleEndian, false) -> "construct_int32_le_unsigned"
219 | (32, LittleEndian, true) -> "construct_int32_le_signed"
220 | (32, NativeEndian, false) -> "construct_int32_ne_unsigned"
221 | (32, NativeEndian, true) -> "construct_int32_ne_signed"
222 | (_, BigEndian, false) -> "construct_int64_be_unsigned"
223 | (_, BigEndian, true) -> "construct_int64_be_signed"
224 | (_, LittleEndian, false) -> "construct_int64_le_unsigned"
225 | (_, LittleEndian, true) -> "construct_int64_le_signed"
226 | (_, NativeEndian, false) -> "construct_int64_ne_unsigned"
227 | (_, NativeEndian, true) -> "construct_int64_ne_signed"
229 let name_of_int_construct = function
230 (* XXX As an enhancement we should allow users to
231 * specify that a field length can fit into a char/int/int32
232 * (of course, this would have to be checked at runtime).
234 | (BigEndian, false) -> "construct_int64_be_unsigned"
235 | (BigEndian, true) -> "construct_int64_be_signed"
236 | (LittleEndian, false) -> "construct_int64_le_unsigned"
237 | (LittleEndian, true) -> "construct_int64_le_signed"
238 | (NativeEndian, false) -> "construct_int64_ne_unsigned"
239 | (NativeEndian, true) -> "construct_int64_ne_signed"
243 match t, flen_is_const with
244 (* Common case: int field, constant flen.
246 * Range checks are done inside the construction function
247 * because that's a lot simpler w.r.t. types. It might
248 * be better to move them here. XXX
250 | Int, Some i when i > 0 && i <= 64 ->
252 name_of_int_construct_const (i,endian,signed) in
256 Bitmatch.$lid:construct_func$ $lid:buffer$ $fval$ $flen$
261 Loc.raise _loc (Failure "length of int field must be [1..64]")
263 (* Int field, non-constant length. We need to perform a runtime
264 * test to ensure the length is [1..64].
266 * Range checks are done inside the construction function
267 * because that's a lot simpler w.r.t. types. It might
268 * be better to move them here. XXX
271 let construct_func = name_of_int_construct (endian,signed) in
275 if $flen$ >= 1 && $flen$ <= 64 then
276 Bitmatch.$lid:construct_func$ $lid:buffer$ $fval$ $flen$
279 raise (Bitmatch.Construct_failure
280 ("length of int field must be [1..64]",
282 $int:loc_line$, $int:loc_char$))
285 (* Bitstring, constant length > 0. *)
286 | Bitstring, Some i when i > 0 ->
287 let bs = gensym "bs" in
289 let $lid:bs$ = $fval$ in
290 if Bitmatch.bitstring_length $lid:bs$ = $flen$ then
291 Bitmatch.construct_bitstring $lid:buffer$ $lid:bs$
293 raise (Bitmatch.Construct_failure
294 ("length of bitstring does not match declaration",
296 $int:loc_line$, $int:loc_char$))
299 (* Bitstring, constant length -1, means variable length bitstring
302 | Bitstring, Some (-1) ->
303 <:expr< Bitmatch.construct_bitstring $lid:buffer$ $fval$ >>
305 (* Bitstring, constant length = 0 is probably an error, and so it
308 | Bitstring, Some _ ->
311 "length of bitstring must be > 0 or the special value -1")
313 (* Bitstring, non-constant length.
314 * We check at runtime that the length is > 0 and matches
315 * the declared length.
318 let bslen = gensym "bslen" in
319 let bs = gensym "bs" in
321 let $lid:bslen$ = $flen$ in
322 if $lid:bslen$ > 0 then (
323 let $lid:bs$ = $fval$ in
324 if Bitmatch.bitstring_length $lid:bs$ = $lid:bslen$ then
325 Bitmatch.construct_bitstring $lid:buffer$ $lid:bs$
327 raise (Bitmatch.Construct_failure
328 ("length of bitstring does not match declaration",
330 $int:loc_line$, $int:loc_char$))
332 raise (Bitmatch.Construct_failure
333 ("length of bitstring must be > 0",
335 $int:loc_line$, $int:loc_char$))
340 (* Create the final bitstring. Start by creating an empty buffer
341 * and then evaluate each expression above in turn which will
342 * append some more to the bitstring buffer. Finally extract
345 * XXX We almost have enough information to be able to guess
346 * a good initial size for the buffer.
350 | [] -> <:expr< [] >>
351 | h::t -> List.fold_left (fun h t -> <:expr< $h$; $t$ >>) h t in
355 let $lid:buffer$ = Bitmatch.Buffer.create () in
357 Bitmatch.Buffer.contents $lid:buffer$
363 Bitmatch.Construct_failure ("value out of range",
365 $int:loc_line$, $int:loc_char$) in
371 (* Generate the code for a bitmatch statement. '_loc' is the
372 * location, 'bs' is the bitstring parameter, 'cases' are
373 * the list of cases to test against.
375 let output_bitmatch _loc bs cases =
376 let data = gensym "data" and off = gensym "off" and len = gensym "len" in
377 let result = gensym "result" in
379 (* This generates the field extraction code for each
380 * field a single case. Each field must be wider than
381 * the minimum permitted for the type and there must be
382 * enough remaining data in the bitstring to satisfy it.
383 * As we go through the fields, symbols 'data', 'off' and 'len'
384 * track our position and remaining length in the bitstring.
386 * The whole thing is a lot of nested 'if' statements. Code
387 * is generated from the inner-most (last) field outwards.
389 let rec output_field_extraction inner = function
392 let {fval=fval; flen=flen; endian=endian; signed=signed; t=t}
395 (* Is fval a binding (an ident) or an expression? If it's
396 * a binding then we will generate a binding for this field.
397 * If it's an expression then we will test the field against
402 | <:expr< $lid:id$ >> -> Some id
405 (* Is flen an integer constant? If so, what is it? This
406 * is very simple-minded and only detects simple constants.
410 | <:expr< $int:i$ >> -> Some (int_of_string i)
413 let name_of_int_extract_const = function
414 (* XXX As an enhancement we should allow a 64-bit-only
415 * mode which lets us use 'int' up to 63 bits and won't
416 * compile on 32-bit platforms.
418 (* XXX The meaning of signed/unsigned breaks down at
419 * 31, 32, 63 and 64 bits.
421 | (1, _, _) -> "extract_bit"
422 | ((2|3|4|5|6|7|8), _, false) -> "extract_char_unsigned"
423 | ((2|3|4|5|6|7|8), _, true) -> "extract_char_signed"
424 | (i, BigEndian, false) when i <= 31 -> "extract_int_be_unsigned"
425 | (i, BigEndian, true) when i <= 31 -> "extract_int_be_signed"
426 | (i, LittleEndian, false) when i <= 31 -> "extract_int_le_unsigned"
427 | (i, LittleEndian, true) when i <= 31 -> "extract_int_le_signed"
428 | (i, NativeEndian, false) when i <= 31 -> "extract_int_ne_unsigned"
429 | (i, NativeEndian, true) when i <= 31 -> "extract_int_ne_signed"
430 | (32, BigEndian, false) -> "extract_int32_be_unsigned"
431 | (32, BigEndian, true) -> "extract_int32_be_signed"
432 | (32, LittleEndian, false) -> "extract_int32_le_unsigned"
433 | (32, LittleEndian, true) -> "extract_int32_le_signed"
434 | (32, NativeEndian, false) -> "extract_int32_ne_unsigned"
435 | (32, NativeEndian, true) -> "extract_int32_ne_signed"
436 | (_, BigEndian, false) -> "extract_int64_be_unsigned"
437 | (_, BigEndian, true) -> "extract_int64_be_signed"
438 | (_, LittleEndian, false) -> "extract_int64_le_unsigned"
439 | (_, LittleEndian, true) -> "extract_int64_le_signed"
440 | (_, NativeEndian, false) -> "extract_int64_ne_unsigned"
441 | (_, NativeEndian, true) -> "extract_int64_ne_signed"
443 let name_of_int_extract = function
444 (* XXX As an enhancement we should allow users to
445 * specify that a field length can fit into a char/int/int32
446 * (of course, this would have to be checked at runtime).
448 | (BigEndian, false) -> "extract_int64_be_unsigned"
449 | (BigEndian, true) -> "extract_int64_be_signed"
450 | (LittleEndian, false) -> "extract_int64_le_unsigned"
451 | (LittleEndian, true) -> "extract_int64_le_signed"
452 | (NativeEndian, false) -> "extract_int64_ne_unsigned"
453 | (NativeEndian, true) -> "extract_int64_ne_signed"
457 match t, fval_is_ident, flen_is_const with
458 (* Common case: int field, binding, constant flen *)
459 | Int, Some ident, Some i when i > 0 && i <= 64 ->
460 let extract_func = name_of_int_extract_const (i,endian,signed) in
462 if $lid:len$ >= $flen$ then (
463 let $lid:ident$, $lid:off$, $lid:len$ =
464 Bitmatch.$lid:extract_func$ $lid:data$ $lid:off$ $lid:len$
470 (* Int field, not a binding, constant flen *)
471 | Int, None, Some i when i > 0 && i <= 64 ->
472 let extract_func = name_of_int_extract_const (i,endian,signed) in
473 let v = gensym "val" in
475 if $lid:len$ >= $flen$ then (
476 let $lid:v$, $lid:off$, $lid:len$ =
477 Bitmatch.$lid:extract_func$ $lid:data$ $lid:off$ $lid:len$
479 if $lid:v$ = $fval$ then (
486 Loc.raise _loc (Failure "length of int field must be [1..64]")
488 (* Int field, non-const flen. We have to test the range of
489 * the field at runtime. If outside the range it's a no-match
492 | Int, Some ident, None ->
493 let extract_func = name_of_int_extract (endian,signed) in
495 if $flen$ >= 1 && $flen$ <= 64 && $flen$ <= $lid:len$ then (
496 let $lid:ident$, $lid:off$, $lid:len$ =
497 Bitmatch.$lid:extract_func$ $lid:data$ $lid:off$ $lid:len$
504 let extract_func = name_of_int_extract (endian,signed) in
505 let v = gensym "val" in
507 if $flen$ >= 1 && $flen$ <= 64 && $flen$ <= $lid:len$ then (
508 let $lid:v$, $lid:off$, $lid:len$ =
509 Bitmatch.$lid:extract_func$ $lid:data$ $lid:off$ $lid:len$
511 if $lid:v$ = $fval$ then (
517 (* Can't compare bitstrings at the moment. *)
518 | Bitstring, None, _ ->
520 (Failure "cannot compare a bitstring to a constant")
522 (* Bitstring, constant flen >= 0. *)
523 | Bitstring, Some ident, Some i when i >= 0 ->
525 if $lid:len$ >= $flen$ then (
526 let $lid:ident$, $lid:off$, $lid:len$ =
527 Bitmatch.extract_bitstring $lid:data$ $lid:off$ $lid:len$
533 (* Bitstring, constant flen = -1, means consume all the
536 | Bitstring, Some ident, Some i when i = -1 ->
538 let $lid:ident$, $lid:off$, $lid:len$ =
539 Bitmatch.extract_remainder $lid:data$ $lid:off$ $lid:len$ in
543 | Bitstring, _, Some _ ->
544 Loc.raise _loc (Failure "length of bitstring must be >= 0 or the special value -1")
546 (* Bitstring field, non-const flen. We check the flen is >= 0
547 * (-1 is not allowed here) at runtime.
549 | Bitstring, Some ident, None ->
551 if $flen$ >= 0 && $flen$ <= $lid:len$ then (
552 let $lid:ident$, $lid:off$, $lid:len$ =
553 Bitmatch.extract_bitstring $lid:data$ $lid:off$ $lid:len$
560 (* Emit extra debugging code. *)
562 if not debug then expr else (
563 let field = string_of_field field in
566 if !Bitmatch.debug then (
567 Printf.eprintf "PA_BITMATCH: TEST:\n";
568 Printf.eprintf " %s\n" $str:field$;
569 Printf.eprintf " off %d len %d\n%!" $lid:off$ $lid:len$;
570 (*Bitmatch.hexdump_bitstring stderr
571 ($lid:data$,$lid:off$,$lid:len$);*)
577 output_field_extraction expr fields
580 (* Convert each case in the match. *)
581 let cases = List.map (
583 (* field : len ; field : len when .. -> ..*)
584 | (Fields fields, Some whenclause, code) ->
587 if $whenclause$ then (
588 $lid:result$ := Some ($code$);
592 output_field_extraction inner (List.rev fields)
594 (* field : len ; field : len -> ... *)
595 | (Fields fields, None, code) ->
598 $lid:result$ := Some ($code$);
601 output_field_extraction inner (List.rev fields)
603 (* _ as name when ... -> ... *)
604 | (Bind (Some name), Some whenclause, code) ->
606 let $lid:name$ = ($lid:data$, $lid:off$, $lid:len$) in
607 if $whenclause$ then (
608 $lid:result$ := Some ($code$);
613 (* _ as name -> ... *)
614 | (Bind (Some name), None, code) ->
616 let $lid:name$ = ($lid:data$, $lid:off$, $lid:len$) in
617 $lid:result$ := Some ($code$);
621 (* _ when ... -> ... *)
622 | (Bind None, Some whenclause, code) ->
624 if $whenclause$ then (
625 $lid:result$ := Some ($code$);
631 | (Bind None, None, code) ->
633 $lid:result$ := Some ($code$);
639 (* Join them into a single expression.
641 * Don't do it with a normal fold_right because that leaves
642 * 'raise Exit; ()' at the end which causes a compiler warning.
643 * Hence a bit of complexity here.
645 * Note that the number of cases is always >= 1 so List.hd is safe.
647 let cases = List.rev cases in
649 List.fold_left (fun base case -> <:expr< $case$ ; $base$ >>)
650 (List.hd cases) (List.tl cases) in
652 (* The final code just wraps the list of cases in a
653 * try/with construct so that each case is tried in
654 * turn until one case matches (that case sets 'result'
655 * and raises 'Exit' to leave the whole statement).
656 * If result isn't set by the end then we will raise
657 * Match_failure with the location of the bitmatch
658 * statement in the original code.
660 let loc_fname = Loc.file_name _loc in
661 let loc_line = string_of_int (Loc.start_line _loc) in
662 let loc_char = string_of_int (Loc.start_off _loc - Loc.start_bol _loc) in
665 let ($lid:data$, $lid:off$, $lid:len$) = $bs$ in
666 let $lid:result$ = ref None in
670 match ! $lid:result$ with
672 | None -> raise (Match_failure ($str:loc_fname$,
673 $int:loc_line$, $int:loc_char$))
680 [ LIST0 [ q = LIDENT -> q ] SEP "," ]
684 [ fval = expr LEVEL "top"; ":"; len = expr LEVEL "top";
685 qs = OPT [ ":"; qs = qualifiers -> qs ] ->
686 parse_field _loc fval len qs
692 bind = OPT [ "as"; name = LIDENT -> name ];
693 w = OPT [ "when"; e = expr -> e ]; "->";
697 | [ fields = LIST0 field SEP ";";
698 w = OPT [ "when"; e = expr -> e ]; "->";
700 (Fields fields, w, code)
704 (* 'bitmatch' expressions. *)
706 [ "bitmatch"; bs = expr; "with"; OPT "|";
707 cases = LIST1 match_case SEP "|" ->
708 output_bitmatch _loc bs cases
713 fields = LIST0 field SEP ";" ->
714 output_constructor _loc fields