1 (* Bitmatch syntax extension.
2 * Copyright (C) 2008 Red Hat Inc., Richard W.M. Jones
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 * $Id: pa_bitmatch.ml,v 1.9 2008-04-25 12:08:51 rjones Exp $
27 (* If this is true then we emit some debugging code which can
28 * be useful to tell what is happening during matches. You
29 * also need to do 'Bitmatch.debug := true' in your main program.
31 * If this is false then no extra debugging code is emitted.
35 (* A field when used in a bitmatch (a pattern). *)
37 fpatt : patt; (* field matching pattern *)
40 (* A field when used in a BITSTRING constructor (an expression). *)
42 fexpr : expr; (* field value *)
47 flen : expr; (* length in bits, may be non-const *)
48 endian : endian; (* endianness *)
49 signed : bool; (* true if signed, false if unsigned *)
51 _loc : Loc.t; (* location in source code *)
53 and endian = BigEndian | LittleEndian | NativeEndian
54 and t = Int | Bitstring
56 (* Generate a fresh, unique symbol each time called. *)
61 sprintf "__pabitmatch_%s_%d" name i
63 (* Heuristic test if a pattern is exhaustive. *)
64 let pattern_is_exhaustive = function
65 | <:patt< $lid:_$ >> -> true
68 let rec parse_patt_field _loc fpatt flen qs =
69 let fpc = parse_field_common _loc flen qs in
70 { fpatt = fpatt; fpc = fpc }
72 and parse_constr_field _loc fexpr flen qs =
73 let fec = parse_field_common _loc flen qs in
74 { fexpr = fexpr; fec = fec }
76 (* Deal with the qualifiers which appear for a field of both types. *)
77 and parse_field_common _loc flen qs =
78 let endian, signed, t =
80 | None -> (None, None, None)
83 fun (endian, signed, t) q ->
86 if endian <> None then
87 Loc.raise _loc (Failure "an endian flag has been set already")
89 let endian = Some BigEndian in
93 if endian <> None then
94 Loc.raise _loc (Failure "an endian flag has been set already")
96 let endian = Some LittleEndian in
100 if endian <> None then
101 Loc.raise _loc (Failure "an endian flag has been set already")
103 let endian = Some NativeEndian in
107 if signed <> None then
108 Loc.raise _loc (Failure "a signed flag has been set already")
110 let signed = Some true in
114 if signed <> None then
115 Loc.raise _loc (Failure "a signed flag has been set already")
117 let signed = Some false in
122 Loc.raise _loc (Failure "a type flag has been set already")
129 Loc.raise _loc (Failure "a type flag has been set already")
131 let t = Some Bitstring in
135 Loc.raise _loc (Failure (s ^ ": unknown qualifier"))
136 ) (None, None, None) qs in
138 (* If type is set to bitstring then endianness and signedness
139 * qualifiers are meaningless and must not be set.
141 if t = Some Bitstring && (endian <> None || signed <> None) then
143 Failure "bitstring type and endian or signed qualifiers cannot be mixed"
146 (* Default endianness, signedness, type. *)
147 let endian = match endian with None -> BigEndian | Some e -> e in
148 let signed = match signed with None -> false | Some s -> s in
149 let t = match t with None -> Int | Some t -> t in
159 let string_of_endian = function
160 | BigEndian -> "bigendian"
161 | LittleEndian -> "littleendian"
162 | NativeEndian -> "nativeendian"
164 let string_of_t = function
166 | Bitstring -> "bitstring"
168 let rec string_of_patt_field { fpatt = fpatt; fpc = fpc } =
169 let fpc = string_of_field_common fpc in
172 | <:patt< $lid:id$ >> -> id
173 | _ -> "[pattern]" in
176 and string_of_constr_field { fexpr = fexpr; fec = fec } =
177 let fec = string_of_field_common fec in
180 | <:expr< $lid:id$ >> -> id
181 | _ -> "[expression]" in
184 and string_of_field_common { flen = flen;
185 endian = endian; signed = signed; t = t;
189 | <:expr< $int:i$ >> -> i
190 | _ -> "[non-const-len]" in
191 let endian = string_of_endian endian in
192 let signed = if signed then "signed" else "unsigned" in
193 let t = string_of_t t in
194 let loc_fname = Loc.file_name _loc in
195 let loc_line = Loc.start_line _loc in
196 let loc_char = Loc.start_off _loc - Loc.start_bol _loc in
198 sprintf "%s : %s, %s, %s @ (%S, %d, %d)"
199 flen t endian signed loc_fname loc_line loc_char
201 (* Generate the code for a constructor, ie. 'BITSTRING ...'. *)
202 let output_constructor _loc fields =
203 let loc_fname = Loc.file_name _loc in
204 let loc_line = string_of_int (Loc.start_line _loc) in
205 let loc_char = string_of_int (Loc.start_off _loc - Loc.start_bol _loc) in
207 (* Bitstrings are created like the 'Buffer' module (in fact, using
208 * the Buffer module), by appending snippets to a growing buffer.
209 * This is reasonably efficient and avoids a lot of garbage.
211 let buffer = gensym "buffer" in
213 (* General exception which is raised inside the constructor functions
214 * when an int expression is out of range at runtime.
216 let exn = gensym "exn" in
217 let exn_used = ref false in
219 (* Convert each field to a simple bitstring-generating expression. *)
220 let fields = List.map (
221 fun {fexpr=fexpr; fec={flen=flen; endian=endian; signed=signed; t=t}} ->
222 (* Is flen an integer constant? If so, what is it? This
223 * is very simple-minded and only detects simple constants.
227 | <:expr< $int:i$ >> -> Some (int_of_string i)
230 let name_of_int_construct_const = function
231 (* XXX As an enhancement we should allow a 64-bit-only
232 * mode which lets us use 'int' up to 63 bits and won't
233 * compile on 32-bit platforms.
235 (* XXX The meaning of signed/unsigned breaks down at
236 * 31, 32, 63 and 64 bits.
238 | (1, _, _) -> "construct_bit"
239 | ((2|3|4|5|6|7|8), _, false) -> "construct_char_unsigned"
240 | ((2|3|4|5|6|7|8), _, true) -> "construct_char_signed"
241 | (i, BigEndian, false) when i <= 31 -> "construct_int_be_unsigned"
242 | (i, BigEndian, true) when i <= 31 -> "construct_int_be_signed"
243 | (i, LittleEndian, false) when i <= 31 -> "construct_int_le_unsigned"
244 | (i, LittleEndian, true) when i <= 31 -> "construct_int_le_signed"
245 | (i, NativeEndian, false) when i <= 31 -> "construct_int_ne_unsigned"
246 | (i, NativeEndian, true) when i <= 31 -> "construct_int_ne_signed"
247 | (32, BigEndian, false) -> "construct_int32_be_unsigned"
248 | (32, BigEndian, true) -> "construct_int32_be_signed"
249 | (32, LittleEndian, false) -> "construct_int32_le_unsigned"
250 | (32, LittleEndian, true) -> "construct_int32_le_signed"
251 | (32, NativeEndian, false) -> "construct_int32_ne_unsigned"
252 | (32, NativeEndian, true) -> "construct_int32_ne_signed"
253 | (_, BigEndian, false) -> "construct_int64_be_unsigned"
254 | (_, BigEndian, true) -> "construct_int64_be_signed"
255 | (_, LittleEndian, false) -> "construct_int64_le_unsigned"
256 | (_, LittleEndian, true) -> "construct_int64_le_signed"
257 | (_, NativeEndian, false) -> "construct_int64_ne_unsigned"
258 | (_, NativeEndian, true) -> "construct_int64_ne_signed"
260 let name_of_int_construct = function
261 (* XXX As an enhancement we should allow users to
262 * specify that a field length can fit into a char/int/int32
263 * (of course, this would have to be checked at runtime).
265 | (BigEndian, false) -> "construct_int64_be_unsigned"
266 | (BigEndian, true) -> "construct_int64_be_signed"
267 | (LittleEndian, false) -> "construct_int64_le_unsigned"
268 | (LittleEndian, true) -> "construct_int64_le_signed"
269 | (NativeEndian, false) -> "construct_int64_ne_unsigned"
270 | (NativeEndian, true) -> "construct_int64_ne_signed"
274 match t, flen_is_const with
275 (* Common case: int field, constant flen.
277 * Range checks are done inside the construction function
278 * because that's a lot simpler w.r.t. types. It might
279 * be better to move them here. XXX
281 | Int, Some i when i > 0 && i <= 64 ->
283 name_of_int_construct_const (i,endian,signed) in
287 Bitmatch.$lid:construct_func$ $lid:buffer$ $fexpr$ $flen$
292 Loc.raise _loc (Failure "length of int field must be [1..64]")
294 (* Int field, non-constant length. We need to perform a runtime
295 * test to ensure the length is [1..64].
297 * Range checks are done inside the construction function
298 * because that's a lot simpler w.r.t. types. It might
299 * be better to move them here. XXX
302 let construct_func = name_of_int_construct (endian,signed) in
306 if $flen$ >= 1 && $flen$ <= 64 then
307 Bitmatch.$lid:construct_func$ $lid:buffer$ $fexpr$ $flen$
310 raise (Bitmatch.Construct_failure
311 ("length of int field must be [1..64]",
313 $int:loc_line$, $int:loc_char$))
316 (* Bitstring, constant length > 0. *)
317 | Bitstring, Some i when i > 0 ->
318 let bs = gensym "bs" in
320 let $lid:bs$ = $fexpr$ in
321 if Bitmatch.bitstring_length $lid:bs$ = $flen$ then
322 Bitmatch.construct_bitstring $lid:buffer$ $lid:bs$
324 raise (Bitmatch.Construct_failure
325 ("length of bitstring does not match declaration",
327 $int:loc_line$, $int:loc_char$))
330 (* Bitstring, constant length -1, means variable length bitstring
333 | Bitstring, Some (-1) ->
334 <:expr< Bitmatch.construct_bitstring $lid:buffer$ $fexpr$ >>
336 (* Bitstring, constant length = 0 is probably an error, and so it
339 | Bitstring, Some _ ->
342 "length of bitstring must be > 0 or the special value -1")
344 (* Bitstring, non-constant length.
345 * We check at runtime that the length is > 0 and matches
346 * the declared length.
349 let bslen = gensym "bslen" in
350 let bs = gensym "bs" in
352 let $lid:bslen$ = $flen$ in
353 if $lid:bslen$ > 0 then (
354 let $lid:bs$ = $fexpr$ in
355 if Bitmatch.bitstring_length $lid:bs$ = $lid:bslen$ then
356 Bitmatch.construct_bitstring $lid:buffer$ $lid:bs$
358 raise (Bitmatch.Construct_failure
359 ("length of bitstring does not match declaration",
361 $int:loc_line$, $int:loc_char$))
363 raise (Bitmatch.Construct_failure
364 ("length of bitstring must be > 0",
366 $int:loc_line$, $int:loc_char$))
371 (* Create the final bitstring. Start by creating an empty buffer
372 * and then evaluate each expression above in turn which will
373 * append some more to the bitstring buffer. Finally extract
376 * XXX We almost have enough information to be able to guess
377 * a good initial size for the buffer.
381 | [] -> <:expr< [] >>
382 | h::t -> List.fold_left (fun h t -> <:expr< $h$; $t$ >>) h t in
386 let $lid:buffer$ = Bitmatch.Buffer.create () in
388 Bitmatch.Buffer.contents $lid:buffer$
394 Bitmatch.Construct_failure ("value out of range",
396 $int:loc_line$, $int:loc_char$) in
402 (* Generate the code for a bitmatch statement. '_loc' is the
403 * location, 'bs' is the bitstring parameter, 'cases' are
404 * the list of cases to test against.
406 let output_bitmatch _loc bs cases =
407 let data = gensym "data" and off = gensym "off" and len = gensym "len" in
408 let result = gensym "result" in
410 (* This generates the field extraction code for each
411 * field a single case. Each field must be wider than
412 * the minimum permitted for the type and there must be
413 * enough remaining data in the bitstring to satisfy it.
414 * As we go through the fields, symbols 'data', 'off' and 'len'
415 * track our position and remaining length in the bitstring.
417 * The whole thing is a lot of nested 'if' statements. Code
418 * is generated from the inner-most (last) field outwards.
420 let rec output_field_extraction inner = function
423 let {fpatt=fpatt; fpc={flen=flen; endian=endian; signed=signed; t=t}}
426 (* Is flen an integer constant? If so, what is it? This
427 * is very simple-minded and only detects simple constants.
431 | <:expr< $int:i$ >> -> Some (int_of_string i)
434 let name_of_int_extract_const = function
435 (* XXX As an enhancement we should allow a 64-bit-only
436 * mode which lets us use 'int' up to 63 bits and won't
437 * compile on 32-bit platforms.
439 (* XXX The meaning of signed/unsigned breaks down at
440 * 31, 32, 63 and 64 bits.
442 | (1, _, _) -> "extract_bit"
443 | ((2|3|4|5|6|7|8), _, false) -> "extract_char_unsigned"
444 | ((2|3|4|5|6|7|8), _, true) -> "extract_char_signed"
445 | (i, BigEndian, false) when i <= 31 -> "extract_int_be_unsigned"
446 | (i, BigEndian, true) when i <= 31 -> "extract_int_be_signed"
447 | (i, LittleEndian, false) when i <= 31 -> "extract_int_le_unsigned"
448 | (i, LittleEndian, true) when i <= 31 -> "extract_int_le_signed"
449 | (i, NativeEndian, false) when i <= 31 -> "extract_int_ne_unsigned"
450 | (i, NativeEndian, true) when i <= 31 -> "extract_int_ne_signed"
451 | (32, BigEndian, false) -> "extract_int32_be_unsigned"
452 | (32, BigEndian, true) -> "extract_int32_be_signed"
453 | (32, LittleEndian, false) -> "extract_int32_le_unsigned"
454 | (32, LittleEndian, true) -> "extract_int32_le_signed"
455 | (32, NativeEndian, false) -> "extract_int32_ne_unsigned"
456 | (32, NativeEndian, true) -> "extract_int32_ne_signed"
457 | (_, BigEndian, false) -> "extract_int64_be_unsigned"
458 | (_, BigEndian, true) -> "extract_int64_be_signed"
459 | (_, LittleEndian, false) -> "extract_int64_le_unsigned"
460 | (_, LittleEndian, true) -> "extract_int64_le_signed"
461 | (_, NativeEndian, false) -> "extract_int64_ne_unsigned"
462 | (_, NativeEndian, true) -> "extract_int64_ne_signed"
464 let name_of_int_extract = function
465 (* XXX As an enhancement we should allow users to
466 * specify that a field length can fit into a char/int/int32
467 * (of course, this would have to be checked at runtime).
469 | (BigEndian, false) -> "extract_int64_be_unsigned"
470 | (BigEndian, true) -> "extract_int64_be_signed"
471 | (LittleEndian, false) -> "extract_int64_le_unsigned"
472 | (LittleEndian, true) -> "extract_int64_le_signed"
473 | (NativeEndian, false) -> "extract_int64_ne_unsigned"
474 | (NativeEndian, true) -> "extract_int64_ne_signed"
478 match t, flen_is_const with
479 (* Common case: int field, constant flen *)
480 | Int, Some i when i > 0 && i <= 64 ->
481 let extract_func = name_of_int_extract_const (i,endian,signed) in
482 let v = gensym "val" in
483 if pattern_is_exhaustive fpatt then
485 if $lid:len$ >= $flen$ then (
486 let $lid:v$, $lid:off$, $lid:len$ =
487 Bitmatch.$lid:extract_func$ $lid:data$ $lid:off$ $lid:len$
489 match $lid:v$ with $fpatt$ -> $inner$
494 if $lid:len$ >= $flen$ then (
495 let $lid:v$, $lid:off$, $lid:len$ =
496 Bitmatch.$lid:extract_func$ $lid:data$ $lid:off$ $lid:len$
498 match $lid:v$ with $fpatt$ -> $inner$ | _ -> ()
503 Loc.raise _loc (Failure "length of int field must be [1..64]")
505 (* Int field, non-const flen. We have to test the range of
506 * the field at runtime. If outside the range it's a no-match
510 let extract_func = name_of_int_extract (endian,signed) in
511 let v = gensym "val" in
512 if pattern_is_exhaustive fpatt then
514 if $flen$ >= 1 && $flen$ <= 64 && $flen$ <= $lid:len$ then (
515 let $lid:v$, $lid:off$, $lid:len$ =
516 Bitmatch.$lid:extract_func$ $lid:data$ $lid:off$ $lid:len$
518 match $lid:v$ with $fpatt$ -> $inner$
523 if $flen$ >= 1 && $flen$ <= 64 && $flen$ <= $lid:len$ then (
524 let $lid:v$, $lid:off$, $lid:len$ =
525 Bitmatch.$lid:extract_func$ $lid:data$ $lid:off$ $lid:len$
527 match $lid:v$ with $fpatt$ -> $inner$ | _ -> ()
531 (* Bitstring, constant flen >= 0.
532 * At the moment all we can do is assign the bitstring to an
535 | Bitstring, Some i when i >= 0 ->
538 | <:patt< $lid:ident$ >> -> ident
541 (Failure "cannot compare a bitstring to a constant") in
543 if $lid:len$ >= $flen$ then (
544 let $lid:ident$, $lid:off$, $lid:len$ =
545 Bitmatch.extract_bitstring $lid:data$ $lid:off$ $lid:len$
551 (* Bitstring, constant flen = -1, means consume all the
554 | Bitstring, Some i when i = -1 ->
557 | <:patt< $lid:ident$ >> -> ident
560 (Failure "cannot compare a bitstring to a constant") in
562 let $lid:ident$, $lid:off$, $lid:len$ =
563 Bitmatch.extract_remainder $lid:data$ $lid:off$ $lid:len$ in
567 | Bitstring, Some _ ->
568 Loc.raise _loc (Failure "length of bitstring must be >= 0 or the special value -1")
570 (* Bitstring field, non-const flen. We check the flen is >= 0
571 * (-1 is not allowed here) at runtime.
576 | <:patt< $lid:ident$ >> -> ident
579 (Failure "cannot compare a bitstring to a constant") in
581 if $flen$ >= 0 && $flen$ <= $lid:len$ then (
582 let $lid:ident$, $lid:off$, $lid:len$ =
583 Bitmatch.extract_bitstring $lid:data$ $lid:off$ $lid:len$
590 (* Emit extra debugging code. *)
592 if not debug then expr else (
593 let field = string_of_patt_field field in
596 if !Bitmatch.debug then (
597 Printf.eprintf "PA_BITMATCH: TEST:\n";
598 Printf.eprintf " %s\n" $str:field$;
599 Printf.eprintf " off %d len %d\n%!" $lid:off$ $lid:len$;
600 (*Bitmatch.hexdump_bitstring stderr
601 ($lid:data$,$lid:off$,$lid:len$);*)
607 output_field_extraction expr fields
610 (* Convert each case in the match. *)
611 let cases = List.map (
612 fun (fields, bind, whenclause, code) ->
613 let inner = <:expr< $lid:result$ := Some ($code$); raise Exit >> in
615 match whenclause with
617 <:expr< if $whenclause$ then $inner$ >>
623 let $lid:name$ = ($lid:data$, $lid:off$, $lid:len$) in
627 output_field_extraction inner (List.rev fields)
630 (* Join them into a single expression.
632 * Don't do it with a normal fold_right because that leaves
633 * 'raise Exit; ()' at the end which causes a compiler warning.
634 * Hence a bit of complexity here.
636 * Note that the number of cases is always >= 1 so List.hd is safe.
638 let cases = List.rev cases in
640 List.fold_left (fun base case -> <:expr< $case$ ; $base$ >>)
641 (List.hd cases) (List.tl cases) in
643 (* The final code just wraps the list of cases in a
644 * try/with construct so that each case is tried in
645 * turn until one case matches (that case sets 'result'
646 * and raises 'Exit' to leave the whole statement).
647 * If result isn't set by the end then we will raise
648 * Match_failure with the location of the bitmatch
649 * statement in the original code.
651 let loc_fname = Loc.file_name _loc in
652 let loc_line = string_of_int (Loc.start_line _loc) in
653 let loc_char = string_of_int (Loc.start_off _loc - Loc.start_bol _loc) in
656 let ($lid:data$, $lid:off$, $lid:len$) = $bs$ in
657 let $lid:result$ = ref None in
661 match ! $lid:result$ with
663 | None -> raise (Match_failure ($str:loc_fname$,
664 $int:loc_line$, $int:loc_char$))
671 [ LIST0 [ q = LIDENT -> q ] SEP "," ]
674 (* Field used in the bitmatch operator (a pattern). *)
676 [ fpatt = patt; ":"; len = expr LEVEL "top";
677 qs = OPT [ ":"; qs = qualifiers -> qs ] ->
678 parse_patt_field _loc fpatt len qs
682 (* Case inside bitmatch operator. *)
685 fields = LIST0 patt_field SEP ";";
687 bind = OPT [ "as"; name = LIDENT -> name ];
688 whenclause = OPT [ "when"; e = expr -> e ]; "->";
690 (fields, bind, whenclause, code)
694 (* Field used in the BITSTRING constructor (an expression). *)
696 [ fexpr = expr LEVEL "top"; ":"; len = expr LEVEL "top";
697 qs = OPT [ ":"; qs = qualifiers -> qs ] ->
698 parse_constr_field _loc fexpr len qs
702 (* 'bitmatch' expressions. *)
705 bs = expr; "with"; OPT "|";
706 cases = LIST1 match_case SEP "|" ->
707 output_bitmatch _loc bs cases
711 | [ "BITSTRING"; "{";
712 fields = LIST0 constr_field SEP ";";
714 output_constructor _loc fields