1 (* Bitmatch syntax extension.
2 * $Id: pa_bitmatch.ml,v 1.4 2008-04-01 17:05:37 rjones Exp $
11 (* If this is true then we emit some debugging code which can
12 * be useful to tell what is happening during matches. You
13 * also need to do 'Bitmatch.debug := true' in your main program.
15 * If this is false then no extra debugging code is emitted.
19 type m = Fields of f list (* field ; field -> ... *)
20 | Bind of string option (* _ -> ... *)
22 (* XXX fval should be a patt, not an expr *)
23 fval : expr; (* field binding or value *)
24 flen : expr; (* length in bits, may be non-const *)
25 endian : endian; (* endianness *)
26 signed : bool; (* true if signed, false if unsigned *)
28 _loc : Loc.t; (* location in source code *)
30 and endian = BigEndian | LittleEndian | NativeEndian
31 and t = Int | Bitstring
33 (* Generate a fresh, unique symbol each time called. *)
38 sprintf "__pabitmatch_%s_%d" name i
40 (* Deal with the qualifiers which appear for a field. *)
41 let parse_field _loc fval flen qs =
42 let endian, signed, t =
44 | None -> (None, None, None)
47 fun (endian, signed, t) q ->
50 if endian <> None then
51 Loc.raise _loc (Failure "an endian flag has been set already")
53 let endian = Some BigEndian in
57 if endian <> None then
58 Loc.raise _loc (Failure "an endian flag has been set already")
60 let endian = Some LittleEndian in
64 if endian <> None then
65 Loc.raise _loc (Failure "an endian flag has been set already")
67 let endian = Some NativeEndian in
71 if signed <> None then
72 Loc.raise _loc (Failure "a signed flag has been set already")
74 let signed = Some true in
78 if signed <> None then
79 Loc.raise _loc (Failure "a signed flag has been set already")
81 let signed = Some false in
86 Loc.raise _loc (Failure "a type flag has been set already")
93 Loc.raise _loc (Failure "a type flag has been set already")
95 let t = Some Bitstring in
99 Loc.raise _loc (Failure (s ^ ": unknown qualifier"))
100 ) (None, None, None) qs in
102 (* If type is set to bitstring then endianness and signedness
103 * qualifiers are meaningless and must not be set.
105 if t = Some Bitstring && (endian <> None || signed <> None) then
107 Failure "bitstring type and endian or signed qualifiers cannot be mixed"
110 (* Default endianness, signedness, type. *)
111 let endian = match endian with None -> BigEndian | Some e -> e in
112 let signed = match signed with None -> false | Some s -> s in
113 let t = match t with None -> Int | Some t -> t in
124 let string_of_endian = function
125 | BigEndian -> "bigendian"
126 | LittleEndian -> "littleendian"
127 | NativeEndian -> "nativeendian"
129 let string_of_t = function
131 | Bitstring -> "bitstring"
133 let string_of_field { fval = fval; flen = flen;
134 endian = endian; signed = signed; t = t;
138 | <:expr< $lid:id$ >> -> id
139 | _ -> "[expression]" in
142 | <:expr< $int:i$ >> -> i
143 | _ -> "[non-const-len]" in
144 let endian = string_of_endian endian in
145 let signed = if signed then "signed" else "unsigned" in
146 let t = string_of_t t in
147 let loc_fname = Loc.file_name _loc in
148 let loc_line = Loc.start_line _loc in
149 let loc_char = Loc.start_off _loc - Loc.start_bol _loc in
151 sprintf "%s : %s : %s, %s, %s @ (%S, %d, %d)"
152 fval flen t endian signed loc_fname loc_line loc_char
154 (* Generate the code for a constructor, ie. 'BITSTRING ...'. *)
155 let output_constructor _loc fields =
156 let loc_fname = Loc.file_name _loc in
157 let loc_line = string_of_int (Loc.start_line _loc) in
158 let loc_char = string_of_int (Loc.start_off _loc - Loc.start_bol _loc) in
160 (* Bitstrings are created like the 'Buffer' module (in fact, using
161 * the Buffer module), by appending snippets to a growing buffer.
162 * This is reasonable efficient and avoids a lot of garbage.
164 let buffer = gensym "buffer" in
166 (* General exception which is raised inside the constructor functions
167 * when an int expression is out of range at runtime.
169 let exn = gensym "exn" in
170 let exn_used = ref false in
172 (* Convert each field to a simple bitstring-generating expression. *)
173 let fields = List.map (
174 fun {fval=fval; flen=flen; endian=endian; signed=signed; t=t} ->
175 (* Is flen an integer constant? If so, what is it? This
176 * is very simple-minded and only detects simple constants.
180 | <:expr< $int:i$ >> -> Some (int_of_string i)
183 let name_of_int_construct_const = function
184 (* XXX As an enhancement we should allow a 64-bit-only
185 * mode which lets us use 'int' up to 63 bits and won't
186 * compile on 32-bit platforms.
188 (* XXX The meaning of signed/unsigned breaks down at
189 * 31, 32, 63 and 64 bits.
191 | (1, _, _) -> "construct_bit"
192 | ((2|3|4|5|6|7|8), _, false) -> "construct_char_unsigned"
193 | ((2|3|4|5|6|7|8), _, true) -> "construct_char_signed"
194 | (i, BigEndian, false) when i <= 31 -> "construct_int_be_unsigned"
195 | (i, BigEndian, true) when i <= 31 -> "construct_int_be_signed"
196 | (i, LittleEndian, false) when i <= 31 -> "construct_int_le_unsigned"
197 | (i, LittleEndian, true) when i <= 31 -> "construct_int_le_signed"
198 | (i, NativeEndian, false) when i <= 31 -> "construct_int_ne_unsigned"
199 | (i, NativeEndian, true) when i <= 31 -> "construct_int_ne_signed"
200 | (32, BigEndian, false) -> "construct_int32_be_unsigned"
201 | (32, BigEndian, true) -> "construct_int32_be_signed"
202 | (32, LittleEndian, false) -> "construct_int32_le_unsigned"
203 | (32, LittleEndian, true) -> "construct_int32_le_signed"
204 | (32, NativeEndian, false) -> "construct_int32_ne_unsigned"
205 | (32, NativeEndian, true) -> "construct_int32_ne_signed"
206 | (_, BigEndian, false) -> "construct_int64_be_unsigned"
207 | (_, BigEndian, true) -> "construct_int64_be_signed"
208 | (_, LittleEndian, false) -> "construct_int64_le_unsigned"
209 | (_, LittleEndian, true) -> "construct_int64_le_signed"
210 | (_, NativeEndian, false) -> "construct_int64_ne_unsigned"
211 | (_, NativeEndian, true) -> "construct_int64_ne_signed"
213 let name_of_int_construct = function
214 (* XXX As an enhancement we should allow users to
215 * specify that a field length can fit into a char/int/int32
216 * (of course, this would have to be checked at runtime).
218 | (BigEndian, false) -> "construct_int64_be_unsigned"
219 | (BigEndian, true) -> "construct_int64_be_signed"
220 | (LittleEndian, false) -> "construct_int64_le_unsigned"
221 | (LittleEndian, true) -> "construct_int64_le_signed"
222 | (NativeEndian, false) -> "construct_int64_ne_unsigned"
223 | (NativeEndian, true) -> "construct_int64_ne_signed"
227 match t, flen_is_const with
228 (* Common case: int field, constant flen.
230 * Range checks are done inside the construction function
231 * because that's a lot simpler w.r.t. types. It might
232 * be better to move them here. XXX
234 | Int, Some i when i > 0 && i <= 64 ->
236 name_of_int_construct_const (i,endian,signed) in
240 Bitmatch.$lid:construct_func$ $lid:buffer$ $fval$ $flen$
245 Loc.raise _loc (Failure "length of int field must be [1..64]")
247 (* Int field, non-constant length. We need to perform a runtime
248 * test to ensure the length is [1..64].
250 * Range checks are done inside the construction function
251 * because that's a lot simpler w.r.t. types. It might
252 * be better to move them here. XXX
255 let construct_func = name_of_int_construct (endian,signed) in
259 if $flen$ >= 1 && $flen$ <= 64 then
260 Bitmatch.$lid:construct_func$ $lid:buffer$ $fval$ $flen$
263 raise (Bitmatch.Construct_failure
264 ("length of int field must be [1..64]",
266 $int:loc_line$, $int:loc_char$))
269 (* Bitstring, constant length > 0. *)
270 | Bitstring, Some i when i > 0 ->
271 let bs = gensym "bs" in
273 let $lid:bs$ = $fval$ in
274 if Bitmatch.bitstring_length $lid:bs$ = $flen$ then
275 Bitmatch.construct_bitstring $lid:buffer$ $lid:bs$
277 raise (Bitmatch.Construct_failure
278 ("length of bitstring does not match declaration",
280 $int:loc_line$, $int:loc_char$))
283 (* Bitstring, constant length -1, means variable length bitstring
286 | Bitstring, Some (-1) ->
287 <:expr< Bitmatch.construct_bitstring $lid:buffer$ $fval$ >>
289 (* Bitstring, constant length = 0 is probably an error, and so it
292 | Bitstring, Some _ ->
295 "length of bitstring must be > 0 or the special value -1")
297 (* Bitstring, non-constant length.
298 * We check at runtime that the length is > 0 and matches
299 * the declared length.
302 let bslen = gensym "bslen" in
303 let bs = gensym "bs" in
305 let $lid:bslen$ = $flen$ in
306 if $lid:bslen$ > 0 then (
307 let $lid:bs$ = $fval$ in
308 if Bitmatch.bitstring_length $lid:bs$ = $lid:bslen$ then
309 Bitmatch.construct_bitstring $lid:buffer$ $lid:bs$
311 raise (Bitmatch.Construct_failure
312 ("length of bitstring does not match declaration",
314 $int:loc_line$, $int:loc_char$))
316 raise (Bitmatch.Construct_failure
317 ("length of bitstring must be > 0",
319 $int:loc_line$, $int:loc_char$))
324 (* Create the final bitstring. Start by creating an empty buffer
325 * and then evaluate each expression above in turn which will
326 * append some more to the bitstring buffer. Finally extract
329 * XXX We almost have enough information to be able to guess
330 * a good initial size for the buffer.
334 | [] -> <:expr< [] >>
335 | h::t -> List.fold_left (fun h t -> <:expr< $h$; $t$ >>) h t in
339 let $lid:buffer$ = Bitmatch.Buffer.create () in
341 Bitmatch.Buffer.contents $lid:buffer$
347 Bitmatch.Construct_failure ("value out of range",
349 $int:loc_line$, $int:loc_char$) in
355 (* Generate the code for a bitmatch statement. '_loc' is the
356 * location, 'bs' is the bitstring parameter, 'cases' are
357 * the list of cases to test against.
359 let output_bitmatch _loc bs cases =
360 let data = gensym "data" and off = gensym "off" and len = gensym "len" in
361 let result = gensym "result" in
363 (* This generates the field extraction code for each
364 * field a single case. Each field must be wider than
365 * the minimum permitted for the type and there must be
366 * enough remaining data in the bitstring to satisfy it.
367 * As we go through the fields, symbols 'data', 'off' and 'len'
368 * track our position and remaining length in the bitstring.
370 * The whole thing is a lot of nested 'if' statements. Code
371 * is generated from the inner-most (last) field outwards.
373 let rec output_field_extraction inner = function
376 let {fval=fval; flen=flen; endian=endian; signed=signed; t=t}
379 (* Is fval a binding (an ident) or an expression? If it's
380 * a binding then we will generate a binding for this field.
381 * If it's an expression then we will test the field against
386 | <:expr< $lid:id$ >> -> Some id
389 (* Is flen an integer constant? If so, what is it? This
390 * is very simple-minded and only detects simple constants.
394 | <:expr< $int:i$ >> -> Some (int_of_string i)
397 let name_of_int_extract_const = function
398 (* XXX As an enhancement we should allow a 64-bit-only
399 * mode which lets us use 'int' up to 63 bits and won't
400 * compile on 32-bit platforms.
402 (* XXX The meaning of signed/unsigned breaks down at
403 * 31, 32, 63 and 64 bits.
405 | (1, _, _) -> "extract_bit"
406 | ((2|3|4|5|6|7|8), _, false) -> "extract_char_unsigned"
407 | ((2|3|4|5|6|7|8), _, true) -> "extract_char_signed"
408 | (i, BigEndian, false) when i <= 31 -> "extract_int_be_unsigned"
409 | (i, BigEndian, true) when i <= 31 -> "extract_int_be_signed"
410 | (i, LittleEndian, false) when i <= 31 -> "extract_int_le_unsigned"
411 | (i, LittleEndian, true) when i <= 31 -> "extract_int_le_signed"
412 | (i, NativeEndian, false) when i <= 31 -> "extract_int_ne_unsigned"
413 | (i, NativeEndian, true) when i <= 31 -> "extract_int_ne_signed"
414 | (32, BigEndian, false) -> "extract_int32_be_unsigned"
415 | (32, BigEndian, true) -> "extract_int32_be_signed"
416 | (32, LittleEndian, false) -> "extract_int32_le_unsigned"
417 | (32, LittleEndian, true) -> "extract_int32_le_signed"
418 | (32, NativeEndian, false) -> "extract_int32_ne_unsigned"
419 | (32, NativeEndian, true) -> "extract_int32_ne_signed"
420 | (_, BigEndian, false) -> "extract_int64_be_unsigned"
421 | (_, BigEndian, true) -> "extract_int64_be_signed"
422 | (_, LittleEndian, false) -> "extract_int64_le_unsigned"
423 | (_, LittleEndian, true) -> "extract_int64_le_signed"
424 | (_, NativeEndian, false) -> "extract_int64_ne_unsigned"
425 | (_, NativeEndian, true) -> "extract_int64_ne_signed"
427 let name_of_int_extract = function
428 (* XXX As an enhancement we should allow users to
429 * specify that a field length can fit into a char/int/int32
430 * (of course, this would have to be checked at runtime).
432 | (BigEndian, false) -> "extract_int64_be_unsigned"
433 | (BigEndian, true) -> "extract_int64_be_signed"
434 | (LittleEndian, false) -> "extract_int64_le_unsigned"
435 | (LittleEndian, true) -> "extract_int64_le_signed"
436 | (NativeEndian, false) -> "extract_int64_ne_unsigned"
437 | (NativeEndian, true) -> "extract_int64_ne_signed"
441 match t, fval_is_ident, flen_is_const with
442 (* Common case: int field, binding, constant flen *)
443 | Int, Some ident, Some i when i > 0 && i <= 64 ->
444 let extract_func = name_of_int_extract_const (i,endian,signed) in
446 if $lid:len$ >= $flen$ then (
447 let $lid:ident$, $lid:off$, $lid:len$ =
448 Bitmatch.$lid:extract_func$ $lid:data$ $lid:off$ $lid:len$
454 (* Int field, not a binding, constant flen *)
455 | Int, None, Some i when i > 0 && i <= 64 ->
456 let extract_func = name_of_int_extract_const (i,endian,signed) in
457 let v = gensym "val" in
459 if $lid:len$ >= $flen$ then (
460 let $lid:v$, $lid:off$, $lid:len$ =
461 Bitmatch.$lid:extract_func$ $lid:data$ $lid:off$ $lid:len$
463 if $lid:v$ = $fval$ then (
470 Loc.raise _loc (Failure "length of int field must be [1..64]")
472 (* Int field, non-const flen. We have to test the range of
473 * the field at runtime. If outside the range it's a no-match
476 | Int, Some ident, None ->
477 let extract_func = name_of_int_extract (endian,signed) in
479 if $flen$ >= 1 && $flen$ <= 64 && $flen$ <= $lid:len$ then (
480 let $lid:ident$, $lid:off$, $lid:len$ =
481 Bitmatch.$lid:extract_func$ $lid:data$ $lid:off$ $lid:len$
488 let extract_func = name_of_int_extract (endian,signed) in
489 let v = gensym "val" in
491 if $flen$ >= 1 && $flen$ <= 64 && $flen$ <= $lid:len$ then (
492 let $lid:v$, $lid:off$, $lid:len$ =
493 Bitmatch.$lid:extract_func$ $lid:data$ $lid:off$ $lid:len$
495 if $lid:v$ = $fval$ then (
501 (* Can't compare bitstrings at the moment. *)
502 | Bitstring, None, _ ->
504 (Failure "cannot compare a bitstring to a constant")
506 (* Bitstring, constant flen >= 0. *)
507 | Bitstring, Some ident, Some i when i >= 0 ->
509 if $lid:len$ >= $flen$ then (
510 let $lid:ident$, $lid:off$, $lid:len$ =
511 Bitmatch.extract_bitstring $lid:data$ $lid:off$ $lid:len$
517 (* Bitstring, constant flen = -1, means consume all the
520 | Bitstring, Some ident, Some i when i = -1 ->
522 let $lid:ident$, $lid:off$, $lid:len$ =
523 Bitmatch.extract_remainder $lid:data$ $lid:off$ $lid:len$ in
527 | Bitstring, _, Some _ ->
528 Loc.raise _loc (Failure "length of bitstring must be >= 0 or the special value -1")
530 (* Bitstring field, non-const flen. We check the flen is >= 0
531 * (-1 is not allowed here) at runtime.
533 | Bitstring, Some ident, None ->
535 if $flen$ >= 0 && $flen$ <= $lid:len$ then (
536 let $lid:ident$, $lid:off$, $lid:len$ =
537 Bitmatch.extract_bitstring $lid:data$ $lid:off$ $lid:len$
544 (* Emit extra debugging code. *)
546 if not debug then expr else (
547 let field = string_of_field field in
550 if !Bitmatch.debug then (
551 Printf.eprintf "PA_BITMATCH: TEST:\n";
552 Printf.eprintf " %s\n" $str:field$;
553 Printf.eprintf " off %d len %d\n%!" $lid:off$ $lid:len$;
559 output_field_extraction expr fields
562 (* Convert each case in the match. *)
563 let cases = List.map (
565 (* field : len ; field : len when .. -> ..*)
566 | (Fields fields, Some whenclause, code) ->
569 if $whenclause$ then (
570 $lid:result$ := Some ($code$);
574 output_field_extraction inner (List.rev fields)
576 (* field : len ; field : len -> ... *)
577 | (Fields fields, None, code) ->
580 $lid:result$ := Some ($code$);
583 output_field_extraction inner (List.rev fields)
585 (* _ as name when ... -> ... *)
586 | (Bind (Some name), Some whenclause, code) ->
588 let $lid:name$ = ($lid:data$, $lid:off$, $lid:len$) in
589 if $whenclause$ then (
590 $lid:result$ := Some ($code$);
595 (* _ as name -> ... *)
596 | (Bind (Some name), None, code) ->
598 let $lid:name$ = ($lid:data$, $lid:off$, $lid:len$) in
599 $lid:result$ := Some ($code$);
603 (* _ when ... -> ... *)
604 | (Bind None, Some whenclause, code) ->
606 if $whenclause$ then (
607 $lid:result$ := Some ($code$);
613 | (Bind None, None, code) ->
615 $lid:result$ := Some ($code$);
621 (* Join them into a single expression.
623 * Don't do it with a normal fold_right because that leaves
624 * 'raise Exit; ()' at the end which causes a compiler warning.
625 * Hence a bit of complexity here.
627 * Note that the number of cases is always >= 1 so List.hd is safe.
629 let cases = List.rev cases in
631 List.fold_left (fun base case -> <:expr< $case$ ; $base$ >>)
632 (List.hd cases) (List.tl cases) in
634 (* The final code just wraps the list of cases in a
635 * try/with construct so that each case is tried in
636 * turn until one case matches (that case sets 'result'
637 * and raises 'Exit' to leave the whole statement).
638 * If result isn't set by the end then we will raise
639 * Match_failure with the location of the bitmatch
640 * statement in the original code.
642 let loc_fname = Loc.file_name _loc in
643 let loc_line = string_of_int (Loc.start_line _loc) in
644 let loc_char = string_of_int (Loc.start_off _loc - Loc.start_bol _loc) in
647 let ($lid:data$, $lid:off$, $lid:len$) = $bs$ in
648 let $lid:result$ = ref None in
652 match ! $lid:result$ with
654 | None -> raise (Match_failure ($str:loc_fname$,
655 $int:loc_line$, $int:loc_char$))
662 [ LIST0 [ q = LIDENT -> q ] SEP "," ]
666 [ fval = expr LEVEL "top"; ":"; len = expr LEVEL "top";
667 qs = OPT [ ":"; qs = qualifiers -> qs ] ->
668 parse_field _loc fval len qs
674 bind = OPT [ "as"; name = LIDENT -> name ];
675 w = OPT [ "when"; e = expr -> e ]; "->";
679 | [ fields = LIST0 field SEP ";";
680 w = OPT [ "when"; e = expr -> e ]; "->";
682 (Fields fields, w, code)
686 (* 'bitmatch' expressions. *)
688 [ "bitmatch"; bs = expr; "with"; OPT "|";
689 cases = LIST1 match_case SEP "|" ->
690 output_bitmatch _loc bs cases
695 fields = LIST0 field SEP ";" ->
696 output_constructor _loc fields