From: Richard W.M. Jones Date: Fri, 25 Apr 2008 12:55:39 +0000 (+0000) Subject: Allow matching against a string type. X-Git-Url: http://git.annexia.org/?a=commitdiff_plain;h=4853c75eb968f482dc8d9919cb56fc12fec8e3d2;p=ocaml-bitstring.git Allow matching against a string type. Error locations are now very fine-grained. --- diff --git a/TODO b/TODO index f4fe3cc..470f51c 100644 --- a/TODO +++ b/TODO @@ -1,9 +1,9 @@ -$Id: TODO,v 1.3 2008-04-25 12:08:51 rjones Exp $ +$Id: TODO,v 1.4 2008-04-25 12:55:39 rjones Exp $ Major to-do items. (1) DONE - In bitmatch operator, use patterns not expressions. -(2) Allow matching against strings. +(2) DONE - Allow matching against strings. (3) DONE - Change the syntax so { ... } surrounds match patterns. @@ -28,3 +28,7 @@ Major to-do items. (Q: Are these evaluated at compile time or at run time or selectable?) (8) Named but unbound patterns to avoid "Warning Y: unused variable". + +(9) DONE - + Make the error locations fine-grained, particularly so they point to + individual fields, not the whole match. diff --git a/bitmatch.mli b/bitmatch.mli index eb24575..f1c7ed1 100644 --- a/bitmatch.mli +++ b/bitmatch.mli @@ -15,7 +15,7 @@ * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * - * $Id: bitmatch.mli,v 1.15 2008-04-25 12:08:51 rjones Exp $ + * $Id: bitmatch.mli,v 1.16 2008-04-25 12:55:39 rjones Exp $ *) (** @@ -255,6 +255,15 @@ bitmatch bits with the integer 4 or the integer 6. *) ]} + One may also match on strings: + +{[ +| { "MAGIC" : 5*8 : string } -> ... + + (* Only matches if the string "MAGIC" appears at the start + of the input. *) +]} + {3:patternfieldreference Pattern field reference} The exact format of each pattern field is: @@ -285,6 +294,7 @@ bitmatch bits with signedness and endianness of the field. Permissible qualifiers are: - [int] (field has an integer type) + - [string] (field is a string type) - [bitstring] (field is a bitstring type) - [signed] (field is signed) - [unsigned] (field is unsigned) @@ -433,16 +443,16 @@ Bitmatch.hexdump_bitstring stdout bits ;; overflows. In addition to OCaml's normal bounds checks, we check that field lengths are >= 0, and many additional checks. - Denial of service attacks are more problematic although we still - believe that the library is robust. We only work forwards through - the bitstring, thus computation will eventually terminate. As for - computed lengths, code such as this is thought to be secure: + Denial of service attacks are more problematic. We only work + forwards through the bitstring, thus computation will eventually + terminate. As for computed lengths, code such as this is thought + to be secure: -{[ -bitmatch bits with -| { len : 64; - buffer : Int64.to_int len : bitstring } -> -]} + {[ + bitmatch bits with + | { len : 64; + buffer : Int64.to_int len : bitstring } -> + ]} The [len] field can be set arbitrarily large by an attacker, but when pattern-matching against the [buffer] field this merely causes @@ -451,22 +461,28 @@ bitmatch bits with allocation of sub-bitstrings is efficient and doesn't involve an arbitary-sized allocation or any copying. - The main protection against attackers should therefore be to ensure - that the main program will only read input bitstrings up to a - certain length, which is outside the scope of this library. + However the above does not necessarily apply to strings used in + matching, since they may cause the library to use the + {!Bitmatch.string_of_bitstring} function, which allocates a string. + So you should take care if you use the [string] type particularly + with a computed length that is derived from external input. + + The main protection against attackers should be to ensure that the + main program will only read input bitstrings up to a certain + length, which is outside the scope of this library. {3 Security on output} As with the input side, computed lengths are believed to be safe. For example: -{[ -let len = read_untrusted_source () in -let buffer = allocate_bitstring () in -BITSTRING { - buffer : len : bitstring -} -]} + {[ + let len = read_untrusted_source () in + let buffer = allocate_bitstring () in + BITSTRING { + buffer : len : bitstring + } + ]} This code merely causes a check that buffer's length is the same as [len]. However the program function [allocate_bitstring] must @@ -588,10 +604,7 @@ val string_of_bitstring : bitstring -> string This function is inefficient. In the best case when the bitstring is nicely byte-aligned we do a [String.sub] operation. If the bitstring isn't aligned then this involves a lot of bit twiddling - and is particularly inefficient. - - XXX This function wouldn't be needed so much if the [bitmatch] - operator allowed us to pattern-match on strings. *) + and is particularly inefficient. *) (** {3 Bitstring buffer} *) diff --git a/pa_bitmatch.ml b/pa_bitmatch.ml index 019af14..b154a31 100644 --- a/pa_bitmatch.ml +++ b/pa_bitmatch.ml @@ -15,7 +15,7 @@ * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * - * $Id: pa_bitmatch.ml,v 1.9 2008-04-25 12:08:51 rjones Exp $ + * $Id: pa_bitmatch.ml,v 1.10 2008-04-25 12:55:39 rjones Exp $ *) open Printf @@ -51,7 +51,7 @@ and fcommon = { _loc : Loc.t; (* location in source code *) } and endian = BigEndian | LittleEndian | NativeEndian -and t = Int | Bitstring +and t = Int | String | Bitstring (* Generate a fresh, unique symbol each time called. *) let gensym = @@ -124,6 +124,13 @@ and parse_field_common _loc flen qs = let t = Some Int in (endian, signed, t) ) + | "string" -> + if t <> None then + Loc.raise _loc (Failure "a type flag has been set already") + else ( + let t = Some String in + (endian, signed, t) + ) | "bitstring" -> if t <> None then Loc.raise _loc (Failure "a type flag has been set already") @@ -135,13 +142,14 @@ and parse_field_common _loc flen qs = Loc.raise _loc (Failure (s ^ ": unknown qualifier")) ) (None, None, None) qs in - (* If type is set to bitstring then endianness and signedness - * qualifiers are meaningless and must not be set. + (* If type is set to string or bitstring then endianness and + * signedness qualifiers are meaningless and must not be set. *) - if t = Some Bitstring && (endian <> None || signed <> None) then - Loc.raise _loc ( - Failure "bitstring type and endian or signed qualifiers cannot be mixed" - ); + if (t = Some Bitstring || t = Some String) + && (endian <> None || signed <> None) then + Loc.raise _loc ( + Failure "string types and endian or signed qualifiers cannot be mixed" + ); (* Default endianness, signedness, type. *) let endian = match endian with None -> BigEndian | Some e -> e in @@ -163,6 +171,7 @@ let string_of_endian = function let string_of_t = function | Int -> "int" + | String -> "string" | Bitstring -> "bitstring" let rec string_of_patt_field { fpatt = fpatt; fpc = fpc } = @@ -218,7 +227,8 @@ let output_constructor _loc fields = (* Convert each field to a simple bitstring-generating expression. *) let fields = List.map ( - fun {fexpr=fexpr; fec={flen=flen; endian=endian; signed=signed; t=t}} -> + fun {fexpr=fexpr; fec={flen=flen; endian=endian; signed=signed; + t=t; _loc=_loc}} -> (* Is flen an integer constant? If so, what is it? This * is very simple-minded and only detects simple constants. *) @@ -313,6 +323,63 @@ let output_constructor _loc fields = $int:loc_line$, $int:loc_char$)) >> + (* String, constant length > 0, must be a multiple of 8. *) + | String, Some i when i > 0 && i land 7 = 0 -> + let bs = gensym "bs" in + <:expr< + let $lid:bs$ = $fexpr$ in + if String.length $lid:bs$ = ($flen$ lsr 3) then + Bitmatch.construct_string $lid:buffer$ $lid:bs$ + else + raise (Bitmatch.Construct_failure + ("length of string does not match declaration", + $str:loc_fname$, + $int:loc_line$, $int:loc_char$)) + >> + + (* String, constant length -1, means variable length string + * with no checks. + *) + | String, Some (-1) -> + <:expr< Bitmatch.construct_string $lid:buffer$ $fexpr$ >> + + (* String, constant length = 0 is probably an error, and so is + * any other value. + *) + | String, Some _ -> + Loc.raise _loc (Failure "length of string must be > 0 and a multiple of 8, or the special value -1") + + (* String, non-constant length. + * We check at runtime that the length is > 0, a multiple of 8, + * and matches the declared length. + *) + | String, None -> + let bslen = gensym "bslen" in + let bs = gensym "bs" in + <:expr< + let $lid:bslen$ = $flen$ in + if $lid:bslen$ > 0 then ( + if $lid:bslen$ land 7 = 0 then ( + let $lid:bs$ = $fexpr$ in + if String.length $lid:bs$ = ($lid:bslen$ lsr 3) then + Bitmatch.construct_string $lid:buffer$ $lid:bs$ + else + raise (Bitmatch.Construct_failure + ("length of string does not match declaration", + $str:loc_fname$, + $int:loc_line$, $int:loc_char$)) + ) else + raise (Bitmatch.Construct_failure + ("length of string must be a multiple of 8", + $str:loc_fname$, + $int:loc_line$, $int:loc_char$)) + ) else + raise (Bitmatch.Construct_failure + ("length of string must be > 0", + $str:loc_fname$, + $int:loc_line$, $int:loc_char$)) + >> + (* Bitstring, constant length > 0. *) | Bitstring, Some i when i > 0 -> let bs = gensym "bs" in @@ -333,7 +400,7 @@ let output_constructor _loc fields = | Bitstring, Some (-1) -> <:expr< Bitmatch.construct_bitstring $lid:buffer$ $fexpr$ >> - (* Bitstring, constant length = 0 is probably an error, and so it + (* Bitstring, constant length = 0 is probably an error, and so is * any other value. *) | Bitstring, Some _ -> @@ -420,7 +487,8 @@ let output_bitmatch _loc bs cases = let rec output_field_extraction inner = function | [] -> inner | field :: fields -> - let {fpatt=fpatt; fpc={flen=flen; endian=endian; signed=signed; t=t}} + let {fpatt=fpatt; fpc={flen=flen; endian=endian; signed=signed; + t=t; _loc=_loc}} = field in (* Is flen an integer constant? If so, what is it? This @@ -528,6 +596,84 @@ let output_bitmatch _loc bs cases = ) >> + (* String, constant flen > 0. *) + | String, Some i when i > 0 && i land 7 = 0 -> + let bs = gensym "bs" in + if pattern_is_exhaustive fpatt then + <:expr< + if $lid:len$ >= $flen$ then ( + let $lid:bs$, $lid:off$, $lid:len$ = + Bitmatch.extract_bitstring $lid:data$ $lid:off$ $lid:len$ + $flen$ in + match Bitmatch.string_of_bitstring $lid:bs$ with + | $fpatt$ -> $inner$ + ) + >> + else + <:expr< + if $lid:len$ >= $flen$ then ( + let $lid:bs$, $lid:off$, $lid:len$ = + Bitmatch.extract_bitstring $lid:data$ $lid:off$ $lid:len$ + $flen$ in + match Bitmatch.string_of_bitstring $lid:bs$ with + | $fpatt$ -> $inner$ + | _ -> () + ) + >> + + (* String, constant flen = -1, means consume all the + * rest of the input. + *) + | String, Some i when i = -1 -> + let bs = gensym "bs" in + if pattern_is_exhaustive fpatt then + <:expr< + let $lid:bs$, $lid:off$, $lid:len$ = + Bitmatch.extract_remainder $lid:data$ $lid:off$ $lid:len$ in + match Bitmatch.string_of_bitstring $lid:bs$ with + | $fpatt$ -> $inner$ + >> + else + <:expr< + let $lid:bs$, $lid:off$, $lid:len$ = + Bitmatch.extract_remainder $lid:data$ $lid:off$ $lid:len$ in + match Bitmatch.string_of_bitstring $lid:bs$ with + | $fpatt$ -> $inner$ + | _ -> () + >> + + | String, Some _ -> + Loc.raise _loc (Failure "length of string must be > 0 and a multiple of 8, or the special value -1") + + (* String field, non-const flen. We check the flen is > 0 + * and a multiple of 8 (-1 is not allowed here), at runtime. + *) + | String, None -> + let bs = gensym "bs" in + if pattern_is_exhaustive fpatt then + <:expr< + if $flen$ >= 0 && $flen$ <= $lid:len$ + && $flen$ land 7 = 0 then ( + let $lid:bs$, $lid:off$, $lid:len$ = + Bitmatch.extract_bitstring + $lid:data$ $lid:off$ $lid:len$ $flen$ in + match Bitmatch.string_of_bitstring $lid:bs$ with + | $fpatt$ -> $inner$ + ) + >> + else + <:expr< + if $flen$ >= 0 && $flen$ <= $lid:len$ + && $flen$ land 7 = 0 then ( + let $lid:bs$, $lid:off$, $lid:len$ = + Bitmatch.extract_bitstring + $lid:data$ $lid:off$ $lid:len$ $flen$ in + match Bitmatch.string_of_bitstring $lid:bs$ with + | $fpatt$ -> $inner$ + | _ -> () + ) + >> + (* Bitstring, constant flen >= 0. * At the moment all we can do is assign the bitstring to an * identifier. @@ -536,6 +682,7 @@ let output_bitmatch _loc bs cases = let ident = match fpatt with | <:patt< $lid:ident$ >> -> ident + | <:patt< _ >> -> "_" | _ -> Loc.raise _loc (Failure "cannot compare a bitstring to a constant") in diff --git a/tests/70_ext3_sb.ml b/tests/70_ext3_sb.ml index af5169a..8ff5096 100644 --- a/tests/70_ext3_sb.ml +++ b/tests/70_ext3_sb.ml @@ -1,5 +1,5 @@ (* Parse an ext3 superblock. - * $Id: 70_ext3_sb.ml,v 1.2 2008-04-25 11:08:43 rjones Exp $ + * $Id: 70_ext3_sb.ml,v 1.3 2008-04-25 12:55:39 rjones Exp $ *) open Printf @@ -43,14 +43,14 @@ let () = s_feature_compat : 32 : littleendian; (* compatible feature set *) s_feature_incompat : 32 : littleendian; (* incompatible feature set *) s_feature_ro_compat : 32 : littleendian; (* readonly-compatible feature set *) - s_uuid : 128 : bitstring; (* 128-bit uuid for volume *) - s_volume_name : 128 : bitstring; (* volume name XXX string *) - s_last_mounted : 512 : bitstring; (* directory where last mounted XXX string *) + s_uuid : 128 : string; (* 128-bit uuid for volume *) + s_volume_name : 128 : string; (* volume name *) + s_last_mounted : 512 : string; (* directory where last mounted *) s_algorithm_usage_bitmap : 32 : littleendian; (* For compression *) s_prealloc_blocks : 8; (* Nr of blocks to try to preallocate*) s_prealloc_dir_blocks : 8; (* Nr to preallocate for dirs *) s_reserved_gdt_blocks : 16 : littleendian;(* Per group desc for online growth *) - s_journal_uuid : 128 : bitstring; (* uuid of journal superblock *) + s_journal_uuid : 128 : string; (* uuid of journal superblock *) s_journal_inum : 32 : littleendian; (* inode number of journal file *) s_journal_dev : 32 : littleendian; (* device number of journal file *) s_last_orphan : 32 : littleendian; (* start of list of inodes to delete *) @@ -63,13 +63,16 @@ let () = s_reserved_word_pad : 16 : littleendian; s_default_mount_opts : 32 : littleendian; s_first_meta_bg : 32 : littleendian; (* First metablock block group *) - s_reserved : 6080 : bitstring } -> (* Padding to the end of the block *) + _ : 6080 : bitstring } -> (* Padding to the end of the block *) printf "ext3 superblock:\n"; printf " s_inodes_count = %ld\n" s_inodes_count; printf " s_blocks_count = %ld\n" s_blocks_count; printf " s_free_inodes_count = %ld\n" s_free_inodes_count; - printf " s_free_blocks_count = %ld\n" s_free_blocks_count + printf " s_free_blocks_count = %ld\n" s_free_blocks_count; + printf " s_uuid = %S\n" s_uuid; + printf " s_volume_name = %S\n" s_volume_name; + printf " s_last_mounted = %S\n" s_last_mounted | { _ } -> eprintf "not an ext3 superblock!\n%!";