From ec13c41509db2b7fae9138cde2a8ea0b6d3b3699 Mon Sep 17 00:00:00 2001 From: "Richard W.M. Jones" Date: Tue, 26 Aug 2008 08:24:31 +0000 Subject: [PATCH 1/1] This patch completes the optimization / fastpaths in C enhancement. --- Makefile.in | 17 +++++- bitstring.ml | 105 +++++++++++++++++++++++++++++++--- bitstring.mli | 92 +++++++++++++++++++++++++++-- bitstring_c.c | 96 +++++++++++++++++++++++++++---- byteswap.in.h | 54 +++++++++++++++++ configure.ac | 19 +++++- pa_bitstring.ml | 175 ++++++++++++++++++++++++++++++++++++++++---------------- 7 files changed, 481 insertions(+), 77 deletions(-) create mode 100644 byteswap.in.h diff --git a/Makefile.in b/Makefile.in index f3fed47..695d427 100644 --- a/Makefile.in +++ b/Makefile.in @@ -27,7 +27,10 @@ INSTALL = @INSTALL@ TIME = @TIME@ GPROF = @GPROF@ +BYTESWAP_H = @BYTESWAP_H@ + OCAMLLIB = @OCAMLLIB@ +top_srcdir = @top_srcdir@ pkg_cil = @pkg_cil@ pkg_extlib = @pkg_extlib@ @@ -40,7 +43,7 @@ OCAMLOPTPACKAGES = OCAMLDOCFLAGS = -html -sort CC = @CC@ -CFLAGS = @CFLAGS@ -Wall -Werror -fPIC -I$(OCAMLLIB) +CFLAGS = @CFLAGS@ -Wall -Werror -fPIC -I$(top_srcdir) -I$(OCAMLLIB) LIBRARY_PATH = @abs_top_builddir@ LD_LIBRARY_PATH = @abs_top_builddir@ @@ -60,7 +63,8 @@ TESTS = $(patsubst %.ml,%,$(wildcard tests/*.ml)) BENCHMARKS = $(patsubst %.ml,%,$(wildcard benchmarks/*.ml)) -all: bitstring.cma bitstring_persistent.cma \ +all: $(BYTESWAP_H) \ + bitstring.cma bitstring_persistent.cma \ bitstring.cmxa bitstring_persistent.cmxa \ pa_bitstring.cmo \ bitstring-objinfo @@ -101,6 +105,15 @@ bitstring-objinfo: bitstring_objinfo.cmo bitstring.cma bitstring_persistent.cma bitstring.cma bitstring_persistent.cma \ $< -o $@ +# Create byteswap.h if the system doesn't have this file. +# From gnulib, license is LGPLv2+. + +byteswap.h: byteswap.in.h + { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */'; \ + cat $(srcdir)/byteswap.in.h; \ + } > $@-t + mv -f $@-t $@ + # Tests and examples. PP = -pp "camlp4o bitstring.cma bitstring_persistent.cma pa_bitstring.cmo" diff --git a/bitstring.ml b/bitstring.ml index 9712189..8850b59 100644 --- a/bitstring.ml +++ b/bitstring.ml @@ -158,7 +158,15 @@ module I = struct let ff = 0xff (* Create a mask 0-31 bits wide. *) - external mask : int -> int = "ocaml_bitstring_I_mask" "noalloc" + let mask bits = + if bits < 30 then + (one << bits) - 1 + else if bits = 30 then + max_int + else if bits = 31 then + minus_one + else + invalid_arg "Bitstring.I.mask" (* Byte swap an int of a given size. *) let byteswap v bits = @@ -370,13 +378,6 @@ end * the parameters should have been checked for sanity already). *) -(* Bitstrings. *) -let extract_bitstring data off len flen = - (data, off, flen) (*, off+flen, len-flen*) - -let extract_remainder data off len = - (data, off, len) (*, off+len, 0*) - (* Extract and convert to numeric. A single bit is returned as * a boolean. There are no endianness or signedness considerations. *) @@ -683,6 +684,94 @@ let extract_int64_ee_unsigned = function | LittleEndian -> extract_int64_le_unsigned | NativeEndian -> extract_int64_ne_unsigned +external extract_fastpath_int16_be_unsigned : string -> int -> int = "ocaml_bitstring_extract_fastpath_int16_be_unsigned" "noalloc" + +external extract_fastpath_int16_le_unsigned : string -> int -> int = "ocaml_bitstring_extract_fastpath_int16_le_unsigned" "noalloc" + +external extract_fastpath_int16_ne_unsigned : string -> int -> int = "ocaml_bitstring_extract_fastpath_int16_ne_unsigned" "noalloc" + +external extract_fastpath_int16_be_signed : string -> int -> int = "ocaml_bitstring_extract_fastpath_int16_be_signed" "noalloc" + +external extract_fastpath_int16_le_signed : string -> int -> int = "ocaml_bitstring_extract_fastpath_int16_le_signed" "noalloc" + +external extract_fastpath_int16_ne_signed : string -> int -> int = "ocaml_bitstring_extract_fastpath_int16_ne_signed" "noalloc" + +(* +external extract_fastpath_int24_be_unsigned : string -> int -> int = "ocaml_bitstring_extract_fastpath_int24_be_unsigned" "noalloc" + +external extract_fastpath_int24_le_unsigned : string -> int -> int = "ocaml_bitstring_extract_fastpath_int24_le_unsigned" "noalloc" + +external extract_fastpath_int24_ne_unsigned : string -> int -> int = "ocaml_bitstring_extract_fastpath_int24_ne_unsigned" "noalloc" + +external extract_fastpath_int24_be_signed : string -> int -> int = "ocaml_bitstring_extract_fastpath_int24_be_signed" "noalloc" + +external extract_fastpath_int24_le_signed : string -> int -> int = "ocaml_bitstring_extract_fastpath_int24_le_signed" "noalloc" + +external extract_fastpath_int24_ne_signed : string -> int -> int = "ocaml_bitstring_extract_fastpath_int24_ne_signed" "noalloc" +*) + +external extract_fastpath_int32_be_unsigned : string -> int -> int32 -> int32 = "ocaml_bitstring_extract_fastpath_int32_be_unsigned" "noalloc" + +external extract_fastpath_int32_le_unsigned : string -> int -> int32 -> int32 = "ocaml_bitstring_extract_fastpath_int32_le_unsigned" "noalloc" + +external extract_fastpath_int32_ne_unsigned : string -> int -> int32 -> int32 = "ocaml_bitstring_extract_fastpath_int32_ne_unsigned" "noalloc" + +external extract_fastpath_int32_be_signed : string -> int -> int32 -> int32 = "ocaml_bitstring_extract_fastpath_int32_be_signed" "noalloc" + +external extract_fastpath_int32_le_signed : string -> int -> int32 -> int32 = "ocaml_bitstring_extract_fastpath_int32_le_signed" "noalloc" + +external extract_fastpath_int32_ne_signed : string -> int -> int32 -> int32 = "ocaml_bitstring_extract_fastpath_int32_ne_signed" "noalloc" + +(* +external extract_fastpath_int40_be_unsigned : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int40_be_unsigned" "noalloc" + +external extract_fastpath_int40_le_unsigned : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int40_le_unsigned" "noalloc" + +external extract_fastpath_int40_ne_unsigned : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int40_ne_unsigned" "noalloc" + +external extract_fastpath_int40_be_signed : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int40_be_signed" "noalloc" + +external extract_fastpath_int40_le_signed : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int40_le_signed" "noalloc" + +external extract_fastpath_int40_ne_signed : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int40_ne_signed" "noalloc" + +external extract_fastpath_int48_be_unsigned : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int48_be_unsigned" "noalloc" + +external extract_fastpath_int48_le_unsigned : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int48_le_unsigned" "noalloc" + +external extract_fastpath_int48_ne_unsigned : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int48_ne_unsigned" "noalloc" + +external extract_fastpath_int48_be_signed : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int48_be_signed" "noalloc" + +external extract_fastpath_int48_le_signed : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int48_le_signed" "noalloc" + +external extract_fastpath_int48_ne_signed : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int48_ne_signed" "noalloc" + +external extract_fastpath_int56_be_unsigned : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int56_be_unsigned" "noalloc" + +external extract_fastpath_int56_le_unsigned : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int56_le_unsigned" "noalloc" + +external extract_fastpath_int56_ne_unsigned : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int56_ne_unsigned" "noalloc" + +external extract_fastpath_int56_be_signed : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int56_be_signed" "noalloc" + +external extract_fastpath_int56_le_signed : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int56_le_signed" "noalloc" + +external extract_fastpath_int56_ne_signed : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int56_ne_signed" "noalloc" +*) + +external extract_fastpath_int64_be_unsigned : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int64_be_unsigned" "noalloc" + +external extract_fastpath_int64_le_unsigned : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int64_le_unsigned" "noalloc" + +external extract_fastpath_int64_ne_unsigned : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int64_ne_unsigned" "noalloc" + +external extract_fastpath_int64_be_signed : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int64_be_signed" "noalloc" + +external extract_fastpath_int64_le_signed : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int64_le_signed" "noalloc" + +external extract_fastpath_int64_ne_signed : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int64_ne_signed" "noalloc" + (*----------------------------------------------------------------------*) (* Constructor functions. *) diff --git a/bitstring.mli b/bitstring.mli index 0b77cce..1e271f5 100644 --- a/bitstring.mli +++ b/bitstring.mli @@ -872,10 +872,6 @@ val debug : bool ref (* 'extract' functions are used in bitmatch statements. *) -val extract_bitstring : string -> int -> int -> int -> bitstring - -val extract_remainder : string -> int -> int -> bitstring - val extract_bit : string -> int -> int -> int -> bool val extract_char_unsigned : string -> int -> int -> int -> int @@ -904,6 +900,94 @@ val extract_int64_ne_unsigned : string -> int -> int -> int -> int64 val extract_int64_ee_unsigned : endian -> string -> int -> int -> int -> int64 +external extract_fastpath_int16_be_unsigned : string -> int -> int = "ocaml_bitstring_extract_fastpath_int16_be_unsigned" "noalloc" + +external extract_fastpath_int16_le_unsigned : string -> int -> int = "ocaml_bitstring_extract_fastpath_int16_le_unsigned" "noalloc" + +external extract_fastpath_int16_ne_unsigned : string -> int -> int = "ocaml_bitstring_extract_fastpath_int16_ne_unsigned" "noalloc" + +external extract_fastpath_int16_be_signed : string -> int -> int = "ocaml_bitstring_extract_fastpath_int16_be_signed" "noalloc" + +external extract_fastpath_int16_le_signed : string -> int -> int = "ocaml_bitstring_extract_fastpath_int16_le_signed" "noalloc" + +external extract_fastpath_int16_ne_signed : string -> int -> int = "ocaml_bitstring_extract_fastpath_int16_ne_signed" "noalloc" + +(* +external extract_fastpath_int24_be_unsigned : string -> int -> int = "ocaml_bitstring_extract_fastpath_int24_be_unsigned" "noalloc" + +external extract_fastpath_int24_le_unsigned : string -> int -> int = "ocaml_bitstring_extract_fastpath_int24_le_unsigned" "noalloc" + +external extract_fastpath_int24_ne_unsigned : string -> int -> int = "ocaml_bitstring_extract_fastpath_int24_ne_unsigned" "noalloc" + +external extract_fastpath_int24_be_signed : string -> int -> int = "ocaml_bitstring_extract_fastpath_int24_be_signed" "noalloc" + +external extract_fastpath_int24_le_signed : string -> int -> int = "ocaml_bitstring_extract_fastpath_int24_le_signed" "noalloc" + +external extract_fastpath_int24_ne_signed : string -> int -> int = "ocaml_bitstring_extract_fastpath_int24_ne_signed" "noalloc" +*) + +external extract_fastpath_int32_be_unsigned : string -> int -> int32 -> int32 = "ocaml_bitstring_extract_fastpath_int32_be_unsigned" "noalloc" + +external extract_fastpath_int32_le_unsigned : string -> int -> int32 -> int32 = "ocaml_bitstring_extract_fastpath_int32_le_unsigned" "noalloc" + +external extract_fastpath_int32_ne_unsigned : string -> int -> int32 -> int32 = "ocaml_bitstring_extract_fastpath_int32_ne_unsigned" "noalloc" + +external extract_fastpath_int32_be_signed : string -> int -> int32 -> int32 = "ocaml_bitstring_extract_fastpath_int32_be_signed" "noalloc" + +external extract_fastpath_int32_le_signed : string -> int -> int32 -> int32 = "ocaml_bitstring_extract_fastpath_int32_le_signed" "noalloc" + +external extract_fastpath_int32_ne_signed : string -> int -> int32 -> int32 = "ocaml_bitstring_extract_fastpath_int32_ne_signed" "noalloc" + +(* +external extract_fastpath_int40_be_unsigned : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int40_be_unsigned" "noalloc" + +external extract_fastpath_int40_le_unsigned : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int40_le_unsigned" "noalloc" + +external extract_fastpath_int40_ne_unsigned : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int40_ne_unsigned" "noalloc" + +external extract_fastpath_int40_be_signed : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int40_be_signed" "noalloc" + +external extract_fastpath_int40_le_signed : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int40_le_signed" "noalloc" + +external extract_fastpath_int40_ne_signed : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int40_ne_signed" "noalloc" + +external extract_fastpath_int48_be_unsigned : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int48_be_unsigned" "noalloc" + +external extract_fastpath_int48_le_unsigned : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int48_le_unsigned" "noalloc" + +external extract_fastpath_int48_ne_unsigned : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int48_ne_unsigned" "noalloc" + +external extract_fastpath_int48_be_signed : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int48_be_signed" "noalloc" + +external extract_fastpath_int48_le_signed : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int48_le_signed" "noalloc" + +external extract_fastpath_int48_ne_signed : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int48_ne_signed" "noalloc" + +external extract_fastpath_int56_be_unsigned : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int56_be_unsigned" "noalloc" + +external extract_fastpath_int56_le_unsigned : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int56_le_unsigned" "noalloc" + +external extract_fastpath_int56_ne_unsigned : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int56_ne_unsigned" "noalloc" + +external extract_fastpath_int56_be_signed : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int56_be_signed" "noalloc" + +external extract_fastpath_int56_le_signed : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int56_le_signed" "noalloc" + +external extract_fastpath_int56_ne_signed : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int56_ne_signed" "noalloc" +*) + +external extract_fastpath_int64_be_unsigned : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int64_be_unsigned" "noalloc" + +external extract_fastpath_int64_le_unsigned : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int64_le_unsigned" "noalloc" + +external extract_fastpath_int64_ne_unsigned : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int64_ne_unsigned" "noalloc" + +external extract_fastpath_int64_be_signed : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int64_be_signed" "noalloc" + +external extract_fastpath_int64_le_signed : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int64_le_signed" "noalloc" + +external extract_fastpath_int64_ne_signed : string -> int -> int64 -> int64 = "ocaml_bitstring_extract_fastpath_int64_ne_signed" "noalloc" + (* 'construct' functions are used in BITSTRING constructors. *) val construct_bit : Buffer.t -> bool -> int -> exn -> unit diff --git a/bitstring_c.c b/bitstring_c.c index 6faba8b..5570f05 100644 --- a/bitstring_c.c +++ b/bitstring_c.c @@ -23,20 +23,94 @@ * certain very frequently used functions. */ +#include + #include #include +#include +#include #include #include -/* Return a mask of 0-31 bits wide. */ -CAMLprim value -ocaml_bitstring_I_mask (value bitsv) -{ - int bits = Int_val (bitsv); - - if (bits <= 31) - return Val_int ((1 << bits) - 1); - else - caml_invalid_argument ("Bitstring.I.mask"); -} +/* Fastpath functions. These are used in the common case for reading + * ints where the following conditions are known to be true: + * (a) the int size is a whole number of bytes (eg. 16, 24, 32, etc bits) + * (b) the access in the match is byte-aligned + * (c) the access in the underlying bitstring is byte-aligned + * + * These functions are all "noalloc" meaning they must not perform + * any OCaml allocations. For this reason, when the function returns + * an int32 or int64, the OCaml code passes in the pre-allocated pointer + * to the return value. + * + * The final offset in the string is calculated by the OCaml (caller) + * code. All we need to do is to read the string+offset and byteswap, + * sign-extend as necessary. + * + * There is one function for every combination of: + * (i) int size: 16, 32, 64 bits + * (ii) endian: bigendian, littleendian, nativeendian + * (iii) signed and unsigned + * + * XXX Future work: Expand this to 24, 40, 48, 56 bits. This + * requires some extra work because sign-extension won't "just happen". + */ + +#ifdef WORDS_BIGENDIAN +#define swap_be(size,v) +#define swap_le(size,v) v = bswap_##size (v) +#define swap_ne(size,v) +#else +#define swap_be(size,v) v = bswap_##size (v) +#define swap_le(size,v) +#define swap_ne(size,v) +#endif + +#define fastpath1(size,endian,signed,type) \ + CAMLprim value \ + ocaml_bitstring_extract_fastpath_int##size##_##endian##_##signed \ + (value strv, value offv) \ + { \ + type *ptr = (type *) ((void *) String_val (strv) + Int_val (offv)); \ + type r; \ + r = *ptr; \ + swap_##endian(size,r); \ + return Val_int (r); \ + } + +fastpath1(16,be,unsigned,uint16_t) +fastpath1(16,le,unsigned,uint16_t) +fastpath1(16,ne,unsigned,uint16_t) +fastpath1(16,be,signed,int16_t) +fastpath1(16,le,signed,int16_t) +fastpath1(16,ne,signed,int16_t) + +/* XXX This probably doesn't work on ARCH_ALIGN_INT64 platforms. */ + +#define fastpath2(size,endian,signed,type,rval) \ + CAMLprim value \ + ocaml_bitstring_extract_fastpath_int##size##_##endian##_##signed \ + (value strv, value offv, value rv) \ + { \ + type *ptr = (type *) ((void *) String_val (strv) + Int_val (offv)); \ + type r; \ + r = *ptr; \ + swap_##endian(size,r); \ + rval(rv) = r; \ + return rv; \ + } + +fastpath2(32,be,unsigned,uint32_t,Int32_val) +fastpath2(32,le,unsigned,uint32_t,Int32_val) +fastpath2(32,ne,unsigned,uint32_t,Int32_val) +fastpath2(32,be,signed,int32_t,Int32_val) +fastpath2(32,le,signed,int32_t,Int32_val) +fastpath2(32,ne,signed,int32_t,Int32_val) + +fastpath2(64,be,unsigned,uint64_t,Int64_val) +fastpath2(64,le,unsigned,uint64_t,Int64_val) +fastpath2(64,ne,unsigned,uint64_t,Int64_val) +fastpath2(64,be,signed,int64_t,Int64_val) +fastpath2(64,le,signed,int64_t,Int64_val) +fastpath2(64,ne,signed,int64_t,Int64_val) diff --git a/byteswap.in.h b/byteswap.in.h new file mode 100644 index 0000000..5e4652e --- /dev/null +++ b/byteswap.in.h @@ -0,0 +1,54 @@ +/* byteswap.h - Byte swapping + Copyright (C) 2005, 2007 Free Software Foundation, Inc. + Written by Oskar Liljeblad , 2005. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/* NB: + + This file is from Gnulib, and in accordance with the convention + there, the real license of this file comes from the module + definition. It is really LGPLv2+. + + - RWMJ. 2008/08/23 +*/ + +#ifndef _GL_BYTESWAP_H +#define _GL_BYTESWAP_H + +/* Given an unsigned 16-bit argument X, return the value corresponding to + X with reversed byte order. */ +#define bswap_16(x) ((((x) & 0x00FF) << 8) | \ + (((x) & 0xFF00) >> 8)) + +/* Given an unsigned 32-bit argument X, return the value corresponding to + X with reversed byte order. */ +#define bswap_32(x) ((((x) & 0x000000FF) << 24) | \ + (((x) & 0x0000FF00) << 8) | \ + (((x) & 0x00FF0000) >> 8) | \ + (((x) & 0xFF000000) >> 24)) + +/* Given an unsigned 64-bit argument X, return the value corresponding to + X with reversed byte order. */ +#define bswap_64(x) ((((x) & 0x00000000000000FFULL) << 56) | \ + (((x) & 0x000000000000FF00ULL) << 40) | \ + (((x) & 0x0000000000FF0000ULL) << 24) | \ + (((x) & 0x00000000FF000000ULL) << 8) | \ + (((x) & 0x000000FF00000000ULL) >> 8) | \ + (((x) & 0x0000FF0000000000ULL) >> 24) | \ + (((x) & 0x00FF000000000000ULL) >> 40) | \ + (((x) & 0xFF00000000000000ULL) >> 56)) + +#endif /* _GL_BYTESWAP_H */ diff --git a/configure.ac b/configure.ac index 8c93229..c6d2ac7 100644 --- a/configure.ac +++ b/configure.ac @@ -30,13 +30,26 @@ test "x$U" != "x" && AC_MSG_ERROR(Compiler not ANSI compliant) AC_PROG_CC_C_O dnl Check for native endianness. -AC_C_BIGENDIAN( - [NATIVEENDIAN=BigEndian], - [NATIVEENDIAN=LittleEndian], +AC_C_BIGENDIAN(,, [AC_MSG_ERROR([Machine endianness could not be determined])] ) +if test "x$WORDS_BIGENDIAN" = "x"; then + NATIVEENDIAN=LittleEndian +else + NATIVEENDIAN=BigEndian +fi AC_SUBST(NATIVEENDIAN) +dnl Create byteswap.h if the system doesn't have this file. +dnl Written by Oskar Liljeblad. +dnl From gnulib, license is LGPLv2+. +AC_CHECK_HEADERS([byteswap.h], [ + BYTESWAP_H='' +], [ + BYTESWAP_H='byteswap.h' +]) +AC_SUBST(BYTESWAP_H) + dnl Check for basic OCaml environment & findlib. AC_PROG_OCAML AC_PROG_FINDLIB diff --git a/pa_bitstring.ml b/pa_bitstring.ml index 5e5582c..71eabed 100644 --- a/pa_bitstring.ml +++ b/pa_bitstring.ml @@ -616,40 +616,84 @@ let output_bitmatch _loc bs cases = build_bitstring_call _loc ExtractFunc None endian signed in let expr = - match t, flen_is_const, field_offset_aligned with - (* Very common cases: int field, constant 8/16/32/64 bit length, - * aligned to the match at a known offset. We still have to - * check if the bitstring is aligned (can only be known at - * runtime) but we may be able to directly access the - * bytes in the string. - *) - | P.Int, Some ((8(*|16|32|64*)) as i), Some field_byte_offset -> + match t, flen_is_const, field_offset_aligned, endian, signed with + (* Very common cases: int field, constant 8/16/32/64 bit + * length, aligned to the match at a known offset. We + * still have to check if the bitstring is aligned (can only + * be known at runtime) but we may be able to directly access + * the bytes in the string. + *) + | P.Int, Some 8, Some field_byte_offset, _, _ -> + let extract_fn = int_extract_const 8 endian signed in + + (* The fast-path code when everything is aligned. *) + let fastpath = + <:expr< + let o = + ($lid:original_off$ lsr 3) + $`int:field_byte_offset$ in + Char.code (String.unsafe_get $lid:data$ o) + >> in + + <:expr< + if $lid:len$ >= 8 then ( + let v = + if $lid:off_aligned$ then + $fastpath$ + else + $extract_fn$ $lid:data$ $lid:off$ $lid:len$ 8 in + let $lid:off$ = $lid:off$ + 8 + and $lid:len$ = $lid:len$ - 8 in + match v with $fpatt$ when true -> $expr$ | _ -> () + ) + >> + + | P.Int, Some ((16|32|64) as i), + Some field_byte_offset, (P.ConstantEndian _ as endian), signed -> let extract_fn = int_extract_const i endian signed in - let o = gensym "off" and v = gensym "val" in (* The fast-path code when everything is aligned. *) let fastpath = + let fastpath_call = + let endian = match endian with + | P.ConstantEndian BigEndian -> "be" + | P.ConstantEndian LittleEndian -> "le" + | P.ConstantEndian NativeEndian -> "ne" + | P.EndianExpr _ -> assert false in + let signed = if signed then "signed" else "unsigned" in + let name = + sprintf "extract_fastpath_int%d_%s_%s" i endian signed in + match i with + | 16 -> + <:expr< Bitstring.$lid:name$ $lid:data$ o >> + | 32 -> + <:expr< Bitstring.$lid:name$ $lid:data$ o 0l >> + | 64 -> + <:expr< Bitstring.$lid:name$ $lid:data$ o 0L >> + | _ -> assert false in <:expr< - let $lid:o$ = ($lid:original_off$ lsr 3) + - $`int:field_byte_offset$ in - Char.code (String.unsafe_get $lid:data$ $lid:o$) + (* Starting offset within the string. *) + let o = + ($lid:original_off$ lsr 3) + $`int:field_byte_offset$ in + $fastpath_call$ >> in + let slowpath = + <:expr< + $extract_fn$ $lid:data$ $lid:off$ $lid:len$ $`int:i$ + >> in + <:expr< if $lid:len$ >= $`int:i$ then ( - let $lid:v$ = - if $lid:off_aligned$ then - $fastpath$ - else - $extract_fn$ $lid:data$ $lid:off$ $lid:len$ $`int:i$ in + let v = + if $lid:off_aligned$ then $fastpath$ else $slowpath$ in let $lid:off$ = $lid:off$ + $`int:i$ and $lid:len$ = $lid:len$ - $`int:i$ in - match $lid:v$ with $fpatt$ when true -> $expr$ | _ -> () + match v with $fpatt$ when true -> $expr$ | _ -> () ) >> (* Common case: int field, constant flen *) - | P.Int, Some i, _ when i > 0 && i <= 64 -> + | P.Int, Some i, _, _, _ when i > 0 && i <= 64 -> let extract_fn = int_extract_const i endian signed in let v = gensym "val" in <:expr< @@ -662,14 +706,14 @@ let output_bitmatch _loc bs cases = ) >> - | P.Int, Some _, _ -> + | P.Int, Some _, _, _, _ -> fail "length of int field must be [1..64]" (* Int field, non-const flen. We have to test the range of * the field at runtime. If outside the range it's a no-match * (not an error). *) - | P.Int, None, _ -> + | P.Int, None, _, _, _ -> let extract_fn = int_extract endian signed in let v = gensym "val" in <:expr< @@ -682,17 +726,52 @@ let output_bitmatch _loc bs cases = ) >> + (* String, constant flen > 0. + * The field is at a known byte-aligned offset so we may + * be able to optimize the substring extraction. + *) + | P.String, Some i, Some field_byte_offset, _, _ + when i > 0 && i land 7 = 0 -> + let fastpath = + <:expr< + (* Starting offset within the string. *) + let o = + ($lid:original_off$ lsr 3) + $`int:field_byte_offset$ in + String.sub $lid:data$ o $`int:(i lsr 3)$ + >> in + + let slowpath = + <:expr< + Bitstring.string_of_bitstring + ($lid:data$, $lid:off$, $`int:i$) + >> in + + let cond = + <:expr< + if $lid:off_aligned$ then $fastpath$ else $slowpath$ + >> in + + <:expr< + if $lid:len$ >= $`int:i$ then ( + let str = $cond$ in + let $lid:off$ = $lid:off$ + $`int:i$ + and $lid:len$ = $lid:len$ - $`int:i$ in + match str with + | $fpatt$ when true -> $expr$ + | _ -> () + ) + >> + (* String, constant flen > 0. *) - | P.String, Some i, _ when i > 0 && i land 7 = 0 -> - let bs = gensym "bs" in + | P.String, Some i, None, _, _ when i > 0 && i land 7 = 0 -> <:expr< if $lid:len$ >= $`int:i$ then ( - let $lid:bs$ = - Bitstring.extract_bitstring $lid:data$ $lid:off$ $lid:len$ - $`int:i$ in + let str = + Bitstring.string_of_bitstring + ($lid:data$, $lid:off$, $`int:i$) in let $lid:off$ = $lid:off$ + $`int:i$ and $lid:len$ = $lid:len$ - $`int:i$ in - match Bitstring.string_of_bitstring $lid:bs$ with + match str with | $fpatt$ when true -> $expr$ | _ -> () ) @@ -700,33 +779,36 @@ let output_bitmatch _loc bs cases = (* String, constant flen = -1, means consume all the * rest of the input. + * XXX It should be possible to optimize this for known byte + * offset, but the optimization is tricky because the end/length + * of the string may not be byte-aligned. *) - | P.String, Some i, _ when i = -1 -> - let bs = gensym "bs" in + | P.String, Some i, _, _, _ when i = -1 -> + let str = gensym "str" in + <:expr< - let $lid:bs$ = - Bitstring.extract_remainder $lid:data$ $lid:off$ $lid:len$ in + let $lid:str$ = + Bitstring.string_of_bitstring + ($lid:data$, $lid:off$, $lid:len$) in let $lid:off$ = $lid:off$ + $lid:len$ in let $lid:len$ = 0 in - match Bitstring.string_of_bitstring $lid:bs$ with + match $lid:str$ with | $fpatt$ when true -> $expr$ | _ -> () >> - | P.String, Some _, _ -> + | P.String, Some _, _, _, _ -> fail "length of string must be > 0 and a multiple of 8, or the special value -1" (* String field, non-const flen. We check the flen is > 0 * and a multiple of 8 (-1 is not allowed here), at runtime. *) - | P.String, None, _ -> + | P.String, None, _, _, _ -> let bs = gensym "bs" in <:expr< if $flen$ >= 0 && $flen$ <= $lid:len$ && $flen$ land 7 = 0 then ( - let $lid:bs$ = - Bitstring.extract_bitstring - $lid:data$ $lid:off$ $lid:len$ $flen$ in + let $lid:bs$ = ($lid:data$, $lid:off$, $flen$) in let $lid:off$ = $lid:off$ + $flen$ and $lid:len$ = $lid:len$ - $flen$ in match Bitstring.string_of_bitstring $lid:bs$ with @@ -739,7 +821,7 @@ let output_bitmatch _loc bs cases = * At the moment all we can do is assign the bitstring to an * identifier. *) - | P.Bitstring, Some i, _ when i >= 0 -> + | P.Bitstring, Some i, _, _, _ when i >= 0 -> let ident = match fpatt with | <:patt< $lid:ident$ >> -> ident @@ -748,9 +830,7 @@ let output_bitmatch _loc bs cases = fail "cannot compare a bitstring to a constant" in <:expr< if $lid:len$ >= $`int:i$ then ( - let $lid:ident$ = - Bitstring.extract_bitstring $lid:data$ $lid:off$ $lid:len$ - $`int:i$ in + let $lid:ident$ = ($lid:data$, $lid:off$, $`int:i$) in let $lid:off$ = $lid:off$ + $`int:i$ and $lid:len$ = $lid:len$ - $`int:i$ in $expr$ @@ -760,7 +840,7 @@ let output_bitmatch _loc bs cases = (* Bitstring, constant flen = -1, means consume all the * rest of the input. *) - | P.Bitstring, Some i, _ when i = -1 -> + | P.Bitstring, Some i, _, _, _ when i = -1 -> let ident = match fpatt with | <:patt< $lid:ident$ >> -> ident @@ -768,20 +848,19 @@ let output_bitmatch _loc bs cases = | _ -> fail "cannot compare a bitstring to a constant" in <:expr< - let $lid:ident$ = - Bitstring.extract_remainder $lid:data$ $lid:off$ $lid:len$ in + let $lid:ident$ = ($lid:data$, $lid:off$, $lid:len$) in let $lid:off$ = $lid:off$ + $lid:len$ in let $lid:len$ = 0 in $expr$ >> - | P.Bitstring, Some _, _ -> + | P.Bitstring, Some _, _, _, _ -> fail "length of bitstring must be >= 0 or the special value -1" (* Bitstring field, non-const flen. We check the flen is >= 0 * (-1 is not allowed here) at runtime. *) - | P.Bitstring, None, _ -> + | P.Bitstring, None, _, _, _ -> let ident = match fpatt with | <:patt< $lid:ident$ >> -> ident @@ -790,9 +869,7 @@ let output_bitmatch _loc bs cases = fail "cannot compare a bitstring to a constant" in <:expr< if $flen$ >= 0 && $flen$ <= $lid:len$ then ( - let $lid:ident$ = - Bitstring.extract_bitstring $lid:data$ $lid:off$ $lid:len$ - $flen$ in + let $lid:ident$ = ($lid:data$, $lid:off$, $flen$) in let $lid:off$ = $lid:off$ + $flen$ and $lid:len$ = $lid:len$ - $flen$ in $expr$ -- 1.8.3.1