- c = ' '
- (* || c = '\f' *) || c = '\n' || c = '\r' || c = '\t' (* || c = '\v' *)
-
-let isalpha c =
- c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z'
-
-let isdigit c =
- c >= '0' && c <= '9'
-
-let isalnum c =
- c >= '0' && c <= '9' || c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z'
-
-let islower c =
- c >= 'a' && c <= 'z'
-
-let isupper c =
- c >= 'A' && c <= 'Z'
-
-let isxdigit c =
- c >= '0' && c <= '9' || c >= 'a' && c <= 'f' || c >= 'A' && c <= 'F'
-
-let triml ?(test = isspace) str =
+ let c = UChar.code c in
+ c = 32 || (c >= 9 && c <= 13) (* tab through to carriage return *)
+ || c = 0x3000 (* Unicode CJK IDEOGRAPHIC SPACE (double-width) *)
+
+let isprint c =
+ (* XXX rather naive: ASCII 32..126, or >= 160 except U+3000 (CJK space). *)
+ let code = UChar.code c in
+ (code >= 32 && code < 127) || (code >= 160 && code <> 0x3000)
+
+(* True iff [c] is an ASCII digit: code points 48 ('0') .. 57 ('9'). *)
+let iswesterndigit c =
+ let code = UChar.code c in 48 <= code && code <= 57
+
+(* True iff [c] is an ASCII letter: 'a'..'z' (97..122) or 'A'..'Z' (65..90). *)
+let iswesternalpha c =
+ let code = UChar.code c in
+ (97 <= code && code <= 122) || (65 <= code && code <= 90)
+
+(* ASCII alphanumeric test: the digit check runs first, then the letter check. *)
+let iswesternalnum c = if iswesterndigit c then true else iswesternalpha c
+
+(* [iswebsafe c] is intended to mean that [c] is a letter or a number.
+ * As written it accepts ASCII letters and digits, plus every code
+ * point from 160 upwards that [isspace] does not classify as space.
+ * XXX This function is wrong. It should use Camomile's UCharInfo to
+ * get character classes, but currently Camomile is incompatible with
+ * ExtLib, and ExtLib is needed more.
+ *)
+let iswebsafe c =
+ if iswesternalnum c then true
+ else if isspace c then false
+ else UChar.code c >= 160
+
+(* Trim the left part of a string of any whitespace. *)
+let triml str =