(* COCANWIKI - a wiki written in Objective CAML.
 * Written by Richard W.M. Jones <rich@merjis.com>.
 * Copyright (C) 2004 Merjis Ltd.
 * $Id: cocanwiki_strings.ml,v 1.4 2006/08/16 15:27:02 rich Exp $
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; see the file COPYING.  If not, write to
 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 *)

open ExtString

(* UTF-8-safe lowercase/uppercase functions.  The ones in the stdlib
 * are not safe.
 *)
(* [lowercase str] returns a new string in which the ASCII letters
 * 'A'-'Z' are replaced by 'a'-'z'.  Every other byte is copied
 * unchanged, so the bytes of multi-byte UTF-8 sequences (all of which
 * are >= 0x80) are never altered.
 *
 * The result is built with String.map rather than by allocating with
 * String.create and writing into the string in place, which is not
 * permitted once strings are immutable.
 *)
let lowercase str =
  String.map
    (fun c ->
      match c with
      | 'A' .. 'Z' -> Char.chr (Char.code c + 32) (* 'a' - 'A' = 32 *)
      | _ -> c)
    str

(* [uppercase str] returns a new string in which the ASCII letters
 * 'a'-'z' are replaced by 'A'-'Z'.  Every other byte is copied
 * unchanged, so the bytes of multi-byte UTF-8 sequences (all of which
 * are >= 0x80) are never altered.
 *
 * The result is built with String.map rather than by allocating with
 * String.create and writing into the string in place, which is not
 * permitted once strings are immutable.
 *)
let uppercase str =
  String.map
    (fun c ->
      match c with
      | 'a' .. 'z' -> Char.chr (Char.code c - 32) (* 'a' - 'A' = 32 *)
      | _ -> c)
    str

(* Truncate a string to a maximum of n characters, in a UTF-8-safe way.
 *
 * [truncate n str] returns [str] itself when it holds at most [n]
 * Unicode characters, and otherwise its first [n] characters.  A
 * non-positive [n] yields the empty string.
 *
 * UTF8.nth gives the byte offset at which character number [n]
 * (counting from 0) starts, which is exactly the number of bytes
 * occupied by the first [n] characters.  Cutting at that offset
 * always falls on a character boundary; cutting one byte earlier
 * would split the preceding character whenever it is multi-byte.
 *)
let truncate n str =
  if n <= 0 then ""
  else if UTF8.length str <= n then str
  else String.sub str 0 (UTF8.nth str n)

(* We used to have functions like 'isalpha' here.  These are not
 * safe for UTF-8 strings, so I have examined the code and removed
 * any references.
 *)
(* [isspace c] is true when [c] is the ASCII space (32), one of the
 * characters 9-13 (tab through to carriage return), or U+3000, the
 * double-width Unicode CJK IDEOGRAPHIC SPACE.
 *)
let isspace c =
  match UChar.code c with
  | 9 | 10 | 11 | 12 | 13 | 32 | 0x3000 -> true
  | _ -> false

(* [isprint c] is true for code points 32-126 and for every code point
 * from 160 upwards except U+3000 (the CJK ideographic space).
 * XXX rather naive
 *)
let isprint c =
  let code = UChar.code c in
  (* Compare with structural (<>) rather than physical (!=) inequality. *)
  (code >= 32 && code < 127) || (code >= 160 && code <> 0x3000)

(* [iswesterndigit c] is true when [c] is one of the western digits
 * '0' through '9' (code points 48-57).
 *)
let iswesterndigit c =
  let code = UChar.code c in
  Char.code '0' <= code && code <= Char.code '9'

(* [iswesternalpha c] is true when [c] is an unaccented western letter:
 * 'a' - 'z' (97-122) or 'A' - 'Z' (65-90).
 *)
let iswesternalpha c =
  let code = UChar.code c in
  let between lo hi = lo <= code && code <= hi in
  between 97 122 || between 65 90

(* [iswesternalnum c] is true when [c] is a western digit or a western
 * letter.  The digit test is tried first.
 *)
let iswesternalnum c =
  if iswesterndigit c then true else iswesternalpha c

(* 'iswebsafe' is meant to say that the character is a letter or a
 * number.  What it actually accepts is: any western letter or digit,
 * plus any non-space character whose code point is 160 or above.
 *
 * XXX This function is wrong.  Should use Camomile's UCharInfo
 * to get character classes, but currently Camomile is incompatible
 * with ExtLib, and I need ExtLib more.
 *)
let iswebsafe c =
  if iswesternalnum c then true
  else if isspace c then false
  else UChar.code c >= 160

(* Strip leading whitespace (as defined by isspace) from a string.
 * The input itself is returned when there is nothing to strip.
 *)
let triml str =
  let total = String.length str in (* length in bytes *)
  (* Byte offset of the first non-space character at or after [pos],
   * or [total] if only whitespace remains. *)
  let rec skip pos =
    if pos < total && isspace (UTF8.look str pos) then
      skip (UTF8.next str pos)
    else pos
  in
  match skip 0 with
  | 0 -> str
  | start -> String.sub str start (total - start)

(* Strip trailing whitespace (as defined by isspace) from a string.
 * The input itself is returned when it is empty or has nothing to
 * strip; "" is returned when the backward scan runs off the front
 * of the string.
 *)
let trimr str =
  let total = String.length str in (* length in bytes *)
  (* Step backwards one character at a time from byte offset [pos]
   * while the character there is whitespace.  The result is the
   * offset of the last non-space character, or a negative value if
   * the scan passed the start of the string. *)
  let rec last_nonspace pos =
    if pos >= 0 && isspace (UTF8.look str pos) then
      last_nonspace (UTF8.prev str pos)
    else pos
  in
  if total = 0 then str
  else
    let last = last_nonspace (UTF8.prev str total) in
    if last < 0 then ""
    else
      (* Keep everything up to and including the character at [last]. *)
      let stop = UTF8.next str last in
      if stop = total then str else String.sub str 0 stop

(* Strip whitespace from both ends of a string: the left end first,
 * then the right end.
 *)
let trim str =
  let without_leading = triml str in
  trimr without_leading

(* Does the string consist solely of whitespace characters (as defined
 * by isspace)?  The empty string counts as whitespace.
 *)
let string_is_whitespace str =
  let total = String.length str in (* length in bytes *)
  (* True when every character from byte offset [pos] onwards is a
   * space; stops at the first character that is not. *)
  let rec all_space pos =
    pos >= total
    || (isspace (UTF8.look str pos) && all_space (UTF8.next str pos))
  in
  all_space 0