--- /dev/null
+(* COCANWIKI - a wiki written in Objective CAML.
+ * Written by Richard W.M. Jones <rich@merjis.com>.
+ * Copyright (C) 2004 Merjis Ltd.
+ * $Id: tarpit.ml,v 1.1 2006/08/09 15:35:47 rich Exp $
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *)
+
+open Apache
+open Registry
+open Cgi
+open Printf
+
+open ExtString
+open ExtList
+
+open Cocanwiki
+
+(* See note for tarpit_form.ml. *)
+
+let reserved = [
+ "0.";
+ "10.";
+ "127.";
+ "169.254.";
+ "172.16."; (* XXX *)
+ "192.0.2.";
+ "192.168.";
+ "224."; (* XXX *)
+]
+
+let tlds = [ "com"; "org"; "net"; "co.uk" ]
+
+(* Return n random dictionary words. n should be a small number. *)
+let random_word =
+ let dictfile = "/usr/share/dict/words" in
+ try
+ let size = (Unix.stat dictfile).Unix.st_size in
+ let chan = open_in dictfile in
+ fun state ->
+ (* Seek to a random place in the file, then move backwards until
+ * we find a \n character or beginning of file.
+ *)
+ let rec loop r =
+ seek_in chan r;
+ if r > 0 && input_char chan <> '\n' then loop (r-1)
+ in
+ let r = Random.State.int state size in
+ loop r;
+
+ (* Read the next word (= line). *)
+ let word = input_line chan in
+ word
+ with
+ Unix.Unix_error _ | Sys_error _ ->
+ (* No dictionary file found, so return some random strings. *)
+ let random_char state =
+ Char.chr (Char.code 'a' + Random.State.int state 26)
+ in
+ fun state ->
+ let n = Random.State.int state 10 in
+ let s = String.create n in
+ for i = 0 to n-1 do
+ s.[i] <- random_char state
+ done;
+ s
+
+let rec random_words state = function
+ | n when n <= 0 -> []
+ | n -> random_word state :: random_words state (n-1)
+
+let run r (q : cgi) _ hostid _ _ =
+ (try
+ let ip = Connection.remote_ip (Request.connection r) in
+
+ (* Make sure the IP address isn't a reserved one. *)
+ if List.exists (String.starts_with ip) reserved then
+ raise Not_found;
+
+ prerr_endline ("tarpit: Block spambot at IP address " ^ ip)
+ with Not_found -> ()
+ );
+
+ (* Print a header. *)
+ let header = "<html><body>\r\n" in
+ q#header ~content_type:"text/html" ();
+ print_string r header;
+
+ let seed = q#param_all "t" in
+ let seed = List.take 10 seed in
+ let seed = List.map int_of_string seed in
+ let seed = if seed = [] then [0] else seed in
+ let seed = Array.of_list seed in
+ let state = Random.State.make seed in
+
+ (* Feed some random email addresses and links. *)
+ for i = 1 to 100_000 do
+ (match Random.State.int state 3 with
+ | 0 ->
+ let i = 1 + Random.State.int state 10 in
+ let words = String.concat " " (random_words state i) in
+ let seed = Random.State.int state 999_999_999 in
+ let link =
+ sprintf "<a href=\"/_bin/tarpit.cmo?t=%d\">%s</a>\n" seed words in
+ print_string r link
+ | 1 ->
+ (* Generate an email address, but try hard not to make it
+ * a real email address ...
+ *)
+ let w1,w2,w3,w4 =
+ match random_words state 4 with
+ | [a;b;c;d] -> a,b,c,d
+ | _ -> assert false in
+ let tld =
+ List.nth tlds (Random.State.int state (List.length tlds-1)) in
+ let i = Random.State.int state 2000 in
+ let email = sprintf "%s%s%d@%s%s.%s" w1 w2 i w3 w4 tld in
+ let link =
+ sprintf "<a href=\"mailto:%s\">%s</a>\n" email email in
+ print_string r link
+ | 2 ->
+ let i = 1 + Random.State.int state 10 in
+ let words = String.concat " " (random_words state i) in
+ let words = words ^ "\n" in
+ print_string r words
+ | _ -> assert false);
+
+ (* Our aim is also to consume as much of the spambot's time and
+ * resources.
+ *)
+ ignore (Request.rflush r);
+ let r = Random.State.int state 2 in
+ Unix.sleep r
+ done
+
+let () =
+ register_script run
--- /dev/null
+(* COCANWIKI - a wiki written in Objective CAML.
+ * Written by Richard W.M. Jones <rich@merjis.com>.
+ * Copyright (C) 2004 Merjis Ltd.
+ * $Id: tarpit_form.ml,v 1.1 2006/08/09 15:35:47 rich Exp $
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *)
+
+open Apache
+open Registry
+open Cgi
+open Printf
+
+open Cocanwiki
+open Cocanwiki_template
+
+(* This page appears in robots.txt but nowhere else (unless the user
+ * chooses to link to it from the real site - which is a possibility).
+ * Badly behaved robots (typically spambots) will deliberately visit
+ * this. We feed them random email addresses and direct them to
+ * /_bin/tarpit.cmo which will print their IP address for banning.
+ *)
+let run r (q : cgi) dbh hostid _ _ =
+ let template = get_template dbh hostid "tarpit_form.html" in
+
+ let rand = Random.int 999_999_999 in
+ template#set "rand" (string_of_int rand);
+
+ q#template template
+
+let () =
+ register_script run
--- /dev/null
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<title>Spambot tarpit</title>
+<meta name="author" content="http://www.merjis.com/" />
+<link rel="stylesheet" href="::theme_css_html_tag::" type="text/css" title="Standard"/>
+<link rel="alternate stylesheet" href="/_css/easytoread.css" type="text/css" title="High contrast, big fonts"/>
+</head><body>
+
+<h1><span>Spambot tarpit</span></h1>
+
+<script type="text/javascript"><!--
+window.location = "/";
+//-->
+</script>
+
+<p>
+This page is for trapping
+<a href="http://www.turnstep.com/Spambot/">spambots</a>.
+If you have reached this page, please go to our
+<a href="/">main page</a>.
+</p>
+
+<br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/>
+
+<p>
+<a href="/_bin/tarpit.cmo?t=::rand_html_tag::">Yes,
+I am a stupid spambot and I would like to be banned from this
+site.</a>
+</p>
+
+::include(footer.html)::
+</body>
+</html>
\ No newline at end of file