From 02564d97791d14f07a9ca29c043062b41db76a24 Mon Sep 17 00:00:00 2001 From: rich Date: Wed, 9 Aug 2006 15:35:46 +0000 Subject: [PATCH] Sick of spambots. Added a tarpit as standard to our CMS. --- html/robots.txt | 1 + scripts/tarpit.ml | 150 +++++++++++++++++++++++++++++++++++++++++++++ scripts/tarpit_form.ml | 45 ++++++++++++++ templates/tarpit_form.html | 34 ++++++++++ 4 files changed, 230 insertions(+) create mode 100644 scripts/tarpit.ml create mode 100644 scripts/tarpit_form.ml create mode 100644 templates/tarpit_form.html diff --git a/html/robots.txt b/html/robots.txt index 913627e..563ec29 100644 --- a/html/robots.txt +++ b/html/robots.txt @@ -1,2 +1,3 @@ User-agent: * +Disallow: /_bin/tarpit_form.cmo Disallow: /sandbox diff --git a/scripts/tarpit.ml b/scripts/tarpit.ml new file mode 100644 index 0000000..cda79d8 --- /dev/null +++ b/scripts/tarpit.ml @@ -0,0 +1,150 @@ +(* COCANWIKI - a wiki written in Objective CAML. + * Written by Richard W.M. Jones . + * Copyright (C) 2004 Merjis Ltd. + * $Id: tarpit.ml,v 1.1 2006/08/09 15:35:47 rich Exp $ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + *) + +open Apache +open Registry +open Cgi +open Printf + +open ExtString +open ExtList + +open Cocanwiki + +(* See note for tarpit_form.ml. *) + +let reserved = [ + "0."; + "10."; + "127."; + "169.254."; + "172.16."; (* XXX *) + "192.0.2."; + "192.168."; + "224."; (* XXX *) +] + +let tlds = [ "com"; "org"; "net"; "co.uk" ] + +(* Return n random dictionary words. n should be a small number. *) +let random_word = + let dictfile = "/usr/share/dict/words" in + try + let size = (Unix.stat dictfile).Unix.st_size in + let chan = open_in dictfile in + fun state -> + (* Seek to a random place in the file, then move backwards until + * we find a \n character or beginning of file. + *) + let rec loop r = + seek_in chan r; + if r > 0 && input_char chan <> '\n' then loop (r-1) + in + let r = Random.State.int state size in + loop r; + + (* Read the next word (= line). *) + let word = input_line chan in + word + with + Unix.Unix_error _ | Sys_error _ -> + (* No dictionary file found, so return some random strings. *) + let random_char state = + Char.chr (Char.code 'a' + Random.State.int state 26) + in + fun state -> + let n = Random.State.int state 10 in + let s = String.create n in + for i = 0 to n-1 do + s.[i] <- random_char state + done; + s + +let rec random_words state = function + | n when n <= 0 -> [] + | n -> random_word state :: random_words state (n-1) + +let run r (q : cgi) _ hostid _ _ = + (try + let ip = Connection.remote_ip (Request.connection r) in + + (* Make sure the IP address isn't a reserved one. *) + if List.exists (String.starts_with ip) reserved then + raise Not_found; + + prerr_endline ("tarpit: Block spambot at IP address " ^ ip) + with Not_found -> () + ); + + (* Print a header. *) + let header = "\r\n" in + q#header ~content_type:"text/html" (); + print_string r header; + + let seed = q#param_all "t" in + let seed = List.take 10 seed in + let seed = List.map int_of_string seed in + let seed = if seed = [] then [0] else seed in + let seed = Array.of_list seed in + let state = Random.State.make seed in + + (* Feed some random email addresses and links. *) + for i = 1 to 100_000 do + (match Random.State.int state 3 with + | 0 -> + let i = 1 + Random.State.int state 10 in + let words = String.concat " " (random_words state i) in + let seed = Random.State.int state 999_999_999 in + let link = + sprintf "%s\n" seed words in + print_string r link + | 1 -> + (* Generate an email address, but try hard not to make it + * a real email address ... + *) + let w1,w2,w3,w4 = + match random_words state 4 with + | [a;b;c;d] -> a,b,c,d + | _ -> assert false in + let tld = + List.nth tlds (Random.State.int state (List.length tlds-1)) in + let i = Random.State.int state 2000 in + let email = sprintf "%s%s%d@%s%s.%s" w1 w2 i w3 w4 tld in + let link = + sprintf "%s\n" email email in + print_string r link + | 2 -> + let i = 1 + Random.State.int state 10 in + let words = String.concat " " (random_words state i) in + let words = words ^ "\n" in + print_string r words + | _ -> assert false); + + (* Our aim is also to consume as much of the spambot's time and + * resources. + *) + ignore (Request.rflush r); + let r = Random.State.int state 2 in + Unix.sleep r + done + +let () = + register_script run diff --git a/scripts/tarpit_form.ml b/scripts/tarpit_form.ml new file mode 100644 index 0000000..630b125 --- /dev/null +++ b/scripts/tarpit_form.ml @@ -0,0 +1,45 @@ +(* COCANWIKI - a wiki written in Objective CAML. + * Written by Richard W.M. Jones . + * Copyright (C) 2004 Merjis Ltd. + * $Id: tarpit_form.ml,v 1.1 2006/08/09 15:35:47 rich Exp $ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + *) + +open Apache +open Registry +open Cgi +open Printf + +open Cocanwiki +open Cocanwiki_template + +(* This page appears in robots.txt but nowhere else (unless the user + * chooses to link to it from the real site - which is a possibility). + * Badly behaved robots (typically spambots) will deliberately visit + * this. We feed them random email addresses and direct them to + * /_bin/tarpit.cmo which will print their IP address for banning. + *) +let run r (q : cgi) dbh hostid _ _ = + let template = get_template dbh hostid "tarpit_form.html" in + + let rand = Random.int 999_999_999 in + template#set "rand" (string_of_int rand); + + q#template template + +let () = + register_script run diff --git a/templates/tarpit_form.html b/templates/tarpit_form.html new file mode 100644 index 0000000..0fd6f34 --- /dev/null +++ b/templates/tarpit_form.html @@ -0,0 +1,34 @@ + + + +Spambot tarpit + + + + + +

Spambot tarpit

+ + + +

+This page is for trapping +spambots. +If you have reached this page, please go to our +main page. +

+ +




















































































































































































































































































































































+ +

+Yes, +I am a stupid spambot and I would like to be banned from this +site. +

+ +::include(footer.html):: + + \ No newline at end of file -- 1.8.3.1