1 (* Copy a whole host (site). This can also copy between databases and
4 * Use 'copy_host --help' for usage.
6 * $Id: copy_host.ml,v 1.3 2006/09/11 10:01:07 rich Exp $
14 "copy_host can be used to copy/duplicate a whole host (site).
15 It can also copy hosts between databases and database server.
17 Please read the instructions below carefully before attempting
22 Duplicate a host on the same (local) database server:
24 copy_host -shost mysite.example.com -dhost mycopy.example.com
26 Copy a host from the local database server to another. The copy
27 will have the same canonical and alternate hostnames:
29 copy_host -short mysite.example.com -ddbhost anotherdb.example.com
33 'host': The term used by COCANWIKI to mean a site. Each host has
34 a single row in the 'hosts' table in the database, and a single
35 unique hostid. A single COCANWIKI database instance can handle
36 an unlimited number of hosts.
38 'canonical hostname': The standard hostname for a host. See the
39 hosts.canonical_hostname column.
41 'hostname': Any of the hostnames permitted for a host. Most sites
42 will only have one, the canonical_hostname, but some will have
43 several names, eg. 'example.com' and 'www.example.com'. See
44 the 'hostnames' table.
46 'source ***', 'destination ***': When copying hosts, you copy
47 from the source whatever to the destination whatever.
49 'database hostname': The name of the server running the PostgreSQL
50 database. When copying a host between databases, you have a
51 source database hostname and a destination database hostname.
53 'database name': The name of the PostgreSQL database (not the
54 name of the server, but the name of the database). Usually
55 'cocanwiki'. Use the command 'psql -l' to list database
58 SELECTING THE SOURCE AND DESTINATION HOSTS
60 In general, any parameter beginning with '-s***' refers to the
61 source and with '-d***' to the destination.
63 The '-shost hostname' parameter is required, and it selects the
64 particular host which you want to copy/duplicate. You may give
65 either the canonical hostname or one of the alternate hostnames,
66 since either uniquely identifies the source host you want to
69 '-dhost hostname[,...]' is an optional parameter which specifies
70 the destination hostnames (canonical and alternates).
72 If not given, then the same hostname(s) are used as the source.
73 This only makes sense when copying a host between database
74 instances, since a single database instance cannot contain
75 multiple hosts with the same hostnames.
77 If given with a single hostname, then that hostname is used
78 as the canonical hostname for the copy.
80 If given with multiple (comma-separated) hostnames, then
81 the first one is the canonical hostname and the subsequent
82 ones are the alternate hostnames, eg:
84 -dhost www.example.com,example.com
86 SELECTING THE SOURCE AND DESTINATION DATABASE(S)
88 '-sdb***' and '-ddb***' parameters can be used to select
89 different source and destination database instances. The
90 parameters in question are:
92 -sdbhost -ddbhost Database hostname.
93 -sdbport -ddbport Database port number.
94 -sdbuser -ddbuser Database username.
95 -sdbpassword -ddbpassword Database password.
96 -sdbdatabase -ddbdatabase Database name.
98 All except the last two, database name, default to nothing
99 which means that the usual PostgreSQL method for find the value
100 is used (ie. first use environment variables like $PGHOST, then
101 use an internal default, usually the local database and user).
103 The database name defaults to 'cocanwiki'.
105 It is possible to duplicate a host on the same database instance.
106 In this case it is probably better not to specify the '-sdb***'
107 or '-ddb***' parameters, but instead use environment variables,
108 or run this program on the database server itself.
121 let sdbpassword = ref ""
122 let ddbpassword = ref ""
123 let sdbdatabase = ref "cocanwiki"
124 let ddbdatabase = ref "cocanwiki"
125 let dryrun = ref false
128 "-shost", Arg.Set_string shost,
129 "Source hostname (required).";
130 "-dhost", Arg.Set_string dhost,
131 "Destination hostname(s) (optional).";
132 "-sdbhost", Arg.Set_string sdbhost,
133 "Source database hostname (optional).";
134 "-ddbhost", Arg.Set_string ddbhost,
135 "Destination database hostname (optional).";
136 "-sdbport", Arg.Set_int sdbport,
137 "Source database port (optional).";
138 "-ddbport", Arg.Set_int ddbport,
139 "Destination database port (optional).";
140 "-sdbuser", Arg.Set_string sdbuser,
141 "Source database user (optional).";
142 "-ddbuser", Arg.Set_string ddbuser,
143 "Destination database user (optional).";
144 "-sdbpassword", Arg.Set_string sdbpassword,
145 "Source database password (optional).";
146 "-ddbpassword", Arg.Set_string ddbpassword,
147 "Destination database password (optional).";
148 "-sdbdatabase", Arg.Set_string sdbdatabase,
149 "Source database name (default: cocanwiki).";
150 "-ddbdatabase", Arg.Set_string ddbdatabase,
151 "Destination database name (default: cocanwiki).";
152 "-dryrun", Arg.Set dryrun,
153 "Rollback database changes at the end.";
156 let error _ = raise (Arg.Bad "Use --help for help.")
158 let () = Arg.parse argspec error usage
162 | "" -> failwith "-shost is required. Use --help for help."
167 | s -> String.nsplit s ","
168 let sdbhost = match !sdbhost with "" -> None | s -> Some s
169 let ddbhost = match !ddbhost with "" -> None | s -> Some s
170 let sdbport = match !sdbport with 0 -> None | p -> Some p
171 let ddbport = match !ddbport with 0 -> None | p -> Some p
172 let sdbuser = match !sdbuser with "" -> None | s -> Some s
173 let ddbuser = match !ddbuser with "" -> None | s -> Some s
174 let sdbpassword = match !sdbpassword with "" -> None | s -> Some s
175 let ddbpassword = match !ddbpassword with "" -> None | s -> Some s
176 let sdbdatabase = match !sdbdatabase with "" -> None | s -> Some s
177 let ddbdatabase = match !ddbdatabase with "" -> None | s -> Some s
180 (* Get column names from a table. *)
181 let columns dbh table_name =
182 let query = "select * from " ^ table_name ^ " order by 1" in
183 let name = "columns" in
184 PGOCaml.prepare dbh ~query ~name ();
186 match PGOCaml.describe_statement dbh ~name () with
188 List.map (fun { PGOCaml.name = name } -> name) results
189 | _, None -> assert false in
190 PGOCaml.close_statement dbh ~name ();
193 (* 'select * on table where ...' using the low level interface. *)
194 let select_all dbh table_name where_clause =
195 let query = "select * from " ^ table_name ^
196 (match where_clause with
198 | where_clause -> " where " ^ where_clause) ^
200 let name = "selectall" in
201 PGOCaml.prepare dbh ~query ~name ();
203 match PGOCaml.describe_statement dbh ~name () with
205 List.map (fun { PGOCaml.name = name } -> name) results
206 | _, None -> assert false in
207 let rows = PGOCaml.execute dbh ~name ~params:[] () in
208 PGOCaml.close_statement dbh ~name ();
211 (* Functions to remove or update the value in a column. *)
212 let rec remove_column columns rows col_name =
214 fun col_name' -> col_name <> col_name'
219 fun (col_name', value) ->
220 if col_name <> col_name' then Some value else None
221 ) (List.combine columns row)
224 and update_column columns rows col_name new_value =
229 fun (col_name', old_value) ->
230 if col_name <> col_name' then old_value else new_value
231 ) (List.combine columns row)
234 and update_apply_column columns rows col_name f =
239 fun (col_name', old_value) ->
240 if col_name <> col_name' then old_value else f old_value
241 ) (List.combine columns row)
244 (* Insert rows into the database table. *)
245 let insert dbh table_name columns rows =
246 let query = "insert into " ^ table_name ^ " (" ^
247 String.concat ", " columns
249 String.concat ", " (List.mapi (fun i _ -> sprintf "$%d" (i+1)) columns)
251 let name = "insertinto" in
252 PGOCaml.prepare dbh ~query ~name ();
253 List.iter (fun params -> ignore (PGOCaml.execute dbh ~name ~params ())) rows;
254 PGOCaml.close_statement dbh ~name ()
256 (* Insert rows into the database table, returning serial numbers. *)
257 let insert_serial dbh table_name columns rows serial_col =
258 let query = "insert into " ^ table_name ^ " (" ^
259 String.concat ", " columns
261 String.concat ", " (List.mapi (fun i _ -> sprintf "$%d" (i+1)) columns)
263 let name = "insertserial" in
264 PGOCaml.prepare dbh ~query ~name ();
265 let seq = table_name ^ "_" ^ serial_col ^ "_seq" in
269 ignore (PGOCaml.execute dbh ~name ~params ());
270 PGOCaml.serial4 dbh seq
272 PGOCaml.close_statement dbh ~name ();
275 let int32_of_value value =
276 let value = Option.get value in
277 Int32.of_string value
279 let value_of_int32 i =
280 let i = Int32.to_string i in
284 print_endline "Connecting to databases ...";
285 print_endline " Source ...";
287 let host = sdbhost in
288 let port = sdbport in
289 let user = sdbuser in
290 let password = sdbpassword in
291 let database = sdbdatabase in
292 PGOCaml.connect ?host ?port ?user ?password ?database () in
293 print_endline " Destination ...";
295 let host = ddbhost in
296 let port = ddbport in
297 let user = ddbuser in
298 let password = ddbpassword in
299 let database = ddbdatabase in
300 PGOCaml.connect ?host ?port ?user ?password ?database () in
301 print_endline "Locate source host ...";
302 let shostid = List.hd (
303 PGSQL(sdbh) "select hostid from hostnames where name = $shost"
305 let scanonical_hostname = List.hd (
306 PGSQL(sdbh) "select canonical_hostname from hosts where id = $shostid"
310 "select name from hostnames where hostid = $shostid order by 1" in
311 printf " shostid = %ld\n" shostid;
312 printf " scanonical_hostname = %s\n" scanonical_hostname;
313 printf " shostnames = [ %s ]\n" (String.concat "; " shostnames);
315 print_endline "Begin transaction on destination database ...";
316 PGOCaml.begin_work ddbh;
317 PGSQL(ddbh) "set constraints hosts_hostname_cn, pages_redirect_cn deferred";
319 (* Also start a transaction on the source database. We are not
320 * intending to make any changes, but this transaction ensures that we
321 * can't because the disconnect at the end of the program will roll any
324 PGOCaml.begin_work sdbh;
326 (* Tables hosts and hostnames are the most complex to copy because
327 * we may want to update the hostname.
329 let dcanonical_hostname, dhostnames =
331 | [] -> scanonical_hostname, shostnames
332 | x :: xs -> x, (x :: xs) in
334 print_endline "Create new host ...";
337 select_all sdbh "hosts" (sprintf "id = %ld" shostid) in
338 let nr_columns = List.length columns in
339 assert (nr_columns >= 19);
340 assert (List.length rows = 1);
341 let columns, rows = remove_column columns rows "id" in
342 assert (List.length columns = nr_columns - 1);
343 assert (List.length rows = 1);
345 update_column columns rows "canonical_hostname"
346 (Some dcanonical_hostname) in
347 assert (List.length columns = nr_columns - 1);
348 assert (List.length rows = 1);
349 let serials = insert_serial ddbh "hosts" columns rows "id" in
350 assert (List.length serials = 1);
351 let dhostid = List.hd serials in
352 printf " dhostid = %ld\n" dhostid;
354 print_endline "Create table hostnames ...";
357 printf " adding hostname %s\n" hostname;
359 "insert into hostnames (hostid, name) values ($dhostid, $hostname)"
362 (* Now start copying the tables.
363 * Not entirely trivial because where a table references another,
364 * we will need to update the IDs to match the corrected serial
367 print_endline "Copying table users ...";
369 select_all sdbh "users" (sprintf "hostid = %ld" shostid) in
370 assert (List.hd columns = "id");
371 let old_ids = List.map int32_of_value (List.map List.hd rows) in
373 remove_column columns rows "id" in
375 update_column columns rows "hostid" (value_of_int32 dhostid) in
377 insert_serial ddbh "users" columns rows "id" in
379 let userid_map = List.combine old_ids new_ids in
381 print_endline "Copying tables contacts, contact_emails ...";
383 select_all sdbh "contacts" (sprintf "hostid = %ld" shostid) in
384 assert (List.hd columns = "id");
385 let old_ids = List.map int32_of_value (List.map List.hd rows) in
387 remove_column columns rows "id" in
389 update_column columns rows "hostid" (value_of_int32 dhostid) in
391 insert_serial ddbh "contacts" columns rows "id" in
393 let map = List.combine old_ids new_ids in
395 if old_ids <> [] then (
397 select_all sdbh "contact_emails"
399 String.concat ", " (List.map Int32.to_string old_ids) ^
402 update_apply_column columns rows "contactid"
404 let old_id = int32_of_value old_id in
405 value_of_int32 (List.assoc old_id map)) in
406 insert ddbh "contact_emails" columns rows
409 print_endline "Copying tables pages, contents ...";
411 select_all sdbh "pages" (sprintf "hostid = %ld" shostid) in
412 assert (List.hd columns = "id");
413 let old_ids = List.map int32_of_value (List.map List.hd rows) in
415 remove_column columns rows "id" in
417 remove_column columns rows "title_description_fti" in
419 update_column columns rows "hostid" (value_of_int32 dhostid) in
421 update_apply_column columns rows "logged_user"
424 | (Some _) as old_id ->
425 let old_id = int32_of_value old_id in
426 value_of_int32 (List.assoc old_id userid_map)) in
428 insert_serial ddbh "pages" columns rows "id" in
430 let map = List.combine old_ids new_ids in
432 if old_ids <> [] then (
434 select_all sdbh "contents"
436 String.concat ", " (List.map Int32.to_string old_ids) ^
438 let columns, rows = remove_column columns rows "id" in
439 let columns, rows = remove_column columns rows "content_fti" in
441 update_apply_column columns rows "pageid"
443 let old_id = int32_of_value old_id in
444 value_of_int32 (List.assoc old_id map)) in
445 insert ddbh "contents" columns rows
448 print_endline "Copying tables messages, msg_references ...";
450 select_all sdbh "messages" (sprintf "hostid = %ld" shostid) in
451 assert (List.hd columns = "id");
452 let old_ids = List.map int32_of_value (List.map List.hd rows) in
454 remove_column columns rows "id" in
456 update_column columns rows "hostid" (value_of_int32 dhostid) in
458 insert_serial ddbh "messages" columns rows "id" in
460 let map = List.combine old_ids new_ids in
462 if old_ids <> [] then (
464 select_all sdbh "msg_references"
466 String.concat ", " (List.map Int32.to_string old_ids) ^
469 update_apply_column columns rows "message_id"
471 let old_id = int32_of_value old_id in
472 value_of_int32 (List.assoc old_id map)) in
473 insert ddbh "msg_references" columns rows
476 print_endline "Copying table files ...";
478 select_all sdbh "files" (sprintf "hostid = %ld" shostid) in
480 remove_column columns rows "id" in
482 update_column columns rows "hostid" (value_of_int32 dhostid) in
483 insert ddbh "files" columns rows;
485 print_endline "Copying table images ...";
487 select_all sdbh "images" (sprintf "hostid = %ld" shostid) in
489 remove_column columns rows "id" in
491 update_column columns rows "hostid" (value_of_int32 dhostid) in
492 insert ddbh "images" columns rows;
494 print_endline "Copying table sitemenu ...";
496 select_all sdbh "sitemenu" (sprintf "hostid = %ld" shostid) in
498 update_column columns rows "hostid" (value_of_int32 dhostid) in
499 insert ddbh "sitemenu" columns rows;
501 print_endline "Copying table links ...";
503 select_all sdbh "links" (sprintf "hostid = %ld" shostid) in
505 update_column columns rows "hostid" (value_of_int32 dhostid) in
506 insert ddbh "links" columns rows;
508 print_endline "Copying table macros ...";
510 select_all sdbh "macros" (sprintf "hostid = %ld" shostid) in
512 update_column columns rows "hostid" (value_of_int32 dhostid) in
513 insert ddbh "macros" columns rows;
515 print_endline "Copying table page_emails ...";
517 select_all sdbh "page_emails" (sprintf "hostid = %ld" shostid) in
519 update_column columns rows "hostid" (value_of_int32 dhostid) in
520 insert ddbh "page_emails" columns rows;
522 print_endline "Copying table mailing_lists ...";
524 select_all sdbh "mailing_lists" (sprintf "hostid = %ld" shostid) in
526 update_column columns rows "hostid" (value_of_int32 dhostid) in
527 insert ddbh "mailing_lists" columns rows;
529 (* pending_email_changes and usercookies are only copied if the URL
530 * will not change, because if the URL does change then there is
531 * no point copying them because all cookies/email URLs will be
534 if scanonical_hostname = dcanonical_hostname && userid_map <> [] then (
538 (List.map Int32.to_string (List.map fst userid_map)) ^
541 print_endline "Copying table usercookies ...";
542 let columns, rows = select_all sdbh "usercookies" where_clause in
544 update_apply_column columns rows "userid"
547 | (Some _) as old_id ->
548 let old_id = int32_of_value old_id in
549 value_of_int32 (List.assoc old_id userid_map)) in
550 insert ddbh "usercookies" columns rows;
552 print_endline "Copying table pending_email_changes ...";
553 let columns, rows = select_all sdbh "pending_email_changes" where_clause in
555 update_apply_column columns rows "userid"
558 | (Some _) as old_id ->
559 let old_id = int32_of_value old_id in
560 value_of_int32 (List.assoc old_id userid_map)) in
561 insert ddbh "pending_email_changes" columns rows;
564 (* Note: Tables which are NOT copied:
566 * pg_ts_cfg -- Internal tables used by tsearch2
567 * pg_ts_cfgmap -- """"
569 * pg_ts_parser -- """"
570 * powered_by -- Fixed table.
571 * recently_visited -- Not worth copying.
572 * server_settings -- Global configuration table.
573 * templates -- Fixed table.
574 * themes -- Fixed table.
577 (* Commit or rollback. *)
579 print_endline "Rolling back database because -dryrun flag was given.";
580 PGOCaml.rollback ddbh
582 print_endline "Committing changes ...";
584 print_endline "Done."