(* Copy a whole host (site). This can also copy between databases and * servers. * * Use 'copy_host --help' for usage. * * $Id: copy_host.ml,v 1.3 2006/09/11 10:01:07 rich Exp $ *) open Printf open ExtList open ExtString let usage = "copy_host can be used to copy/duplicate a whole host (site). It can also copy hosts between databases and database server. Please read the instructions below carefully before attempting to use this program! BASIC USAGE Duplicate a host on the same (local) database server: copy_host -shost mysite.example.com -dhost mycopy.example.com Copy a host from the local database server to another. The copy will have the same canonical and alternate hostnames: copy_host -short mysite.example.com -ddbhost anotherdb.example.com TERMINOLOGY 'host': The term used by COCANWIKI to mean a site. Each host has a single row in the 'hosts' table in the database, and a single unique hostid. A single COCANWIKI database instance can handle an unlimited number of hosts. 'canonical hostname': The standard hostname for a host. See the hosts.canonical_hostname column. 'hostname': Any of the hostnames permitted for a host. Most sites will only have one, the canonical_hostname, but some will have several names, eg. 'example.com' and 'www.example.com'. See the 'hostnames' table. 'source ***', 'destination ***': When copying hosts, you copy from the source whatever to the destination whatever. 'database hostname': The name of the server running the PostgreSQL database. When copying a host between databases, you have a source database hostname and a destination database hostname. 'database name': The name of the PostgreSQL database (not the name of the server, but the name of the database). Usually 'cocanwiki'. Use the command 'psql -l' to list database names. SELECTING THE SOURCE AND DESTINATION HOSTS In general, any parameter beginning with '-s***' refers to the source and with '-d***' to the destination. The '-shost hostname' parameter is required, and it selects the particular host which you want to copy/duplicate. You may give either the canonical hostname or one of the alternate hostnames, since either uniquely identifies the source host you want to copy. '-dhost hostname[,...]' is an optional parameter which specifies the destination hostnames (canonical and alternates). If not given, then the same hostname(s) are used as the source. This only makes sense when copying a host between database instances, since a single database instance cannot contain multiple hosts with the same hostnames. If given with a single hostname, then that hostname is used as the canonical hostname for the copy. If given with multiple (comma-separated) hostnames, then the first one is the canonical hostname and the subsequent ones are the alternate hostnames, eg: -dhost www.example.com,example.com SELECTING THE SOURCE AND DESTINATION DATABASE(S) '-sdb***' and '-ddb***' parameters can be used to select different source and destination database instances. The parameters in question are: -sdbhost -ddbhost Database hostname. -sdbport -ddbport Database port number. -sdbuser -ddbuser Database username. -sdbpassword -ddbpassword Database password. -sdbdatabase -ddbdatabase Database name. All except the last two, database name, default to nothing which means that the usual PostgreSQL method for find the value is used (ie. first use environment variables like $PGHOST, then use an internal default, usually the local database and user). The database name defaults to 'cocanwiki'. It is possible to duplicate a host on the same database instance. In this case it is probably better not to specify the '-sdb***' or '-ddb***' parameters, but instead use environment variables, or run this program on the database server itself. OPTIONS " let shost = ref "" let dhost = ref "" let sdbhost = ref "" let ddbhost = ref "" let sdbport = ref 0 let ddbport = ref 0 let sdbuser = ref "" let ddbuser = ref "" let sdbpassword = ref "" let ddbpassword = ref "" let sdbdatabase = ref "cocanwiki" let ddbdatabase = ref "cocanwiki" let dryrun = ref false let argspec = [ "-shost", Arg.Set_string shost, "Source hostname (required)."; "-dhost", Arg.Set_string dhost, "Destination hostname(s) (optional)."; "-sdbhost", Arg.Set_string sdbhost, "Source database hostname (optional)."; "-ddbhost", Arg.Set_string ddbhost, "Destination database hostname (optional)."; "-sdbport", Arg.Set_int sdbport, "Source database port (optional)."; "-ddbport", Arg.Set_int ddbport, "Destination database port (optional)."; "-sdbuser", Arg.Set_string sdbuser, "Source database user (optional)."; "-ddbuser", Arg.Set_string ddbuser, "Destination database user (optional)."; "-sdbpassword", Arg.Set_string sdbpassword, "Source database password (optional)."; "-ddbpassword", Arg.Set_string ddbpassword, "Destination database password (optional)."; "-sdbdatabase", Arg.Set_string sdbdatabase, "Source database name (default: cocanwiki)."; "-ddbdatabase", Arg.Set_string ddbdatabase, "Destination database name (default: cocanwiki)."; "-dryrun", Arg.Set dryrun, "Rollback database changes at the end."; ] let error _ = raise (Arg.Bad "Use --help for help.") let () = Arg.parse argspec error usage let shost = match !shost with | "" -> failwith "-shost is required. Use --help for help." | s -> s let dhost = match !dhost with | "" -> [] | s -> String.nsplit s "," let sdbhost = match !sdbhost with "" -> None | s -> Some s let ddbhost = match !ddbhost with "" -> None | s -> Some s let sdbport = match !sdbport with 0 -> None | p -> Some p let ddbport = match !ddbport with 0 -> None | p -> Some p let sdbuser = match !sdbuser with "" -> None | s -> Some s let ddbuser = match !ddbuser with "" -> None | s -> Some s let sdbpassword = match !sdbpassword with "" -> None | s -> Some s let ddbpassword = match !ddbpassword with "" -> None | s -> Some s let sdbdatabase = match !sdbdatabase with "" -> None | s -> Some s let ddbdatabase = match !ddbdatabase with "" -> None | s -> Some s let dryrun = !dryrun (* Get column names from a table. *) let columns dbh table_name = let query = "select * from " ^ table_name ^ " order by 1" in let name = "columns" in PGOCaml.prepare dbh ~query ~name (); let columns = match PGOCaml.describe_statement dbh ~name () with | _, Some results -> List.map (fun { PGOCaml.name = name } -> name) results | _, None -> assert false in PGOCaml.close_statement dbh ~name (); columns (* 'select * on table where ...' using the low level interface. *) let select_all dbh table_name where_clause = let query = "select * from " ^ table_name ^ (match where_clause with | "" -> "" | where_clause -> " where " ^ where_clause) ^ " order by 1" in let name = "selectall" in PGOCaml.prepare dbh ~query ~name (); let columns = match PGOCaml.describe_statement dbh ~name () with | _, Some results -> List.map (fun { PGOCaml.name = name } -> name) results | _, None -> assert false in let rows = PGOCaml.execute dbh ~name ~params:[] () in PGOCaml.close_statement dbh ~name (); columns, rows (* Functions to remove or update the value in a column. *) let rec remove_column columns rows col_name = List.filter ( fun col_name' -> col_name <> col_name' ) columns, List.map ( fun row -> List.filter_map ( fun (col_name', value) -> if col_name <> col_name' then Some value else None ) (List.combine columns row) ) rows and update_column columns rows col_name new_value = columns, List.map ( fun row -> List.map ( fun (col_name', old_value) -> if col_name <> col_name' then old_value else new_value ) (List.combine columns row) ) rows and update_apply_column columns rows col_name f = columns, List.map ( fun row -> List.map ( fun (col_name', old_value) -> if col_name <> col_name' then old_value else f old_value ) (List.combine columns row) ) rows (* Insert rows into the database table. *) let insert dbh table_name columns rows = let query = "insert into " ^ table_name ^ " (" ^ String.concat ", " columns ^ ") values (" ^ String.concat ", " (List.mapi (fun i _ -> sprintf "$%d" (i+1)) columns) ^ ")" in let name = "insertinto" in PGOCaml.prepare dbh ~query ~name (); List.iter (fun params -> ignore (PGOCaml.execute dbh ~name ~params ())) rows; PGOCaml.close_statement dbh ~name () (* Insert rows into the database table, returning serial numbers. *) let insert_serial dbh table_name columns rows serial_col = let query = "insert into " ^ table_name ^ " (" ^ String.concat ", " columns ^ ") values (" ^ String.concat ", " (List.mapi (fun i _ -> sprintf "$%d" (i+1)) columns) ^ ")" in let name = "insertserial" in PGOCaml.prepare dbh ~query ~name (); let seq = table_name ^ "_" ^ serial_col ^ "_seq" in let serials = List.map ( fun params -> ignore (PGOCaml.execute dbh ~name ~params ()); PGOCaml.serial4 dbh seq ) rows in PGOCaml.close_statement dbh ~name (); serials let int32_of_value value = let value = Option.get value in Int32.of_string value let value_of_int32 i = let i = Int32.to_string i in Some i let () = print_endline "Connecting to databases ..."; print_endline " Source ..."; let sdbh = let host = sdbhost in let port = sdbport in let user = sdbuser in let password = sdbpassword in let database = sdbdatabase in PGOCaml.connect ?host ?port ?user ?password ?database () in print_endline " Destination ..."; let ddbh = let host = ddbhost in let port = ddbport in let user = ddbuser in let password = ddbpassword in let database = ddbdatabase in PGOCaml.connect ?host ?port ?user ?password ?database () in print_endline "Locate source host ..."; let shostid = List.hd ( PGSQL(sdbh) "select hostid from hostnames where name = $shost" ) in let scanonical_hostname = List.hd ( PGSQL(sdbh) "select canonical_hostname from hosts where id = $shostid" ) in let shostnames = PGSQL(sdbh) "select name from hostnames where hostid = $shostid order by 1" in printf " shostid = %ld\n" shostid; printf " scanonical_hostname = %s\n" scanonical_hostname; printf " shostnames = [ %s ]\n" (String.concat "; " shostnames); print_endline "Begin transaction on destination database ..."; PGOCaml.begin_work ddbh; PGSQL(ddbh) "set constraints hosts_hostname_cn, pages_redirect_cn deferred"; (* Also start a transaction on the source database. We are not * intending to make any changes, but this transaction ensures that we * can't because the disconnect at the end of the program will roll any * we make back. *) PGOCaml.begin_work sdbh; (* Tables hosts and hostnames are the most complex to copy because * we may want to update the hostname. *) let dcanonical_hostname, dhostnames = match dhost with | [] -> scanonical_hostname, shostnames | x :: xs -> x, (x :: xs) in print_endline "Create new host ..."; let columns, rows = select_all sdbh "hosts" (sprintf "id = %ld" shostid) in let nr_columns = List.length columns in assert (nr_columns >= 19); assert (List.length rows = 1); let columns, rows = remove_column columns rows "id" in assert (List.length columns = nr_columns - 1); assert (List.length rows = 1); let columns, rows = update_column columns rows "canonical_hostname" (Some dcanonical_hostname) in assert (List.length columns = nr_columns - 1); assert (List.length rows = 1); let serials = insert_serial ddbh "hosts" columns rows "id" in assert (List.length serials = 1); let dhostid = List.hd serials in printf " dhostid = %ld\n" dhostid; print_endline "Create table hostnames ..."; List.iter ( fun hostname -> printf " adding hostname %s\n" hostname; PGSQL(ddbh) "insert into hostnames (hostid, name) values ($dhostid, $hostname)" ) dhostnames; (* Now start copying the tables. * Not entirely trivial because where a table references another, * we will need to update the IDs to match the corrected serial * numbers. *) print_endline "Copying table users ..."; let columns, rows = select_all sdbh "users" (sprintf "hostid = %ld" shostid) in assert (List.hd columns = "id"); let old_ids = List.map int32_of_value (List.map List.hd rows) in let columns, rows = remove_column columns rows "id" in let columns, rows = update_column columns rows "hostid" (value_of_int32 dhostid) in let new_ids = insert_serial ddbh "users" columns rows "id" in let userid_map = List.combine old_ids new_ids in print_endline "Copying tables contacts, contact_emails ..."; let columns, rows = select_all sdbh "contacts" (sprintf "hostid = %ld" shostid) in assert (List.hd columns = "id"); let old_ids = List.map int32_of_value (List.map List.hd rows) in let columns, rows = remove_column columns rows "id" in let columns, rows = update_column columns rows "hostid" (value_of_int32 dhostid) in let new_ids = insert_serial ddbh "contacts" columns rows "id" in let map = List.combine old_ids new_ids in if old_ids <> [] then ( let columns, rows = select_all sdbh "contact_emails" ("contactid in (" ^ String.concat ", " (List.map Int32.to_string old_ids) ^ ")") in let columns, rows = update_apply_column columns rows "contactid" (fun old_id -> let old_id = int32_of_value old_id in value_of_int32 (List.assoc old_id map)) in insert ddbh "contact_emails" columns rows ); print_endline "Copying tables pages, contents ..."; let columns, rows = select_all sdbh "pages" (sprintf "hostid = %ld" shostid) in assert (List.hd columns = "id"); let old_ids = List.map int32_of_value (List.map List.hd rows) in let columns, rows = remove_column columns rows "id" in let columns, rows = remove_column columns rows "title_description_fti" in let columns, rows = update_column columns rows "hostid" (value_of_int32 dhostid) in let columns, rows = update_apply_column columns rows "logged_user" (function | None -> None | (Some _) as old_id -> let old_id = int32_of_value old_id in value_of_int32 (List.assoc old_id userid_map)) in let new_ids = insert_serial ddbh "pages" columns rows "id" in let map = List.combine old_ids new_ids in if old_ids <> [] then ( let columns, rows = select_all sdbh "contents" ("pageid in (" ^ String.concat ", " (List.map Int32.to_string old_ids) ^ ")") in let columns, rows = remove_column columns rows "id" in let columns, rows = remove_column columns rows "content_fti" in let columns, rows = update_apply_column columns rows "pageid" (fun old_id -> let old_id = int32_of_value old_id in value_of_int32 (List.assoc old_id map)) in insert ddbh "contents" columns rows ); print_endline "Copying tables messages, msg_references ..."; let columns, rows = select_all sdbh "messages" (sprintf "hostid = %ld" shostid) in assert (List.hd columns = "id"); let old_ids = List.map int32_of_value (List.map List.hd rows) in let columns, rows = remove_column columns rows "id" in let columns, rows = update_column columns rows "hostid" (value_of_int32 dhostid) in let new_ids = insert_serial ddbh "messages" columns rows "id" in let map = List.combine old_ids new_ids in if old_ids <> [] then ( let columns, rows = select_all sdbh "msg_references" ("message_id in (" ^ String.concat ", " (List.map Int32.to_string old_ids) ^ ")") in let columns, rows = update_apply_column columns rows "message_id" (fun old_id -> let old_id = int32_of_value old_id in value_of_int32 (List.assoc old_id map)) in insert ddbh "msg_references" columns rows ); print_endline "Copying table files ..."; let columns, rows = select_all sdbh "files" (sprintf "hostid = %ld" shostid) in let columns, rows = remove_column columns rows "id" in let columns, rows = update_column columns rows "hostid" (value_of_int32 dhostid) in insert ddbh "files" columns rows; print_endline "Copying table images ..."; let columns, rows = select_all sdbh "images" (sprintf "hostid = %ld" shostid) in let columns, rows = remove_column columns rows "id" in let columns, rows = update_column columns rows "hostid" (value_of_int32 dhostid) in insert ddbh "images" columns rows; print_endline "Copying table sitemenu ..."; let columns, rows = select_all sdbh "sitemenu" (sprintf "hostid = %ld" shostid) in let columns, rows = update_column columns rows "hostid" (value_of_int32 dhostid) in insert ddbh "sitemenu" columns rows; print_endline "Copying table links ..."; let columns, rows = select_all sdbh "links" (sprintf "hostid = %ld" shostid) in let columns, rows = update_column columns rows "hostid" (value_of_int32 dhostid) in insert ddbh "links" columns rows; print_endline "Copying table macros ..."; let columns, rows = select_all sdbh "macros" (sprintf "hostid = %ld" shostid) in let columns, rows = update_column columns rows "hostid" (value_of_int32 dhostid) in insert ddbh "macros" columns rows; print_endline "Copying table page_emails ..."; let columns, rows = select_all sdbh "page_emails" (sprintf "hostid = %ld" shostid) in let columns, rows = update_column columns rows "hostid" (value_of_int32 dhostid) in insert ddbh "page_emails" columns rows; print_endline "Copying table mailing_lists ..."; let columns, rows = select_all sdbh "mailing_lists" (sprintf "hostid = %ld" shostid) in let columns, rows = update_column columns rows "hostid" (value_of_int32 dhostid) in insert ddbh "mailing_lists" columns rows; (* pending_email_changes and usercookies are only copied if the URL * will not change, because if the URL does change then there is * no point copying them because all cookies/email URLs will be * out of date. *) if scanonical_hostname = dcanonical_hostname && userid_map <> [] then ( let where_clause = "userid in (" ^ String.concat ", " (List.map Int32.to_string (List.map fst userid_map)) ^ ")" in print_endline "Copying table usercookies ..."; let columns, rows = select_all sdbh "usercookies" where_clause in let columns, rows = update_apply_column columns rows "userid" (function | None -> None | (Some _) as old_id -> let old_id = int32_of_value old_id in value_of_int32 (List.assoc old_id userid_map)) in insert ddbh "usercookies" columns rows; print_endline "Copying table pending_email_changes ..."; let columns, rows = select_all sdbh "pending_email_changes" where_clause in let columns, rows = update_apply_column columns rows "userid" (function | None -> None | (Some _) as old_id -> let old_id = int32_of_value old_id in value_of_int32 (List.assoc old_id userid_map)) in insert ddbh "pending_email_changes" columns rows; ); (* Note: Tables which are NOT copied: * * pg_ts_cfg -- Internal tables used by tsearch2 * pg_ts_cfgmap -- """" * pg_ts_dict -- """" * pg_ts_parser -- """" * powered_by -- Fixed table. * recently_visited -- Not worth copying. * server_settings -- Global configuration table. * templates -- Fixed table. * themes -- Fixed table. *) (* Commit or rollback. *) if dryrun then ( print_endline "Rolling back database because -dryrun flag was given."; PGOCaml.rollback ddbh ) else ( print_endline "Committing changes ..."; PGOCaml.commit ddbh; print_endline "Done." )