(* whenjobs
 * Copyright (C) 2012 Red Hat Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *)

open Whenutils
open Whenexpr

open Big_int
open Unix
open Printf

(* See [exit.c]. *)
external _exit : int -> 'a = "whenjobs__exit"

(* $HOME/.whenjobs *)
let jobsdir = ref ""

(* The state. *)
let state = ref Whenstate.empty

(* Jobs that are running: a map of PID -> (job, tmpdir, serial, start_time).
 * Note that the job may no longer exist *OR* it may have been renamed,
 * eg. if the jobs file was reloaded.
 *)
let runningmap = ref IntMap.empty

(* Serial numbers of running jobs.  Map of serial -> PID (in runningmap). *)
let serialmap = ref BigIntMap.empty

(* Was debugging requested on the command line? *)
let debug = ref false

(* The server. *)
let server = ref None

let esys = Unixqueue.standard_event_system ()

(* The timer.  It's convenient to have this as a global variable
 * because (a) there should only be one timer (which fires when the
 * soonest every-job becomes ready), and (b) it's complicated to track
 * that timer and avoid it getting double-scheduled (eg. when we
 * reload the jobs file) without having a global variable.
 *)
let timer_group = ref None

(* Initialize the daemon: record the jobs directory [j] and the debug
 * flag [d], take the lock, create the RPC server on the Unix domain
 * socket jobsdir/socket, install the SIGCHLD handler and set the
 * initial value of JOBSERIAL to zero.
 *)
let rec init j d =
  jobsdir := j;
  debug := d;

  Whenlock.create_lock !jobsdir;

  (* Remove old socket if it exists. *)
  let addr = sprintf "%s/socket" !jobsdir in
  (try unlink addr with Unix_error _ -> ());

  (* Create the Unix domain socket server. *)
  server := Some (
    Whenproto_srv.When.V1.create_server
      ~proc_reload_file
      ~proc_set_variable
      ~proc_get_variable
      ~proc_get_variable_names
      ~proc_exit_daemon
      ~proc_get_jobs
      ~proc_cancel_job
      ~proc_start_job
      ~proc_get_job
      ~proc_set_variables
      ~proc_get_job_names
      ~proc_test_variables
      ~proc_ping_daemon
      ~proc_whisper_variables
      (Rpc_server.Unix addr)
      Rpc.Tcp (* not TCP, this is the same as SOCK_STREAM *)
      Rpc.Socket
      esys
  );

  (* Handle SIGCHLD to clean up jobs. *)
  Sys.set_signal Sys.sigchld (Sys.Signal_handle handle_sigchld);

  (* Initialize the variables. *)
  state := Whenstate.set_variable !state "JOBSERIAL" (T_int zero_big_int)

(* RPC handler: reload the jobs files.  Returns `ok, or `error with the
 * message if reload_files raised Failure.
 *)
and proc_reload_file () =
  if !debug then Syslog.notice "remote call: reload_file";

  try reload_files (); `ok
  with Failure err -> `error err

(* RPC handler: set a single variable, then re-evaluate the when-jobs
 * that depend on it and run those that should run.  Returns `ok, or
 * `error with the message if a Failure was raised.
 *)
and proc_set_variable (name, value) =
  if !debug then Syslog.notice "remote call: set_variable %s" name;

  try
    check_valid_variable_name name;

    let value = variable_of_rpc value in
    state := Whenstate.set_variable !state name value;

    (* Which jobs need to be re-evaluated? *)
    let jobs = Whenstate.get_dependencies !state [name] in
    let jobnames, state' = reevaluate_whenjobs !state jobs in
    let state' = run_whenjobs state' jobnames in
    state := state';

    `ok
  with
    Failure msg -> `error msg

(* RPC handler: return the value of the variable [name] from the
 * current state, converted to its RPC representation.
 *)
and proc_get_variable name =
  if !debug then Syslog.notice "remote call: get_variable %s" name;

  rpc_of_variable (Whenstate.get_variable !state name)

(* RPC handler: return the names of all variables in the current state. *)
and proc_get_variable_names () =
  if !debug then Syslog.notice "remote call: get_variable_names";

  let vars = Whenstate.get_variable_names !state in

  (* Return variable names as a sorted array. *)
  let vars = Array.of_list vars in
  Array.sort compare vars;
  vars

(* RPC handler: stop the RPC server gracefully and forget the server
 * handle.  Returns `error if there is no server handle.
 *)
and proc_exit_daemon () =
  if !debug then Syslog.notice "remote call: exit_daemon";

  match !server with
  | None ->
    `error "exit_daemon: no server handle"
  | Some s ->
    Rpc_server.stop_server ~graceful:true s;
    server := None;
    `ok

(* RPC handler: return one record per entry of runningmap, giving the
 * job name, serial number, temporary directory and start time.
 *)
and proc_get_jobs () =
  let running = Array.of_list (IntMap.values !runningmap) in
  Array.map (
    fun (job, dir, serial, start_time) ->
      { Whenproto_aux.job_name = job.job_name;
        job_serial = string_of_big_int serial;
        job_tmpdir = dir;
        job_start_time = Int64.of_float start_time }
  ) running

(* RPC handler: send SIGTERM to the process running the job with the
 * given serial number (passed as a decimal string).  Returns `error
 * if the serial is not in serialmap or on any other exception.
 *)
and proc_cancel_job serial =
  try
    let serial = big_int_of_string serial in
    let pid = BigIntMap.find serial !serialmap in
    (* Use the portable OCaml signal constant rather than a raw
     * system signal number.
     *)
    kill pid Sys.sigterm;
    `ok
  with
  | Not_found -> `error "job not found"
  | exn -> `error (Printexc.to_string exn)

(* RPC handler: run the named job immediately.  Returns `error if the
 * job lookup raises Not_found or on any other exception.
 *)
and proc_start_job jobname =
  try
    let job = Whenstate.get_job !state jobname in
    let state' = run_job !state job in
    state := state';
    `ok
  with
  | Not_found -> `error "job not found"
  | exn -> `error (Printexc.to_string exn)

(* RPC handler: return the record describing the running job with the
 * given serial number (passed as a decimal string).  Raises Failure
 * if the job is not found or on any other exception.
 *)
and proc_get_job serial =
  try
    let serial = big_int_of_string serial in
    let pid = BigIntMap.find serial !serialmap in
    let job, dir, serial, start_time = IntMap.find pid !runningmap in
    { Whenproto_aux.job_name = job.job_name;
      job_serial = string_of_big_int serial;
      job_tmpdir = dir;
      job_start_time = Int64.of_float start_time }
  with
  | Not_found -> failwith "job not found"
  | exn -> failwith (Printexc.to_string exn)

(* RPC handler: set several variables in one step, then re-evaluate
 * the when-jobs that depend on any of them and run those that should
 * run.  Returns `ok, or `error with the message if a Failure was raised.
 *)
and proc_set_variables vars =
  try
    let vars = Array.map (
      fun { Whenproto_aux.sv_name = name; sv_value = value } ->
        name, variable_of_rpc value
    ) vars in
    let vars = Array.to_list vars in

    if !debug then
      Syslog.notice "remote call: set_variables (%s)"
        (String.concat " "
           (List.map (
             fun (name, value) ->
               sprintf "%s=%s" name (string_of_variable value)
           ) vars));

    List.iter (fun (name, _) -> check_valid_variable_name name) vars;

    (* Update all the variables atomically. *)
    let s = List.fold_left (
      fun s (name, value) -> Whenstate.set_variable s name value
    ) !state vars in
    state := s;

    (* Which jobs need to be re-evaluated? *)
    let jobs = Whenstate.get_dependencies !state (List.map fst vars) in
    let jobnames, state' = reevaluate_whenjobs !state jobs in
    let state' = run_whenjobs state' jobnames in
    state := state';

    `ok
  with
    Failure msg -> `error msg

(* RPC handler: return the names of all jobs in the current state. *)
and proc_get_job_names () =
  Array.of_list (Whenstate.get_job_names !state)

(* RPC handler: return the names of the jobs that would run if the
 * given variables were set, without changing the daemon state.
 *)
and proc_test_variables vars =
  (* This is the same as proc_set_variables, except that it doesn't
   * update the state, it just returns the jobs that *would* run if
   * these variables were set to these values.
   *)
  let vars = Array.map (
    fun { Whenproto_aux.sv_name = name; sv_value = value } ->
      name, variable_of_rpc value
  ) vars in
  let vars = Array.to_list vars in

  if !debug then
    Syslog.notice "remote call: test_variables (%s)"
      (String.concat " "
         (List.map (
           fun (name, value) ->
             sprintf "%s=%s" name (string_of_variable value)
         ) vars));

  List.iter (fun (name, _) -> check_valid_variable_name name) vars;

  (* Update all the variables atomically.  Note that this binds a
   * local state which shadows the global one; the global state is
   * not assigned.
   *)
  let state = List.fold_left (
    fun s (name, value) -> Whenstate.set_variable s name value
  ) !state vars in

  (* Which jobs WOULD be re-evaluated? *)
  let jobs = Whenstate.get_dependencies state (List.map fst vars) in
  let jobnames, _ = reevaluate_whenjobs state jobs in

  (* Return the names. *)
  Array.of_list jobnames

(* RPC handler: always returns `ok. *)
and proc_ping_daemon () = `ok

(* RPC handler: set several variables in one step, like
 * proc_set_variables, but without re-evaluating or running any jobs.
 *)
and proc_whisper_variables vars =
  try
    let vars = Array.map (
      fun { Whenproto_aux.sv_name = name; sv_value = value } ->
        name, variable_of_rpc value
    ) vars in
    let vars = Array.to_list vars in

    if !debug then
      Syslog.notice "remote call: whisper_variables (%s)"
        (String.concat " "
           (List.map (
             fun (name, value) ->
               sprintf "%s=%s" name (string_of_variable value)
           ) vars));

    List.iter (fun (name, _) -> check_valid_variable_name name) vars;

    (* Update all the variables atomically. *)
    let s = List.fold_left (
      fun s (name, value) -> Whenstate.set_variable s name value
    ) !state vars in
    state := s;

    (* .. but don't reevaluate or run jobs. *)

    `ok
  with
    Failure msg -> `error msg

(* Reload the jobs file(s).  Raises Failure if loading fails. *)
and reload_files () =
  (* Get dir/*.cmo (bytecode) or dir/*.cmxs (native code) *)
  let suffix = if not Dynlink.is_native then ".cmo" else ".cmxs" in
  let dir = !jobsdir in
  let files = Array.to_list (Sys.readdir dir) in
  let files = List.filter (fun file -> string_endswith file suffix) files in
  let files = List.map (fun file -> dir // file) files in
  let files = List.sort compare files in

  (* As we are reloading the file, we want to create a new state
   * that has no jobs, but has all the variables from the previous
   * state.
   *)
  let s = Whenstate.copy_variables !state Whenstate.empty in
  Whenfile.init s;

  let s =
    try
      List.iter Dynlink.loadfile files;
      let s = Whenfile.get_state () in
      Syslog.notice "loaded %d job(s) from %d file(s)"
        (Whenstate.nr_jobs s) (List.length files);
      s
    with
    | Dynlink.Error err ->
      let err = Dynlink.error_message err in
      Syslog.error "error loading jobs: %s" err;
      failwith err
    | exn ->
      failwith (Printexc.to_string exn) in

  let s = Whenstate.copy_prev_state !state s in
  state := s;

  (* Re-evaluate all when jobs. *)
  let jobs = Whenstate.get_whenjobs !state in
  let jobnames, state' = reevaluate_whenjobs ~onload:true !state jobs in
  let state' = run_whenjobs state' jobnames in
  state := state';

  (* Schedule the next every job to run. *)
  schedule_next_everyjob ()

(* Re-evaluate each when-statement job, in a loop until we reach
 * a fixpoint.  Return the list of job names that should run and
 * the updated state.
 *)
and reevaluate_whenjobs ?onload state jobs =
  let rec loop (set, state) jobs =
    let set', state' =
      List.fold_left (
        fun (set, state) job ->
          let r, state' =
            try Whenstate.evaluate_whenjob ?onload state job
            with Invalid_argument err | Failure err ->
              (* An evaluation error is logged and the job is treated
               * as if it had evaluated to false.
               *)
              Syslog.error "error evaluating job %s (at %s): %s"
                job.job_name (Camlp4.PreCast.Ast.Loc.to_string job.job_loc) err;
              false, state in

          if !debug then
            Syslog.notice "evaluate %s -> %b\n" job.job_name r;

          (if r then StringSet.add job.job_name set else set), state'
      ) (set, state) jobs in
    (* reached a fixpoint? *)
    if StringSet.compare set set' <> 0 then
      loop (set', state') jobs
    else
      (set', state')
  in
  let set, state = loop (StringSet.empty, state) jobs in
  let jobnames = StringSet.elements set in

  (* Ensure the jobs always run in predictable (name) order. *)
  let jobnames = List.sort compare_jobnames jobnames in
  jobnames, state

(* Run the named jobs in the given order, threading the state through
 * each run.  Returns the final state.
 *)
and run_whenjobs state jobnames =
  (* Run the jobs. *)
  let jobs = List.map (Whenstate.get_job state) jobnames in
  List.fold_left run_job state jobs

(* Schedule the next every-statement job to run, if there is one.  We
 * look at the every jobs, work out the time that each must run at,
 * pick the job(s) which must run soonest, and schedule a timer to run
 * them.  When the timer fires, it runs those jobs, then calls this
 * function again.
 *)
and schedule_next_everyjob () =
  let t = time () in

  (* Get only everyjobs. *)
  let jobs = Whenstate.get_everyjobs !state in
  let jobs = List.map (
    function
    | { job_cond = Every_job period } as job -> (job, period)
    | { job_cond = When_job _ } -> assert false
  ) jobs in

  (* Map everyjob to next time it must run. *)
  let jobs = List.map (
    fun (job, period) ->
      let t' = next_periodexpr t period in
      assert (t' > t); (* serious bug in next_periodexpr if false *)
      job, t'
  ) jobs in

  (* Sort, soonest first. *)
  let jobs = List.sort (fun (_,a) (_,b) -> compare a b) jobs in

  if !debug then (
    List.iter (
      fun (job, t) ->
        Syslog.notice "%s: next scheduled run at %s"
          job.job_name (string_of_time_t t)
    ) jobs
  );

  (* Pick the job(s) which run soonest.  The list is sorted, so this
   * is the leading run of jobs which share the earliest time.  An
   * empty list gives time 0., which is used below to mean that there
   * is nothing to schedule.
   *)
  let rec pick = function
    | [] -> 0., []
    | [j, t] -> t, [j]
    | (j1, t) :: (_, t') :: _ when t < t' -> t, [j1]
    | (j1, t) :: ((_ :: _) as rest) -> t, (j1 :: snd (pick rest))
  in
  let t, jobs = pick jobs in

  if t > 0. then (
    if jobs <> [] then (
      (* Ensure the jobs always run in predictable (name) order. *)
      let jobs =
        List.sort (fun {job_name = a} {job_name = b} -> compare_jobnames a b)
          jobs in

      if !debug then
        Syslog.notice "scheduling job(s) %s to run at %s"
          (String.concat ", " (List.map (fun { job_name = name } -> name) jobs))
          (string_of_time_t t);

      (* Schedule them to run at time t. *)
      let g = new_timer_group () in
      let t_diff = t -. Unix.time () in
      let t_diff = if t_diff < 0. then 0. else t_diff in
      let run_jobs () =
        delete_timer_group ();          (* Delete the timer. *)
        let state' = List.fold_left run_job !state jobs in
        state := state';
        schedule_next_everyjob ()
      in
      Unixqueue.weak_once esys g t_diff run_jobs;
    )
  )

(* Discard any existing timer group, create a new one, remember it in
 * timer_group and return it.
 *)
and new_timer_group () =
  delete_timer_group ();
  let g = Unixqueue.new_group esys in
  timer_group := Some g;
  g

(* Clear the current timer group, if there is one, and forget it. *)
and delete_timer_group () =
  match !timer_group with
  | None -> ()
  | Some g ->
    Unixqueue.clear esys g;
    timer_group := None

(* Run a single job.  JOBSERIAL is incremented whether or not the job
 * actually runs.  If the job has a pre function and it returns false,
 * the job is not started.  Otherwise a child process is forked to run
 * the job script in a new temporary directory, and the child is
 * recorded in runningmap and serialmap.  Returns the updated state.
 *)
and run_job state job =
  (* Increment JOBSERIAL. *)
  let serial, state =
    match Whenstate.get_variable state "JOBSERIAL" with
    | T_int serial ->
      let serial = succ_big_int serial in
      let state' = Whenstate.set_variable state "JOBSERIAL" (T_int serial) in
      serial, state'
    | _ -> assert false in

  (* Call the pre-condition script.  Note this may decide not to run
   * the job by returning false.
   *)
  let pre_condition () =
    match job.job_pre with
    | None -> true
    | Some pre ->
      let rs = ref [] in
      IntMap.iter (
        fun pid (job, _, serial, start_time) ->
          let r = { pirun_job_name = job.job_name;
                    pirun_serial = serial;
                    pirun_start_time = start_time;
                    pirun_pid = pid } in
          rs := r :: !rs
      ) !runningmap;
      let preinfo = {
        pi_job_name = job.job_name;
        pi_serial = serial;
        pi_variables = Whenstate.get_variables state;
        pi_running = !rs;
      } in
      pre preinfo
  in
  if pre_condition () then (
    Syslog.notice "running %s (JOBSERIAL=%s)"
      job.job_name (string_of_big_int serial);

    (* Create a temporary directory.  The current directory of the job
     * will be in this directory.  The directory is removed when the
     * child process exits.
     *)
    let dir = tmpdir () in

    let pid = fork () in
    if pid = 0 then ( (* child process running the job *)
      let script = dir // "script.sh" in

      (* Nothing in the child may let an exception escape.  Otherwise
       * the exception would unwind into the caller of run_job and the
       * child would carry on running as a second copy of the daemon.
       * Every failure is logged and the child then falls through to
       * _exit below.
       *)
      (try
         chdir dir;

         (* Set environment variables corresponding to each variable. *)
         List.iter
           (fun (name, value) -> putenv name (string_of_variable value))
           (Whenstate.get_variables state);

         (* Set the $JOBNAME environment variable. *)
         putenv "JOBNAME" job.job_name;

         (* Create a temporary file containing the shell script fragment. *)
         let chan = open_out script in
         fprintf chan "set -e\n"; (* So that jobs exit on error. *)
         output_string chan job.job_script.sh_script;
         close_out chan;
         chmod script 0o700;

         let shell = try getenv "SHELL" with Not_found -> "/bin/sh" in

         (* Set output to file. *)
         let output = dir // "output.txt" in
         let fd = openfile output [O_WRONLY; O_CREAT; O_TRUNC; O_NOCTTY] 0o600 in
         dup2 fd stdout;
         dup2 fd stderr;
         close fd;

         (* Execute the shell script.  This only returns on failure. *)
         (try execvp shell [| shell; "-c"; script |]
          with Unix_error (err, fn, _) ->
            Syslog.error "%s failed: %s: %s" fn script (error_message err)
         )
       with exn ->
         Syslog.error "failed to start job %s: %s"
           job.job_name (Printexc.to_string exn)
      );
      _exit 1
    );

    (* Remember this PID, the job and the temporary directory, so we
     * can clean up when the child exits.
     *)
    runningmap := IntMap.add pid (job, dir, serial, time ()) !runningmap;
    serialmap := BigIntMap.add serial pid !serialmap;

    state
  )
  else (
    Syslog.notice "not running %s (JOBSERIAL=%s) because pre() condition returned false"
      job.job_name (string_of_big_int serial);

    state
  )

(* Create a new directory under the system temporary directory, named
 * whenjobs followed by a hex digest of 16 bytes read from
 * /dev/urandom, with mode 0700.  Returns the path of the directory.
 *)
and tmpdir () =
  let chan = open_in "/dev/urandom" in
  let data = String.create 16 in
  really_input chan data 0 (String.length data);
  close_in chan;
  let data = Digest.to_hex (Digest.string data) in
  let dir = Filename.temp_dir_name // sprintf "whenjobs%s" data in
  mkdir dir 0o700;
  dir

(* This is called when a job (child process) exits.
 *
 * Several children may have exited by the time the handler runs, and
 * pending SIGCHLD signals are not queued individually, so keep
 * reaping until waitpid reports that nothing more is ready.
 *
 * NOTE(review): a child which exits before run_job has recorded its
 * PID in runningmap is reaped here but is not cleaned up, because the
 * lookup raises Not_found.  Confirm whether that window matters.
 *)
and handle_sigchld _ =
  let rec reap () =
    let r = try Some (waitpid [WNOHANG] 0) with Unix_error _ -> None in
    match r with
    | None -> ()                        (* No children, or waitpid failed. *)
    | Some (pid, status) ->
      if pid > 0 then (
        (try
           (* Look up the PID in the running jobs map. *)
           let job, dir, serial, time = IntMap.find pid !runningmap in
           runningmap := IntMap.remove pid !runningmap;
           serialmap := BigIntMap.remove serial !serialmap;
           post_job job dir serial time status
         with Unix_error _ | Not_found -> ()
        );
        reap ()
      )
  in
  reap ()

(* Finish off a job whose process has exited: call its post function,
 * if it has one, then remove its temporary directory.  Exceptions
 * raised by the post function are logged and otherwise ignored.
 *)
and post_job job dir serial time status =
  (* If there is a post function, run it. *)
  (match job.job_post with
   | None -> ()
   | Some post ->
     (* Any termination other than a normal exit is reported as code 1. *)
     let code =
       match status with
       | WEXITED c -> c
       | WSIGNALED _ | WSTOPPED _ -> 1 in
     let result = {
       res_job_name = job.job_name;
       res_serial = serial;
       res_code = code;
       res_tmpdir = dir;
       res_output = dir // "output.txt";
       res_start_time = time
     } in
     try post result
     with
     | Failure msg ->
       Syslog.error "job %s post function failed: %s" job.job_name msg
     | exn ->
       Syslog.error "job %s post function exception: %s"
         job.job_name (Printexc.to_string exn)
  );

  (* This should be safe because the path cannot contain shell metachars. *)
  let cmd = sprintf "rm -rf '%s'" dir in
  ignore (Sys.command cmd)

(* Intelligent comparison of job names.  If both names consist of the
 * prefix job$ followed by an integer, compare those integers.
 * Otherwise fall back to comparing the names as strings.
 *)
and compare_jobnames name1 name2 =
  try
    let len1 = String.length name1
    and len2 = String.length name2 in
    if len1 > 4 && len2 > 4 &&
      String.sub name1 0 4 = "job$" && String.sub name2 0 4 = "job$"
    then (
      let i1 = int_of_string (String.sub name1 4 (len1-4)) in
      let i2 = int_of_string (String.sub name2 4 (len2-4)) in
      compare i1 i2
    )
    else raise Not_found
  with
    (* Not_found: the names do not both have the prefix.
     * Failure: a suffix is not an integer.
     *)
    Not_found | Failure _ -> compare name1 name2

(* Run the event system. *)
let main_loop () =
  Unixqueue.run esys