(* whenjobs
 * Copyright (C) 2012 Red Hat Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *)

open Whenutils
open Whenexpr

open Big_int
open Unix
open Printf

(* See [exit.c]. *)
external _exit : int -> 'a = "whenjobs__exit"

(* $HOME/.whenjobs *)
let jobsdir = ref ""

(* The state. *)
let state = ref Whenstate.empty

(* Jobs that are running: a map of PID -> (job, tmpdir, serial, start_time).
 * Note that the job may no longer exist *OR* it may have been renamed,
 * eg. if the jobs file was reloaded.
 *)
let runningmap = ref IntMap.empty

(* Serial numbers of running jobs.  Map of serial -> PID (in runningmap). *)
let serialmap = ref BigIntMap.empty

(* Was debugging requested on the command line? *)
let debug = ref false

(* The server. *)
let server = ref None

let esys = Unixqueue.standard_event_system ()

(* The timer.  It's convenient to have this as a global variable
 * because (a) there should only be one timer (which fires when the
 * soonest every-job becomes ready), and (b) it's complicated to track
 * that timer and avoid it getting double-scheduled (eg. when we
 * reload the jobs file) without having a global variable.
 *)
let timer_group = ref None

(* Initialize the daemon.  [j] is the jobs directory and [d] is the
 * debug flag; both are stored in the globals above.  This takes the
 * lock, (re)creates the Unix domain socket server in the jobs
 * directory, installs the SIGCHLD handler and sets JOBSERIAL to zero.
 *)
let rec init j d =
  jobsdir := j;
  debug := d;

  Whenlock.create_lock !jobsdir;

  (* Remove old socket if it exists. *)
  let addr = sprintf "%s/socket" !jobsdir in
  (try unlink addr with Unix_error _ -> ());

  (* Create the Unix domain socket server. *)
  server :=
    Some (
      Whenproto_srv.When.V1.create_server
        ~proc_reload_file
        ~proc_set_variable
        ~proc_get_variable
        ~proc_get_variable_names
        ~proc_exit_daemon
        ~proc_get_jobs
        ~proc_cancel_job
        ~proc_start_job
        (Rpc_server.Unix addr)
        Rpc.Tcp (* not TCP, this is the same as SOCK_STREAM *)
        Rpc.Socket
        esys
    );

  (* Handle SIGCHLD to clean up jobs. *)
  Sys.set_signal Sys.sigchld (Sys.Signal_handle handle_sigchld);

  (* Initialize the variables. *)
  state := Whenstate.set_variable !state "JOBSERIAL" (T_int zero_big_int)

(* RPC: reload the jobs file.  A [Failure] raised by [reload_file] is
 * returned to the caller as [`error msg].
 *)
and proc_reload_file () =
  if !debug then Syslog.notice "remote call: reload_file";

  try reload_file (); `ok
  with Failure err -> `error err

(* RPC: set variable [name] to [value], then re-evaluate the jobs
 * which depend on that variable.  A [Failure] (eg. from the name
 * check) is returned to the caller as [`error msg].
 *)
and proc_set_variable (name, value) =
  if !debug then Syslog.notice "remote call: set_variable %s" name;

  try
    check_valid_variable_name name;

    let value = variable_of_rpc value in
    state := Whenstate.set_variable !state name value;

    (* Which jobs need to be re-evaluated? *)
    let jobs = Whenstate.get_dependencies !state name in
    reevaluate_whenjobs jobs;
    `ok
  with
    Failure msg -> `error msg

(* RPC: return the current value of variable [name]. *)
and proc_get_variable name =
  if !debug then Syslog.notice "remote call: get_variable %s" name;

  rpc_of_variable (Whenstate.get_variable !state name)

(* RPC: return the names of all variables. *)
and proc_get_variable_names () =
  if !debug then Syslog.notice "remote call: get_variable_names";

  let vars = Whenstate.get_variable_names !state in

  (* Return variable names as a sorted array. *)
  let vars = Array.of_list vars in
  Array.sort compare vars;
  vars

(* RPC: stop the server (gracefully) and forget the server handle. *)
and proc_exit_daemon () =
  if !debug then Syslog.notice "remote call: exit_daemon";

  match !server with
  | None ->
    `error "exit_daemon: no server handle"
  | Some s ->
    Rpc_server.stop_server ~graceful:true s;
    server := None;
    `ok

(* RPC: return one record for each entry in [runningmap]. *)
and proc_get_jobs () =
  if !debug then Syslog.notice "remote call: get_jobs";

  let running = Array.of_list (IntMap.values !runningmap) in
  Array.map (
    fun (job, dir, serial, start_time) ->
      { Whenproto_aux.job_name = job.job_name;
        job_serial = string_of_big_int serial;
        job_tmpdir = dir;
        job_start_time = Int64.of_float start_time }
  ) running

(* RPC: send SIGTERM to the running job whose serial number is
 * [serial] (a decimal string).  The maps are not touched here; the
 * job is cleaned up by [handle_sigchld] when the process exits.
 *)
and proc_cancel_job serial =
  if !debug then Syslog.notice "remote call: cancel_job %s" serial;

  try
    let serial = big_int_of_string serial in
    let pid = BigIntMap.find serial !serialmap in
    (* Use the symbolic name, not a raw (platform-specific) number. *)
    kill pid Sys.sigterm;
    `ok
  with
  | Not_found -> `error "job not found"
  | exn -> `error (Printexc.to_string exn)

(* RPC: start the job called [jobname] immediately. *)
and proc_start_job jobname =
  if !debug then Syslog.notice "remote call: start_job %s" jobname;

  try
    let job = Whenstate.get_job !state jobname in
    run_job job;
    `ok
  with
  | Not_found -> `error "job not found"
  | exn -> `error (Printexc.to_string exn)

(* Reload the jobs file.  Raises [Failure] if the file cannot be
 * loaded; in that case the current state is left unchanged.
 *)
and reload_file () =
  let file = sprintf "%s/jobs.cmo" !jobsdir in

  (* As we are reloading the file, we want to create a new state
   * that has no jobs, but has all the variables from the previous
   * state.
   *)
  let s = Whenstate.copy_variables !state Whenstate.empty in
  Whenfile.init s;

  let s =
    try
      Dynlink.loadfile file;
      let s = Whenfile.get_state () in
      Syslog.notice "loaded %d job(s) from %s" (Whenstate.nr_jobs s) file;
      s
    with
    | Dynlink.Error err ->
      let err = Dynlink.error_message err in
      Syslog.error "error loading jobs: %s" err;
      failwith err
    | exn ->
      failwith (Printexc.to_string exn) in

  state := s;

  (* Re-evaluate all when jobs. *)
  reevaluate_whenjobs ~onload:true (Whenstate.get_whenjobs !state);

  (* Schedule the next every job to run. *)
  schedule_next_everyjob ()

(* Re-evaluate each when-statement job, in a loop until we reach
 * a fixpoint.  Run those that need to be run.
 *)
and reevaluate_whenjobs ?onload jobs =
  (* [set] accumulates the names of the jobs which evaluated to true.
   * We go around again for as long as a pass adds a name to it.
   *)
  let rec loop set jobs =
    let set' =
      List.fold_left (
        fun set job ->
          let r, state' =
            try Whenstate.evaluate_whenjob ?onload !state job
            with Invalid_argument err | Failure err ->
              Syslog.error "error evaluating job %s (at %s): %s"
                job.job_name (Camlp4.PreCast.Ast.Loc.to_string job.job_loc) err;
              false, !state in

          state := state';

          if !debug then
            Syslog.notice "evaluate %s -> %b\n" job.job_name r;

          if r then StringSet.add job.job_name set else set
      ) set jobs in
    if StringSet.compare set set' <> 0 then
      loop set' jobs
    else
      set'
  in
  let set = loop StringSet.empty jobs in
  let jobnames = StringSet.elements set in

  (* Ensure the jobs always run in predictable (name) order. *)
  let jobnames = List.sort compare_jobnames jobnames in

  (* Run the jobs. *)
  List.iter run_job (List.map (Whenstate.get_job !state) jobnames)

(* Schedule the next every-statement job to run, if there is one.  We
 * look at the every jobs, work out the time that each must run at,
 * pick the job(s) which must run soonest, and schedule a timer to run
 * them.  When the timer fires, it runs those jobs, then calls this
 * function again.
 *)
and schedule_next_everyjob () =
  let t = time () in

  (* Get only everyjobs. *)
  let jobs = Whenstate.get_everyjobs !state in
  let jobs = List.map (
    function
    | { job_cond = Every_job period } as job -> (job, period)
    | { job_cond = When_job _ } -> assert false
  ) jobs in

  (* Map everyjob to next time it must run. *)
  let jobs = List.map (
    fun (job, period) ->
      let t' = next_periodexpr t period in
      assert (t' > t); (* serious bug in next_periodexpr if false *)
      job, t'
  ) jobs in

  (* Sort, soonest first. *)
  let jobs = List.sort (fun (_,a) (_,b) -> compare a b) jobs in

  if !debug then (
    List.iter (
      fun (job, t) ->
        Syslog.notice "%s: next scheduled run at %s"
          job.job_name (string_of_time_t t)
    ) jobs
  );

  (* Pick the job(s) which run soonest.  The list is sorted, so these
   * are the leading entries which share the first entry's time.
   * An empty list gives time 0, which means "nothing to schedule".
   *)
  let rec pick = function
    | [] -> 0., []
    | [(j, t)] -> t, [j]
    | (j1, t) :: (_, t') :: _ when t < t' -> t, [j1]
    | (j1, t) :: rest -> t, (j1 :: snd (pick rest))
  in
  let t, jobs = pick jobs in

  if t > 0. then (
    if jobs <> [] then (
      (* Ensure the jobs always run in predictable (name) order. *)
      let jobs =
        List.sort (fun {job_name = a} {job_name = b} -> compare_jobnames a b)
          jobs in

      if !debug then
        Syslog.notice "scheduling job(s) %s to run at %s"
          (String.concat ", " (List.map (fun { job_name = name } -> name) jobs))
          (string_of_time_t t);

      (* Schedule them to run at time t. *)
      let g = new_timer_group () in
      let t_diff = t -. Unix.time () in
      let t_diff = if t_diff < 0. then 0. else t_diff in
      let run_jobs () =
        delete_timer_group ();          (* Delete the timer. *)
        List.iter run_job jobs;
        schedule_next_everyjob ()
      in
      Unixqueue.weak_once esys g t_diff run_jobs;
    )
  )

(* Replace the global timer group with a fresh one and return it.
 * Any timer in the previous group is cleared first.
 *)
and new_timer_group () =
  delete_timer_group ();
  let g = Unixqueue.new_group esys in
  timer_group := Some g;
  g

(* Clear the global timer group, if there is one. *)
and delete_timer_group () =
  match !timer_group with
  | None -> ()
  | Some g ->
    Unixqueue.clear esys g;
    timer_group := None

(* Run [job] in a forked child process.  The parent records the child
 * in [runningmap] and [serialmap] and returns immediately; the child
 * is cleaned up later by [handle_sigchld].
 *)
and run_job job =
  (* Increment JOBSERIAL. *)
  let serial =
    match Whenstate.get_variable !state "JOBSERIAL" with
    | T_int serial ->
      let serial = succ_big_int serial in
      state := Whenstate.set_variable !state "JOBSERIAL" (T_int serial);
      serial
    | _ -> assert false in

  Syslog.notice "running %s (JOBSERIAL=%s)"
    job.job_name (string_of_big_int serial);

  (* Create a temporary directory.  The current directory of the job
   * will be in this directory.  The directory is removed when the
   * child process exits.
   *)
  let dir = tmpdir () in

  (* If the fork fails there will never be a child to clean up after,
   * so remove the directory now rather than leaking it.
   *)
  let pid =
    (try fork ()
     with exn -> remove_tmpdir dir; raise exn) in

  if pid = 0 then ( (* child process running the job *)
    (* Nothing raised in the child may escape this block: if it did,
     * the exception would unwind into the daemon's own code and we
     * would end up with a second copy of the daemon.  So log any
     * failure and always fall through to [_exit].
     *)
    (try
       chdir dir;

       (* Set environment variables corresponding to each variable. *)
       List.iter
         (fun (name, value) -> putenv name (string_of_variable value))
         (Whenstate.get_variables !state);

       (* Set the $JOBNAME environment variable. *)
       putenv "JOBNAME" job.job_name;

       (* Create a temporary file containing the shell script fragment. *)
       let script = dir // "script.sh" in
       let chan = open_out script in
       fprintf chan "set -e\n"; (* So that jobs exit on error. *)
       output_string chan job.job_script.sh_script;
       close_out chan;
       chmod script 0o700;

       let shell = try getenv "SHELL" with Not_found -> "/bin/sh" in

       (* Set output to file. *)
       let output = dir // "output.txt" in
       let fd = openfile output [O_WRONLY; O_CREAT; O_TRUNC; O_NOCTTY] 0o600 in
       dup2 fd stdout;
       dup2 fd stderr;
       close fd;

       (* Execute the shell script. *)
       (try execvp shell [| shell; "-c"; script |]
        with Unix_error (err, fn, _) ->
          Syslog.error "%s failed: %s: %s" fn script (error_message err)
       )
     with exn ->
       Syslog.error "job %s: failed to set up child process: %s"
         job.job_name (Printexc.to_string exn)
    );
    _exit 1
  );

  (* Remember this PID, the job and the temporary directory, so we
   * can clean up when the child exits.
   *)
  runningmap := IntMap.add pid (job, dir, serial, time ()) !runningmap;
  serialmap := BigIntMap.add serial pid !serialmap

(* Create a new private (mode 0700) directory under the system
 * temporary directory and return its path.  The name is "whenjobs"
 * followed by the hex digest of 16 bytes read from /dev/urandom.
 *)
and tmpdir () =
  let chan = open_in "/dev/urandom" in
  let data = String.create 16 in
  really_input chan data 0 (String.length data);
  close_in chan;
  let data = Digest.to_hex (Digest.string data) in
  let dir = Filename.temp_dir_name // sprintf "whenjobs%s" data in
  mkdir dir 0o700;
  dir

(* This is called when a job (child process) exits. *)
and handle_sigchld _ =
  (* Several children exiting close together may be reported by a
   * single SIGCHLD, so keep reaping until waitpid has nothing more
   * to report (returns 0) or there are no children left (raises
   * ECHILD).
   *)
  let rec reap () =
    let pid, status = waitpid [WNOHANG] 0 in
    if pid > 0 then (
      (try
         (* Look up the PID in the running jobs map. *)
         let job, dir, serial, start_time = IntMap.find pid !runningmap in
         runningmap := IntMap.remove pid !runningmap;
         serialmap := BigIntMap.remove serial !serialmap;
         post_job job dir serial start_time status
       with Not_found -> ()  (* not a child we are tracking *)
      );
      reap ()
    )
  in
  try reap () with Unix_error _ -> ()

(* Called after a job has exited: run the job's post function (if it
 * has one), then remove the job's temporary directory.  [time] is the
 * start time of the job and [status] is its wait status.
 *)
and post_job job dir serial time status =
  (* If there is a post function, run it. *)
  (match job.job_post with
   | None -> ()
   | Some post ->
     (* Jobs which were killed or stopped by a signal are reported
      * with exit code 1.
      *)
     let code =
       match status with
       | WEXITED c -> c
       | WSIGNALED _ | WSTOPPED _ -> 1 in
     let result = {
       res_job_name = job.job_name;
       res_serial = serial;
       res_code = code;
       res_tmpdir = dir;
       res_output = dir // "output.txt";
       res_start_time = time
     } in
     (* A failing post function must not stop the directory below
      * from being removed, so log and carry on.
      *)
     try post result
     with
     | Failure msg ->
       Syslog.error "job %s post function failed: %s" job.job_name msg
     | exn ->
       Syslog.error "job %s post function exception: %s"
         job.job_name (Printexc.to_string exn)
  );

  remove_tmpdir dir

(* Recursively remove a job's temporary directory.  The path is
 * quoted for the shell since the leading part of it comes from
 * [Filename.temp_dir_name].  The exit status of rm is ignored.
 *)
and remove_tmpdir dir =
  let cmd = sprintf "rm -rf %s" (Filename.quote dir) in
  ignore (Sys.command cmd)

(* Intelligent comparison of job names.  If both names have the form
 * "job$<integer>" they are compared numerically, otherwise the names
 * are compared as ordinary strings.
 *)
and compare_jobnames name1 name2 =
  try
    let len1 = String.length name1
    and len2 = String.length name2 in
    if len1 > 4 && len2 > 4 &&
      String.sub name1 0 4 = "job$" && String.sub name2 0 4 = "job$"
    then (
      let i1 = int_of_string (String.sub name1 4 (len1-4)) in
      let i2 = int_of_string (String.sub name2 4 (len2-4)) in
      compare i1 i2
    )
    else raise Not_found
  with
  (* Not_found: not both "job$..." names; Failure: suffix not an integer. *)
  | Not_found | Failure _ -> compare name1 name2

(* Run the event loop. *)
let main_loop () =
  Unixqueue.run esys