X-Git-Url: http://git.annexia.org/?p=whenjobs.git;a=blobdiff_plain;f=daemon%2Fdaemon.ml;h=ed5e067f802558f4ba3b55bbe677b23119156d6d;hp=c3e1abd33f605dc46955bc442d5e339ff7465074;hb=57d21a4a716b513d93790262db4a663c169814bd;hpb=f35f462b83f860958da38347d30b45986b8f4774 diff --git a/daemon/daemon.ml b/daemon/daemon.ml index c3e1abd..ed5e067 100644 --- a/daemon/daemon.ml +++ b/daemon/daemon.ml @@ -78,6 +78,7 @@ let rec init j d = ~proc_get_jobs ~proc_cancel_job ~proc_start_job + ~proc_get_job (Rpc_server.Unix addr) Rpc.Tcp (* not TCP, this is the same as SOCK_STREAM *) Rpc.Socket @@ -106,7 +107,7 @@ and proc_set_variable (name, value) = state := Whenstate.set_variable !state name value; (* Which jobs need to be re-evaluated? *) - let jobs = Whenstate.get_dependencies !state name in + let jobs = Whenstate.get_dependencies !state [name] in reevaluate_whenjobs jobs; `ok @@ -167,6 +168,18 @@ and proc_start_job jobname = | Not_found -> `error "job not found" | exn -> `error (Printexc.to_string exn) +and proc_get_job serial = + try + let serial = big_int_of_string serial in + let pid = BigIntMap.find serial !serialmap in + let job, dir, serial, start_time = IntMap.find pid !runningmap in + { Whenproto_aux.job_name = job.job_name; + job_serial = string_of_big_int serial; + job_tmpdir = dir; job_start_time = Int64.of_float start_time } + with + | Not_found -> failwith "job not found" + | exn -> failwith (Printexc.to_string exn) + (* Reload the jobs file. *) and reload_file () = let file = sprintf "%s/jobs.cmo" !jobsdir in @@ -192,6 +205,7 @@ and reload_file () = | exn -> failwith (Printexc.to_string exn) in + let s = Whenstate.copy_prev_state !state s in state := s; (* Re-evaluate all when jobs. *) @@ -329,57 +343,87 @@ and run_job job = serial | _ -> assert false in - Syslog.notice "running %s (JOBSERIAL=%s)" - job.job_name (string_of_big_int serial); - - (* Create a temporary directory. The current directory of the job - * will be in this directory. The directory is removed when the - * child process exits. + (* Call the pre-condition script. Note this may decide not to run + * the job by returning false. *) - let dir = tmpdir () in - - let pid = fork () in - if pid = 0 then ( (* child process running the job *) - chdir dir; - - (* Set environment variables corresponding to each variable. *) - List.iter - (fun (name, value) -> putenv name (string_of_variable value)) - (Whenstate.get_variables !state); - - (* Set the $JOBNAME environment variable. *) - putenv "JOBNAME" job.job_name; - - (* Create a temporary file containing the shell script fragment. *) - let script = dir // "script.sh" in - let chan = open_out script in - fprintf chan "set -e\n"; (* So that jobs exit on error. *) - output_string chan job.job_script.sh_script; - close_out chan; - chmod script 0o700; - - let shell = try getenv "SHELL" with Not_found -> "/bin/sh" in - - (* Set output to file. *) - let output = dir // "output.txt" in - let fd = openfile output [O_WRONLY; O_CREAT; O_TRUNC; O_NOCTTY] 0o600 in - dup2 fd stdout; - dup2 fd stderr; - close fd; - - (* Execute the shell script. *) - (try execvp shell [| shell; "-c"; script |]; - with Unix_error (err, fn, _) -> - Syslog.error "%s failed: %s: %s" fn script (error_message err) + let pre_condition () = + match job.job_pre with + | None -> true + | Some pre -> + let rs = ref [] in + IntMap.iter ( + fun pid (job, _, serial, start_time) -> + let r = { pirun_job_name = job.job_name; + pirun_serial = serial; + pirun_start_time = start_time; + pirun_pid = pid } in + rs := r :: !rs + ) !runningmap; + let preinfo = { + pi_job_name = job.job_name; + pi_serial = serial; + pi_variables = Whenstate.get_variables !state; + pi_running = !rs; + } in + pre preinfo + in + if pre_condition () then ( + Syslog.notice "running %s (JOBSERIAL=%s)" + job.job_name (string_of_big_int serial); + + (* Create a temporary directory. The current directory of the job + * will be in this directory. The directory is removed when the + * child process exits. + *) + let dir = tmpdir () in + + let pid = fork () in + if pid = 0 then ( (* child process running the job *) + chdir dir; + + (* Set environment variables corresponding to each variable. *) + List.iter + (fun (name, value) -> putenv name (string_of_variable value)) + (Whenstate.get_variables !state); + + (* Set the $JOBNAME environment variable. *) + putenv "JOBNAME" job.job_name; + + (* Create a temporary file containing the shell script fragment. *) + let script = dir // "script.sh" in + let chan = open_out script in + fprintf chan "set -e\n"; (* So that jobs exit on error. *) + output_string chan job.job_script.sh_script; + close_out chan; + chmod script 0o700; + + let shell = try getenv "SHELL" with Not_found -> "/bin/sh" in + + (* Set output to file. *) + let output = dir // "output.txt" in + let fd = openfile output [O_WRONLY; O_CREAT; O_TRUNC; O_NOCTTY] 0o600 in + dup2 fd stdout; + dup2 fd stderr; + close fd; + + (* Execute the shell script. *) + (try execvp shell [| shell; "-c"; script |]; + with Unix_error (err, fn, _) -> + Syslog.error "%s failed: %s: %s" fn script (error_message err) + ); + _exit 1 ); - _exit 1 - ); - (* Remember this PID, the job and the temporary directory, so we - * can clean up when the child exits. - *) - runningmap := IntMap.add pid (job, dir, serial, time ()) !runningmap; - serialmap := BigIntMap.add serial pid !serialmap + (* Remember this PID, the job and the temporary directory, so we + * can clean up when the child exits. + *) + runningmap := IntMap.add pid (job, dir, serial, time ()) !runningmap; + serialmap := BigIntMap.add serial pid !serialmap + ) + else ( + Syslog.notice "not running %s (JOBSERIAL=%s) because pre() condition returned false" + job.job_name (string_of_big_int serial); + ) and tmpdir () = let chan = open_in "/dev/urandom" in @@ -400,15 +444,15 @@ and handle_sigchld _ = let job, dir, serial, time = IntMap.find pid !runningmap in runningmap := IntMap.remove pid !runningmap; serialmap := BigIntMap.remove serial !serialmap; - cleanup_job job dir serial time status + post_job job dir serial time status ) with Unix_error _ | Not_found -> () -and cleanup_job job dir serial time status = - (* If there is a cleanup function, run it. *) - (match job.job_cleanup with +and post_job job dir serial time status = + (* If there is a post function, run it. *) + (match job.job_post with | None -> () - | Some cleanup -> + | Some post -> let code = match status with | WEXITED c -> c @@ -421,12 +465,12 @@ and cleanup_job job dir serial time status = res_output = dir // "output.txt"; res_start_time = time } in - try cleanup result + try post result with | Failure msg -> - Syslog.error "job %s cleanup function failed: %s" job.job_name msg + Syslog.error "job %s post function failed: %s" job.job_name msg | exn -> - Syslog.error "job %s cleanup function exception: %s" + Syslog.error "job %s post function exception: %s" job.job_name (Printexc.to_string exn) );