From a70987f316ab4b948bf941ddea8fb6ccef09da4f Mon Sep 17 00:00:00 2001 From: "Richard W.M. Jones" Date: Thu, 9 Jan 2020 18:11:42 +0000 Subject: [PATCH 1/1] Split implementation into dependency analysis and traversal. --- Goalfile.in | 1 + src/deps.ml | 523 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/deps.mli | 57 +++++++ src/jobs.ml | 263 +++++++---------------------- src/jobs.mli | 62 ++----- src/main.ml | 12 +- src/run.ml | 318 +---------------------------------- src/run.mli | 19 ++- tests/test1.gl | 2 + 9 files changed, 687 insertions(+), 570 deletions(-) create mode 100644 src/deps.ml create mode 100644 src/deps.mli diff --git a/Goalfile.in b/Goalfile.in index 7a085e3..5aa349d 100644 --- a/Goalfile.in +++ b/Goalfile.in @@ -71,6 +71,7 @@ let objects = [ "src/lexer.cmx", "src/parse.cmx", "src/eval.cmx", + "src/deps.cmx", "src/run.cmx", "src/main.cmx" ] diff --git a/src/deps.ml b/src/deps.ml new file mode 100644 index 0000000..6656e87 --- /dev/null +++ b/src/deps.ml @@ -0,0 +1,523 @@ +(* Goalfile dependency DAG + * Copyright (C) 2020 Richard W.M. Jones + * Copyright (C) 2020 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + *) + +open Printf + +open Utils + +type node = + | Goal of Ast.env * Ast.loc * string * Ast.expr list * + Ast.goal * Ast.expr list * string + | Exists of Ast.env * Ast.loc * Ast.pattern * string + +let string_of_node = function + | Goal (_, _, _, _, _, _, debug_goal) -> debug_goal + | Exists (_, _, _, debug_tactic) -> debug_tactic + +let compare_nodes n1 n2 = + match n1, n2 with + | Goal _, Exists _ -> -1 + | Exists _, Goal _ -> 1 + | Exists (_, _, p1, _), Exists (_, _, p2, _) -> compare p1 p2 + | Goal (_, _, n1, a1, _, _, _), Goal (_, _, n2, a2, _, _, _) -> + compare (n1, a1) (n2, a2) + +module G = Map.Make ( + struct + type t = node + let compare = compare_nodes + end +) + +type dag = { + (* List of nodes. *) + nodes : node list; + + (* Edges are stored as an adjacency list, which is a map from + * a parent node to a list of child nodes. Note that as the + * graph does not need to be connected, there may be nodes + * in the list above which don't appear in this map. + *) + edges : node list G.t; +} + +type t = dag * node list +(* The final type is a DAG and a topologically sorted list of nodes. *) + +(* Creates a new DAG. *) +let rec new_dag () = { nodes = []; edges = G.empty } + +(* This will create a new node, unless the node already exists. + * If the optional parent parameter is given then it also creates + * an edge from parent to the new (or existing) node. + *) +and add_node { nodes; edges } ?parent data = + let node, nodes = + try List.find (fun n -> compare_nodes n data = 0) nodes, nodes + with Not_found -> data, data :: nodes in + let edges = + match parent with + | None -> edges + | Some parent -> + let children = try G.find parent edges with Not_found -> [] in + if List.mem node children then edges + else G.add parent (node :: children) edges in + node, { nodes; edges } + +(* This is Khan's algorithm. *) +and topological_sort dag = + let incoming_map = incoming_map dag in + + (* Set of all nodes with no incoming edge. *) + let q = List.filter (fun node -> not (G.mem node incoming_map)) dag.nodes in + + let rec loop dag acc im = function + | [] -> dag, acc + | node :: q -> + let acc = node :: acc in + let children = try G.find node dag.edges with Not_found -> [] in + let dag, q, im = + List.fold_left ( + fun (dag, q, im) child -> + (* There's an arrow from node to child. *) + let dag = + { nodes = + List.filter (fun n -> compare_nodes n node <> 0) dag.nodes; + edges = remove_edge dag.edges node child } in + let im = remove_edge im child node in + let q = if not (G.mem child im) then child :: q else q in + (dag, q, im) + ) (dag, q, im) children in + loop dag acc im q + in + let dag, acc = loop dag [] incoming_map q in + + if not (G.is_empty dag.edges) then + (* XXX More debugging to help out users! I believe the remaining + * edges should demonstrate the cycle. + *) + failwithf "dependency graph contains cycles"; + + (* This builds the topological list in reverse order, but that's + * fine because that is the running order. + *) + acc + +(* The dag structure has an adjacency list, which is a list of outgoing + * edges from each node. But for a topological sort what we actually + * need is another list of incoming edges, so construct that first. + * + * Note this never returns a mapping node -> []. + *) +and incoming_map { edges } = + let im = ref G.empty in + G.iter ( + fun parent children -> + List.iter ( + fun c -> + (* There is an arrow from parent -> c. *) + let xs = try G.find c !im with Not_found -> [] in + im := G.add c (parent :: xs) !im + ) children + ) edges; + !im + +(* Remove edge from parent to child returning a new edges map. + * Preserves the invariant that there is never a mapping node -> []. + *) +and remove_edge edges parent child = + try + let children = G.find parent edges in + let children = + List.filter (fun n -> compare_nodes n child <> 0) children in + if children = [] then + G.remove parent edges + else + G.add parent children edges + with + Not_found -> edges + +and debug_dag { nodes; edges } = + eprintf "nodes:\n"; + List.iter (fun node -> eprintf " %s\n" (string_of_node node)) nodes; + eprintf "edges:\n"; + G.iter ( + fun parent children -> + eprintf " %s ->" (string_of_node parent); + List.iter (fun c -> eprintf " %s" (string_of_node c)) children; + eprintf "\n" + ) edges + +let rec create env roots = + let dag = new_dag () in + let dag = add_targets dag env roots in + if Cmdline.debug_flag () then debug_dag dag; + (* Make actually breaks cycles, but I'm not convinced that this + * is a good idea, so this function will fail if any cycle is + * found. We may wish to revisit this decision in future. + *) + let sorted = topological_sort dag in + if Cmdline.debug_flag () then + eprintf "dependency order:\n %s\n" + (String.concat " <- " (List.map string_of_node sorted)); + dag, sorted + +and add_targets dag ?parent env roots = + List.fold_left (fun dag root -> add_target dag ?parent env root) dag roots + +and add_target dag ?parent env = function + | Ast.EGoalDefn _ | Ast.EFuncDefn _ | Ast.ETacticDefn _ -> assert false + + (* Call a goal or function. *) + | Ast.ECall (loc, name, args) -> + let expr = Ast.getvar env loc name in + (match expr with + | Ast.EGoalDefn (_, goal) -> + add_goal dag ?parent env loc name args goal [] + | Ast.EFuncDefn (_, func) -> + let expr = Eval.call_function env loc name args func in + add_target dag ?parent env expr + | _ -> + failwithf "%a: tried to call ‘%s’ which is not a goal or a function" + Ast.string_loc loc name + ) + + (* Call a tactic. *) + | Ast.ETacticCtor (loc, name, args) -> + (* All parameters of tactics must be simple constant expressions + * (strings, in future booleans, numbers, etc). + *) + let args = List.map (Eval.to_constant env) args in + add_tactic dag ?parent env loc name args + + (* If this is a goal then it's the same as calling goal(). If not + * then look up the variable and substitute it. + *) + | Ast.EVar (loc, name) -> + let expr = Ast.getvar env loc name in + (match expr with + | Ast.EGoalDefn (loc, ([], _, _, _)) -> + add_target dag ?parent env (Ast.ECall (loc, name, [])) + | EGoalDefn _ -> + failwithf "%a: cannot call %s() since this goal has parameters" + Ast.string_loc loc name + | _ -> + add_target dag ?parent env expr + ) + + (* Lists are inlined when found as a target. *) + | Ast.EList (loc, exprs) -> + add_targets dag ?parent env exprs + + (* A string (with or without substitutions) implies *file(filename). *) + | Ast.ESubsts (loc, str) -> + let str = Eval.substitute env loc str in + add_tactic dag ?parent env loc "*file" [Ast.CString str] + + | Ast.EConstant (loc, c) -> + add_tactic dag ?parent env loc "*file" [c] + +(* Add a goal by name. *) +and add_goal dag ?parent env loc name args + ((params, patterns, deps, code) as goal) + extra_deps = + (* This is used to print the goal in debug and error messages only. *) + let debug_goal = + sprintf "%s (%s)" name + (String.concat ", " (List.map (Ast.string_expr ()) args)) in + Cmdline.debug "%a: adding goal %s" Ast.string_loc loc debug_goal; + + (* This is the point where we evaluate the goal arguments. We must + * do this before creating the new environment, because variables + * appearing in goal arguments don't refer to goal parameters. + *) + let args = List.map (Eval.evaluate_goal_arg env) args in + + (* Create a new environment which maps the parameter names to + * the args. + *) + let env = + let params = + try List.combine params args + with Invalid_argument _ -> + failwithf "%a: calling goal %s with wrong number of arguments, expecting %d args but got %d args" + Ast.string_loc loc debug_goal + (List.length params) (List.length args) in + List.fold_left (fun env (k, v) -> Ast.Env.add k v env) env params in + + (* Create the node. *) + let node, dag = + add_node dag ?parent (Goal (env, loc, name, args, goal, + extra_deps, debug_goal)) in + + (* Add all dependencies. *) + add_targets dag ~parent:node env (deps @ extra_deps) + +(* Find the goal which matches the given tactic and add it. + * cargs is a list of parameters (all constants). + *) +and add_tactic dag ?parent env loc tactic cargs = + (* This is used to print the tactic in debug and error messages only. *) + let debug_tactic = + Ast.string_expr () + (Ast.ETacticCtor (loc, tactic, + List.map (fun c -> Ast.EConstant (loc, c)) cargs)) in + Cmdline.debug "%a: adding tactic %s" Ast.string_loc loc debug_tactic; + + (* Find all goals in the environment. Returns a list of (name, goal). *) + let goals = + let env = Ast.Env.bindings env in + filter_map + (function + | name, Ast.EGoalDefn (loc, goal) -> Some (name, goal) + | _ -> None) env in + + (* Find all patterns. Returns a list of (pattern, name, goal). *) + let patterns : (Ast.pattern * Ast.id * Ast.goal) list = + List.flatten + (List.map (fun (name, ((_, patterns, _, _) as goal)) -> + List.map (fun pattern -> (pattern, name, goal)) patterns) goals) in + + (* Find any patterns (ie. tactics) which match the one we + * are searching for. This returns a binding for the goal args, + * so we end up with a list of (pattern, name, goal, args). + *) + let patterns : (Ast.pattern * Ast.id * Ast.goal * Ast.expr list) list = + filter_map ( + fun (pattern, name, ((params, _, _, _) as goal)) -> + match matching_pattern env loc tactic cargs pattern params with + | None -> None + | Some args -> Some (pattern, name, goal, args) + ) patterns in + + match patterns with + | [] -> + (* There's no matching goal, but we don't need one if + * the tactic doesn't need to be rebuilt. So create a + * special Exists node which will be used to run the tactic + * to see if the target needs to be rebuilt, and only fail + * if it does need a rebuild. + *) + let targs = List.map (function Ast.CString s -> [Ast.SString s]) cargs in + let p = Ast.PTactic (loc, tactic, targs) in + let _, dag = add_node dag ?parent (Exists (env, loc, p, debug_tactic)) in + dag + + | [_, name, goal, args] -> + (* Single goal matches. *) + add_goal dag ?parent env loc name args goal [] + + | goals -> + (* Two or more goals match. Only one must have a CODE section, + * and we combine the dependencies into a "supergoal". + *) + let with_code, without_code = + List.partition ( + fun (_, _, (_, _, _, code), _) -> code <> None + ) goals in + + let (_, name, goal, args), extra_deps = + match with_code with + | [g] -> + let extra_deps = + List.flatten ( + List.map (fun (_, _, (_, _, deps, _), _) -> deps) without_code + ) in + (g, extra_deps) + + | [] -> + (* This is OK, it means we'll rebuild all dependencies + * but there is no code to run. Pick the first goal + * without code and the dependencies from the other goals. + *) + let g = List.hd without_code in + let extra_deps = + List.flatten ( + List.map (fun (_, _, (_, _, deps, _), _) -> deps) + (List.tl without_code) + ) in + (g, extra_deps) + + | _ :: _ -> + failwithf "%a: multiple goals found which match tactic %s, but more than one of these goals have {code} sections which is not allowed" + Ast.string_loc loc debug_tactic in + + add_goal dag ?parent env loc name args goal extra_deps + +(* Test if pattern matches *tactic(cargs). If it does + * then we return Some args where args is the arguments that must + * be passed to the matching goal. The params parameter is + * the names of the parameters of that goal. + *) +and matching_pattern env loc tactic cargs pattern params = + match pattern with + | Ast.PTactic (loc, ttactic, targs) + when tactic <> ttactic || + List.length cargs <> List.length targs -> + None (* Can't possibly match if tactic name or #args is different. *) + | Ast.PTactic (loc, ttactic, targs) -> + (* Do the args match with a possible params binding? *) + try Some (matching_params env loc params targs cargs) + with Not_found -> None + +(* Return a possible binding. For example the goal is: + * goal compile (name) = "%name.o": "%name.c" {} + * which means that params = ["name"] and targs = ["%name.o"]. + * + * If we are called with cargs = ["file1.o"], we would + * return ["file1"]. + * + * On non-matching this raises Not_found. + *) +and matching_params env loc params targs cargs = + (* This is going to record the resulting binding. *) + let res = ref Ast.Env.empty in + List.iter2 (matching_param env loc params res) targs cargs; + + (* Rearrange the result into goal parameter order. Also this + * checks that every parameter got a binding. + *) + let res = !res in + List.map ( + (* Allow the Not_found exception to escape if no binding for this param. *) + fun param -> Ast.Env.find param res + ) params + +(* If targ = "%name.o" and carg = "file.o" then this would set + * name => "file" in !res. If they don't match, raises Not_found. + *) +and matching_param env loc params res targ carg = + match carg with + | Ast.CString carg -> + (* Substitute any non parameters in targ from the environment. *) + let targ = + List.map ( + function + | Ast.SString _ as s -> s + | Ast.SVar name -> + if not (List.mem name params) then ( + try + let expr = Ast.getvar env loc name in + match Eval.to_constant env expr with + | Ast.CString s -> Ast.SString s + with Failure _ -> raise Not_found + ) + else + Ast.SVar name + ) targ in + + (* Do the actual pattern matching. Any remaining SVar elements + * must refer to goal parameters. + *) + let carg = ref carg in + let rec loop = function + | [] -> + (* End of targ, we must have matched all of carg. *) + if !carg <> "" then raise Not_found + | Ast.SString s :: rest -> + (* Does this match the first part of !carg? *) + let clen = String.length !carg in + let slen = String.length s in + if slen > clen || s <> String.sub !carg 0 slen then + raise Not_found; + (* Yes, so continue after the matching prefix. *) + carg := String.sub !carg slen (clen-slen); + loop rest + | Ast.SVar name :: Ast.SString s :: rest -> + (* This is a goal parameter. Find s later in !carg. *) + let i = string_find !carg s in + if i = -1 then raise Not_found; + (* Set the binding in !res. *) + let r = Ast.EConstant (Ast.noloc, + Ast.CString (String.sub !carg 0 i)) in + res := Ast.Env.add name r !res; + (* Continue after the match. *) + let skip = i + String.length s in + carg := String.sub !carg skip (String.length !carg - skip); + loop rest + | Ast.SVar name :: [] -> + (* Matches the whole remainder of the string. *) + let r = Ast.EConstant (Ast.noloc, Ast.CString !carg) in + res := Ast.Env.add name r !res + | Ast.SVar x :: Ast.SVar y :: _ -> + (* TODO! We cannot match a target like "%x%y". *) + assert false + in + loop targ + +type goal_runner = + Ast.env -> Ast.loc -> string -> Ast.expr list -> Ast.goal -> + Ast.expr list -> string -> unit + +type exists_runner = Ast.env -> Ast.loc -> Ast.pattern -> string -> unit + +type state = { + dag : dag; + goal_runner : goal_runner; + exists_runner : exists_runner; + + (* Topologically sorted in build order. When nodes start running + * we take them off this list. + *) + mutable sorted_nodes : node list; + + (* List of nodes which completed successfully. Actually for fast + * access we store a map node -> true. + *) + mutable complete : bool G.t; +} + +let new_state (dag, sorted_nodes) goal_runner exists_runner = + { dag; goal_runner; exists_runner; sorted_nodes; complete = G.empty } + +(* Called by [Jobs] when a node completes successfully. We mark + * it as done. + *) +let retire_job state node = + state.complete <- G.add node true state.complete + +let rec next_job state = + (* Find the earliest node in the list which has all its + * dependencies done. + *) + let rec loop acc = function + | [] -> + if state.sorted_nodes = [] then Jobs.Complete else Jobs.Not_ready + | node :: nodes -> + if node_is_ready_to_run state node then ( + state.sorted_nodes <- List.rev acc @ nodes; + match node with + | Goal (env, loc, name, args, goal, extra_deps, debug_goal) -> + Jobs.Job (node, fun () -> + state.goal_runner env loc name args goal + extra_deps debug_goal) + | Exists (env, loc, p, debug_tactic) -> + Jobs.Job (node, fun () -> + state.exists_runner env loc p debug_tactic) + ) + else + loop (node :: acc) nodes + in + loop [] state.sorted_nodes + +and node_is_ready_to_run { dag; complete } node = + let parents = try G.find node dag.edges with Not_found -> [] in + List.for_all (fun p -> G.mem p complete) parents + +let string_of_job = string_of_node diff --git a/src/deps.mli b/src/deps.mli new file mode 100644 index 0000000..7cdcdab --- /dev/null +++ b/src/deps.mli @@ -0,0 +1,57 @@ +(* Goalfile dependency DAG + * Copyright (C) 2020 Richard W.M. Jones + * Copyright (C) 2020 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + *) + +type t +(** The type of the directed acyclic graph of dependencies. *) + +val create : Ast.env -> Ast.expr list -> t +(** [create env roots] constructs the DAG of dependencies starting + with the roots, returning the final structure or raising + [Failure _] if there is a problem. *) + +type state +(** State held when running jobs. *) + +type goal_runner = + Ast.env -> Ast.loc -> string -> Ast.expr list -> Ast.goal -> + Ast.expr list -> string -> unit +(** Run a single goal. *) + +type exists_runner = Ast.env -> Ast.loc -> Ast.pattern -> string -> unit +(** Run an existence tactic. *) + +val new_state : t -> goal_runner -> exists_runner -> state +(** Create a new state object from the DAG. + + [goal_runner] is a function for running single goals. + [exists_runner] is a function for running existence tactics. + See the {!Run} module. *) + +type node + +val retire_job : state -> node -> unit +(** See {!Jobs.run}. *) + +val next_job : state -> node Jobs.next +(** Returns the next job that is able to run. (See {!Jobs.run}). + + It is always guaranteed that the next job which is returned + has already had all its dependencies retired already. *) + +val string_of_job : node -> string diff --git a/src/jobs.ml b/src/jobs.ml index cdf4e29..085d466 100644 --- a/src/jobs.ml +++ b/src/jobs.ml @@ -19,216 +19,71 @@ open Utils -module type Key = sig - type t - val compare : t -> t -> int - val to_string : t -> string -end +type 'a next = Job of 'a * (unit -> unit) | Complete | Not_ready -module type Jobs = sig - type key - type group - val new_group : unit -> group - val start : group -> key -> (unit -> unit) -> unit - val wait : group -> unit - val stop_all : unit -> unit -end +type 'a retire = 'a -> unit -module Make (K : Key) = struct - type key = K.t +type 'a to_string = 'a -> string - type state = Waiting | Running | Done - type job = { - mutable state : state; - f : unit -> unit; (* The function to run the job. *) - mutable exn : exn option; (* If the job raised an exception. *) - } +let run next_job retire_job string_of_job = + (* Number of running threads <= Cmdline.nr_jobs. *) + let running = ref 0 in - type queue = { - (* Lock preventing multiple jobs with the same key from - * running at the same time. - *) - q_lock : Mutex.t; - mutable q : job list; (* List of jobs on this queue. *) - } - let new_queue () = { q_lock = Mutex.create (); q = [] } + (* Lock and condition for when a thread exits. *) + let lock = Mutex.create () and cond = Condition.create () in - (* All of the shared state below is protected by this lock that - * you must hold before using any of it. - *) - let lock = Mutex.create () + (* If a job throws an exception it is saved here. *) + let last_exn = ref None in - (* Jobs are queued on separate queues according to their key. - * qs is a map of the key to the list of jobs in that queue. - *) - module Qs = Map.Make (K) - let qs = ref Qs.empty + (* This is the background thread which runs each job. *) + let runner (job, f) = + let exn = try f (); None with exn -> Some exn in - (* Threads which are idle wait on this condition. This is - * signalled when: - * - a new job is added (idle threads may be able to run it) - * - a job finishes (idle threads may be able to run another - * job which has the same key as the one which finished) - *) - let idle = Condition.create () - - (* Threads which are running or idle but NOT waiting. This - * starts as one because the main thread is running. A thread - * which is waiting is essentially blocking another job which - * could run, so we should start a new thread. A thread which - * is idle on the other hand is not blocking anything from - * running, it's idle because there is nothing that can be run. - * - * We aim to keep this <= Cmdline.nr_jobs. - *) - let ready = ref 1 - - (* If stop_all is called, this is set to true and we stop - * running new jobs. - *) - let stop = ref false - - (* The worker thread. *) - let rec worker _ = - let id = Thread.id (Thread.self ()) in - Mutex.lock lock; - incr ready; - while not !stop && !ready <= Cmdline.nr_jobs () do - (* See if there's any queue with a job which is ready to run. *) - Cmdline.debug "thread %d: checking for a runnable queue" id; - match get_runnable_queue () with - | None -> - (* Nothing that we can run, go idle. This also drops - * the lock so other threads can examine the queue. - *) - Cmdline.debug "thread %d: idle" id; - Condition.wait idle lock; - | Some q -> - (* Note that q.q_lock is now held by this thread, and q.q - * is non-empty. Pick the job off the head of this queue. - *) - let job = List.hd q.q in - q.q <- List.tl q.q; - - (* Run the job, dropping the main lock while running. *) - job.state <- Running; - Mutex.unlock lock; - Cmdline.debug "thread %d: running job" id; - let exn = try job.f (); None with exn -> Some exn in - Cmdline.debug "thread %d: finished job" id; - Mutex.lock lock; - job.state <- Done; - job.exn <- exn; - (* Since we have finished a job, it may be that other - * idle threads could now run (if a job with the same - * key is waiting). - *) - Mutex.unlock q.q_lock; - Condition.broadcast idle - done; - decr ready; - Mutex.unlock lock - - (* Check all the queues to see if there is any job which can run. - * The lock must be held when calling this function. This - * locks the queue if it finds one. - *) - and get_runnable_queue () = - try - let qs = List.map snd (Qs.bindings !qs) in - Some (List.find is_runnable_queue qs) - with - Not_found -> None - - (* Return true iff the queue contains jobs and no existing job - * from this queue is already running. This locks the queue - * if it returns true. - *) - and is_runnable_queue = function - | { q = [] } -> false - | { q_lock } -> Mutex.try_lock q_lock - - (* A group is simply a list of jobs. *) - type group = job list ref - let new_group () = ref [] - - (* Submit a new job. *) - let start group key f = - let id = Thread.id (Thread.self ()) in - Cmdline.debug "thread %d: submitting new job" id; Mutex.lock lock; - let job = { state = Waiting; f; exn = None } in - group := job :: !group; - - (* Put the job on the queue associated with this key. *) - let q = - try Qs.find key !qs - with Not_found -> - let q = new_queue () in (* Allocate a new queue for this key. *) - qs := Qs.add key q !qs; - q in - q.q <- q.q @ [job]; - - (* Wake up any idle threads. *) - Condition.signal idle; + (match exn with + | None -> retire_job job + | Some exn -> last_exn := Some exn + ); + decr running; + Condition.signal cond; Mutex.unlock lock - - (* Wait for all jobs in the group to be done. *) - let rec wait group = - let id = Thread.id (Thread.self ()) in - Mutex.lock lock; - while not !stop && not (all_done group); do - decr ready; - (* Start more threads if fewer than nr_jobs threads are ready. *) - let needed = Cmdline.nr_jobs () - !ready in - if not !stop && needed > 0 then - ignore (Array.init needed (Thread.create worker)); - - Cmdline.debug "thread %d: waiting for group to complete" id; - Condition.wait idle lock; - incr ready - done; - Mutex.unlock lock; - - (* If any job in the group raised an exception, we re-raise it here. - * We can only reraise the first exception though so if there are - * multiple failures then the exceptions are lost, but that doesn't - * really matter as long as goals exits with an error. Note that if - * we are being called from a worker, then the exception we raise - * here may be saved and reraised in another wait. - *) - List.iter ( - fun job -> - match job.exn with - | None -> () - | Some exn -> - Cmdline.debug "thread %d: raising exception in wait: %s" - id (Printexc.to_string exn); - stop := true; - raise exn - ) !group; - - (* It can happen that we didn't finish all jobs, especially if - * the stop flag was set in another thread. In this case we - * shouldn't just return as if everything is fine because it - * will cause the job to continue. Instead make sure we - * raise an error in this case. - *) - if not (all_done group) then - failwithf "job cancelled because of earlier error" - - (* Test if all jobs in a group are done. Note you must hold - * the lock. - *) - and all_done group = List.for_all (fun { state } -> state = Done) !group - - let stop_all () = - Mutex.lock lock; - (* All threads will exit after running jobs if this is set. *) - stop := true; - while !ready > 1 do - Condition.wait idle lock; - done; - Mutex.unlock lock - -end + in + + let rec loop () = + if !last_exn = None then ( + match next_job () with + | Complete -> + if !running > 0 then ( + Cmdline.debug "%d/%d threads running, waiting for completion" + !running (Cmdline.nr_jobs ()); + Condition.wait cond lock; + loop () + ) + | Not_ready -> + assert (!running > 0); + Cmdline.debug "%d/%d threads running, waiting for dependencies" + !running (Cmdline.nr_jobs ()); + (* Wait for any running thread to finish. *) + Condition.wait cond lock; + loop () + | Job (job, f) -> + incr running; + ignore (Thread.create runner (job, f)); + (* If we've reached the limit on number of threads, wait + * for any running thread to finish. + *) + while !running >= Cmdline.nr_jobs () do + Condition.wait cond lock + done; + loop () + ) + in + Mutex.lock lock; + loop (); + let exn = !last_exn in + Mutex.unlock lock; + + (* Re-raise the saved exception from the job which failed. *) + match exn with + | None -> () + | Some exn -> raise exn diff --git a/src/jobs.mli b/src/jobs.mli index afafbbd..8aedbba 100644 --- a/src/jobs.mli +++ b/src/jobs.mli @@ -17,55 +17,29 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. *) -(** This module manages parallel jobs. +(** This module manages parallel jobs. *) - Jobs are grouped. You call [new_group] to create a new - group of jobs, initially empty. Then add jobs to it. Then - wait for all the jobs in the group to complete. +type 'a next = Job of 'a * (unit -> unit) | Complete | Not_ready - To submit a job to a group use [start group key f]. [group] - is an existing group of jobs to which this is added. [key] is - a key which ensures that two identical jobs cannot be running - at the same time (across all groups). If two or more jobs - with the same key are submitted then only one will run and - the others will wait until the first finishes, and then another - will be picked to run and so on. Jobs with different keys run - freely in parallel, assuming there are enough threads available - to run them. +type 'a retire = 'a -> unit - Goals uses the goal (name + parameters) as the key to - ensure you cannot have two jobs running at the same time - which would interfere with each other by trying to build - the same target. +type 'a to_string = 'a -> string - To wait for a group of jobs to complete, call [wait group]. - *) - -module type Key = sig - type t - val compare : t -> t -> int - val to_string : t -> string -end - -module type Jobs = sig - type key - type group - - val new_group : unit -> group - (** Create a new empty jobs group. *) +val run : (unit -> 'a next) -> 'a retire -> 'a to_string -> unit +(** [run next_job retire_job] runs jobs in parallel. [next_job] + is called to pick the next available job. [retire_job] is + called when a job finishes successfully. - val start : group -> key -> (unit -> unit) -> unit - (** [start group key f] submits a job to run in the background. - The [key] ensures that two jobs with the same key cannot run - at the same time (across all groups). *) + If [next_job] returns [Job f] then that function is started + (usually in a thread if -j N > 1). - val wait : group -> unit - (** [wait group] waits for all of the jobs in the group to finish. *) + If [next_job] returns [Complete] then [run] waits until + all parallel jobs are then returns. - val stop_all : unit -> unit - (** This is used when goals exits with an error. All jobs which - are waiting to run are deleted, and we wait for all running - jobs to finish. *) -end + If [next_job] returns [Not_ready] then [next_job] will be + called again after a little while. -module Make (K : Key) : Jobs with type key = K.t + If any job throws an exception then the exception will be + reraised by [run], usually causing goals to exit with an error. + The exception is delayed until all currently running jobs + finish, but no new jobs will be started during this time. *) diff --git a/src/main.ml b/src/main.ml index 9890dba..ba4a195 100644 --- a/src/main.ml +++ b/src/main.ml @@ -77,13 +77,19 @@ let main () = if Cmdline.debug_flag () then Ast.print_env stderr env; - (* Run the target expressions. *) - Run.run_targets_to_completion env targets + (* Construct the dependency DAG with the root(s) being the targets. *) + let dag = Deps.create env targets in + + (* Run the jobs. *) + let state = Deps.new_state dag Run.goal_runner Run.exists_runner in + let next_job () = Deps.next_job state in + let retire_job job = Deps.retire_job state job in + let string_of_job job = Deps.string_of_job job in + Jobs.run next_job retire_job string_of_job let () = try main () with | Failure msg | Sys_error msg -> - Run.stop_all (); prerr_endline ("*** error: " ^ msg); exit 1 diff --git a/src/run.ml b/src/run.ml index 2df305b..2d4e326 100644 --- a/src/run.ml +++ b/src/run.ml @@ -21,125 +21,10 @@ open Printf open Utils -(* Goals uses the goal (name + parameters) as the key to - * ensure you cannot have two jobs running at the same time - * which would interfere with each other by trying to build - * the same target. - *) -module Jobs = Jobs.Make ( - struct - type t = string * Ast.expr list - let compare = compare - let to_string (name, args) = - sprintf "%s (%s)" name - (String.concat ", " (List.map (Ast.string_expr ()) args)) - end -) - -let stop_all = Jobs.stop_all - -(* Starts the target expressions running and waits for them to complete. *) -let rec run_targets_to_completion env exprs = - let group = Jobs.new_group () in - run_targets group env exprs; - Jobs.wait group - -(* This starts the targets, adding them to the jobs group, but does not - * wait for them to complete. - *) -and run_targets group env exprs = - List.iter (run_target group env) exprs - -(* This starts a single target, adding the (usually single but can - * be multiple) jobs to the jobs group. It does not wait for the - * jobs to complete. - *) -and run_target group env = function - | Ast.EGoalDefn _ | Ast.EFuncDefn _ | Ast.ETacticDefn _ -> assert false - - (* Call a goal or function. *) - | Ast.ECall (loc, name, args) -> - let expr = Ast.getvar env loc name in - (match expr with - | Ast.EGoalDefn (_, goal) -> - let key = name, args in - Jobs.start group key ( - fun () -> run_goal env loc name args goal [] - ) - | Ast.EFuncDefn (_, func) -> - let expr = Eval.call_function env loc name args func in - run_target group env expr - | _ -> - failwithf "%a: tried to call ‘%s’ which is not a goal or a function" - Ast.string_loc loc name - ) - - (* Call a tactic. *) - | Ast.ETacticCtor (loc, name, args) -> - (* All parameters of tactics must be simple constant expressions - * (strings, in future booleans, numbers, etc). - *) - let args = List.map (Eval.to_constant env) args in - run_tactic group env loc name args - - (* If this is a goal then it's the same as calling goal(). If not - * then look up the variable and substitute it. - *) - | Ast.EVar (loc, name) -> - let expr = Ast.getvar env loc name in - (match expr with - | Ast.EGoalDefn (loc, ([], _, _, _)) -> - run_target group env (Ast.ECall (loc, name, [])) - | EGoalDefn _ -> - failwithf "%a: cannot call %s() since this goal has parameters" - Ast.string_loc loc name - | _ -> - run_target group env expr - ) - - (* Lists are inlined when found as a target. *) - | Ast.EList (loc, exprs) -> - run_targets group env exprs - - (* A string (with or without substitutions) implies *file(filename). *) - | Ast.ESubsts (loc, str) -> - let str = Eval.substitute env loc str in - run_tactic group env loc "*file" [Ast.CString str] - - | Ast.EConstant (loc, c) -> - run_tactic group env loc "*file" [c] - -(* Run a goal by name. *) -and run_goal env loc name args (params, patterns, deps, code) extra_deps = - (* This is used to print the goal in debug and error messages only. *) - let debug_goal = - sprintf "%s (%s)" name - (String.concat ", " (List.map (Ast.string_expr ()) args)) in +let rec goal_runner env loc name args + (params, patterns, deps, code) extra_deps debug_goal = Cmdline.debug "%a: running goal %s" Ast.string_loc loc debug_goal; - (* This is the point where we evaluate the goal arguments. We must - * do this before creating the new environment, because variables - * appearing in goal arguments don't refer to goal parameters. - *) - let args = List.map (Eval.evaluate_goal_arg env) args in - - (* Create a new environment which maps the parameter names to - * the args. - *) - let env = - let params = - try List.combine params args - with Invalid_argument _ -> - failwithf "%a: calling goal %s with wrong number of arguments, expecting %d args but got %d args" - Ast.string_loc loc debug_goal - (List.length params) (List.length args) in - List.fold_left (fun env (k, v) -> Ast.Env.add k v env) env params in - - (* Check all dependencies have been updated. We must wait - * for these to complete before we can continue. - *) - run_targets_to_completion env (deps @ extra_deps); - (* Check if any target (ie. pattern) needs to be rebuilt. * As with make, a goal with no targets is always run. *) @@ -230,198 +115,9 @@ and needs_rebuild env loc deps extra_deps pattern = else failwithf "tactic ‘%s’ failed with exit code %d" tactic r -(* Find the goal which matches the given tactic and start it. - * cargs is a list of parameters (all constants). - *) -and run_tactic group env loc tactic cargs = - (* This is used to print the tactic in debug and error messages only. *) - let debug_tactic = - Ast.string_expr () - (Ast.ETacticCtor (loc, tactic, - List.map (fun c -> Ast.EConstant (loc, c)) cargs)) in - Cmdline.debug "%a: running tactic %s" Ast.string_loc loc debug_tactic; - - (* Find all goals in the environment. Returns a list of (name, goal). *) - let goals = - let env = Ast.Env.bindings env in - filter_map - (function - | name, Ast.EGoalDefn (loc, goal) -> Some (name, goal) - | _ -> None) env in +and exists_runner env loc p debug_tactic = + Cmdline.debug "%a: running implicit existence rule for tactic %s" + Ast.string_loc loc debug_tactic; - (* Find all patterns. Returns a list of (pattern, name, goal). *) - let patterns : (Ast.pattern * Ast.id * Ast.goal) list = - List.flatten - (List.map (fun (name, ((_, patterns, _, _) as goal)) -> - List.map (fun pattern -> (pattern, name, goal)) patterns) goals) in - - (* Find any patterns (ie. tactics) which match the one we - * are searching for. This returns a binding for the goal args, - * so we end up with a list of (pattern, name, goal, args). - *) - let patterns : (Ast.pattern * Ast.id * Ast.goal * Ast.expr list) list = - filter_map ( - fun (pattern, name, ((params, _, _, _) as goal)) -> - match matching_pattern env loc tactic cargs pattern params with - | None -> None - | Some args -> Some (pattern, name, goal, args) - ) patterns in - - match patterns with - | [] -> - (* There's no matching goal, but we don't need one if - * the tactic doesn't need to be rebuilt. - *) - let targs = List.map (function Ast.CString s -> [Ast.SString s]) cargs in - let p = Ast.PTactic (loc, tactic, targs) in - if needs_rebuild env loc [] [] p then - failwithf "%a: don't know how to build %s" - Ast.string_loc loc debug_tactic - - | [_, name, goal, args] -> - (* Single goal matches, run it. *) - let key = name, args in - Jobs.start group key ( - fun () -> run_goal env loc name args goal [] - ) - - | goals -> - (* Two or more goals match. Only one must have a CODE section, - * and we combine the dependencies into a "supergoal". - *) - let with_code, without_code = - List.partition ( - fun (_, _, (_, _, _, code), _) -> code <> None - ) goals in - - let (_, name, goal, args), extra_deps = - match with_code with - | [g] -> - let extra_deps = - List.flatten ( - List.map (fun (_, _, (_, _, deps, _), _) -> deps) without_code - ) in - (g, extra_deps) - - | [] -> - (* This is OK, it means we'll rebuild all dependencies - * but there is no code to run. Pick the first goal - * without code and the dependencies from the other goals. - *) - let g = List.hd without_code in - let extra_deps = - List.flatten ( - List.map (fun (_, _, (_, _, deps, _), _) -> deps) - (List.tl without_code) - ) in - (g, extra_deps) - - | _ :: _ -> - failwithf "%a: multiple goals found which match tactic %s, but more than one of these goals have {code} sections which is not allowed" - Ast.string_loc loc debug_tactic in - - let key = name, args in - Jobs.start group key (fun () -> - run_goal env loc name args goal extra_deps - ) - -(* Test if pattern matches *tactic(cargs). If it does - * then we return Some args where args is the arguments that must - * be passed to the matching goal. The params parameter is - * the names of the parameters of that goal. - *) -and matching_pattern env loc tactic cargs pattern params = - match pattern with - | Ast.PTactic (loc, ttactic, targs) - when tactic <> ttactic || - List.length cargs <> List.length targs -> - None (* Can't possibly match if tactic name or #args is different. *) - | Ast.PTactic (loc, ttactic, targs) -> - (* Do the args match with a possible params binding? *) - try Some (matching_params env loc params targs cargs) - with Not_found -> None - -(* Return a possible binding. For example the goal is: - * goal compile (name) = "%name.o": "%name.c" {} - * which means that params = ["name"] and targs = ["%name.o"]. - * - * If we are called with cargs = ["file1.o"], we would - * return ["file1"]. - * - * On non-matching this raises Not_found. - *) -and matching_params env loc params targs cargs = - (* This is going to record the resulting binding. *) - let res = ref Ast.Env.empty in - List.iter2 (matching_param env loc params res) targs cargs; - - (* Rearrange the result into goal parameter order. Also this - * checks that every parameter got a binding. - *) - let res = !res in - List.map ( - (* Allow the Not_found exception to escape if no binding for this param. *) - fun param -> Ast.Env.find param res - ) params - -(* If targ = "%name.o" and carg = "file.o" then this would set - * name => "file" in !res. If they don't match, raises Not_found. - *) -and matching_param env loc params res targ carg = - match carg with - | Ast.CString carg -> - (* Substitute any non parameters in targ from the environment. *) - let targ = - List.map ( - function - | Ast.SString _ as s -> s - | Ast.SVar name -> - if not (List.mem name params) then ( - try - let expr = Ast.getvar env loc name in - match Eval.to_constant env expr with - | Ast.CString s -> Ast.SString s - with Failure _ -> raise Not_found - ) - else - Ast.SVar name - ) targ in - - (* Do the actual pattern matching. Any remaining SVar elements - * must refer to goal parameters. - *) - let carg = ref carg in - let rec loop = function - | [] -> - (* End of targ, we must have matched all of carg. *) - if !carg <> "" then raise Not_found - | Ast.SString s :: rest -> - (* Does this match the first part of !carg? *) - let clen = String.length !carg in - let slen = String.length s in - if slen > clen || s <> String.sub !carg 0 slen then - raise Not_found; - (* Yes, so continue after the matching prefix. *) - carg := String.sub !carg slen (clen-slen); - loop rest - | Ast.SVar name :: Ast.SString s :: rest -> - (* This is a goal parameter. Find s later in !carg. *) - let i = string_find !carg s in - if i = -1 then raise Not_found; - (* Set the binding in !res. *) - let r = Ast.EConstant (Ast.noloc, - Ast.CString (String.sub !carg 0 i)) in - res := Ast.Env.add name r !res; - (* Continue after the match. *) - let skip = i + String.length s in - carg := String.sub !carg skip (String.length !carg - skip); - loop rest - | Ast.SVar name :: [] -> - (* Matches the whole remainder of the string. *) - let r = Ast.EConstant (Ast.noloc, Ast.CString !carg) in - res := Ast.Env.add name r !res - | Ast.SVar x :: Ast.SVar y :: _ -> - (* TODO! We cannot match a target like "%x%y". *) - assert false - in - loop targ + if needs_rebuild env loc [] [] p then + failwithf "%a: don't know how to build %s" Ast.string_loc loc debug_tactic diff --git a/src/run.mli b/src/run.mli index 4251235..ec26096 100644 --- a/src/run.mli +++ b/src/run.mli @@ -17,12 +17,15 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. *) -val run_targets_to_completion : Ast.env -> Ast.expr list -> unit -(** This drives evaluation of the list of target expressions (in - parallel) until they are complete or we reach an error. The - expressions are either a list of dependencies and/or a list of - initial targets. *) +val goal_runner : Deps.goal_runner +(** Run a single goal. *) -val stop_all : unit -> unit -(** Wait until all running jobs finish, and don't start any new ones. - See [Jobs.stop_all]. *) +val exists_runner : Deps.exists_runner +(** Run the implicit existence tactic. + + This is used when we find a tactic like *foo(...) but there + is no matching goal. We run (in this callback) the associated + tactic code. As long as it runs successfully, not returning 99 or + any error value, then we're OK - the tactic doesn't need rebuilding + so the dependency is satisfied. However if it returns 99 (needs + rebuild) or an error then we have to exit with an error. *) diff --git a/tests/test1.gl b/tests/test1.gl index 85335ce..44c15a2 100644 --- a/tests/test1.gl +++ b/tests/test1.gl @@ -6,6 +6,8 @@ let foo = "bar" goal all = : "file1.o", *file("file2.o") +goal clean = { } + goal compile (name) = "%name.o" : "%name.c" { echo %< "->" %@ -- 1.8.3.1