1 (* Goalfile dependency DAG
2 * Copyright (C) 2020 Richard W.M. Jones
3 * Copyright (C) 2020 Red Hat Inc.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 | Goal of Ast.env * Ast.loc * string * Ast.expr list *
26 Ast.goal * Ast.expr list * string
27 | Exists of Ast.env * Ast.loc * Ast.pattern * string
29 let string_of_node = function
30 | Goal (_, _, _, _, _, _, debug_goal) -> debug_goal
31 | Exists (_, _, _, debug_pred) -> debug_pred
33 let loc_of_node = function
34 | Goal (_, loc, _, _, _, _, _)
35 | Exists (_, loc, _, _) -> loc
37 let compare_nodes n1 n2 =
39 | Goal _, Exists _ -> -1
40 | Exists _, Goal _ -> 1
41 | Exists (_, _, p1, _), Exists (_, _, p2, _) -> compare p1 p2
42 | Goal (_, _, n1, a1, _, _, _), Goal (_, _, n2, a2, _, _, _) ->
43 compare (n1, a1) (n2, a2)
48 let compare = compare_nodes
58 (* Edges are stored as an adjacency list, which is a map from
59 * a parent node to a list of child nodes. Note that as the
60 * graph does not need to be connected, there may be nodes
61 * in the list above which don't appear in this map.
63 edges : node list G.t;
66 type t = dag * node list
67 (* The final type is a DAG and a topologically sorted list of nodes. *)
69 (* Creates a new DAG. *)
70 let rec new_dag () = { nodes = []; edges = G.empty }
72 (* This will create a new node, unless the node already exists.
73 * If the optional parent parameter is given then it also creates
74 * an edge from parent to the new (or existing) node.
76 and add_node { nodes; edges } ?parent data =
78 try List.find (fun n -> compare_nodes n data = 0) nodes, nodes
79 with Not_found -> data, data :: nodes in
84 let children = try G.find parent edges with Not_found -> [] in
85 if List.mem node children then edges
87 (* We are going to add an edge from parent -> node. To
88 * ensure we don't make a cycle, check there is no path
89 * already from node -> parent.
91 if exists_path node parent edges then (
92 let loc = loc_of_node data in
93 failwithf "%a: adding %s creates a dependency cycle"
94 Ast.string_loc loc (string_of_node data)
96 G.add parent (node :: children) edges
98 node, { nodes; edges }
100 (* Is there a path from n1 to n2 in edges? *)
101 and exists_path n1 n2 edges =
102 if compare_nodes n1 n2 = 0 then true
104 let children = try G.find n1 edges with Not_found -> [] in
105 List.exists (fun n -> exists_path n n2 edges) children
108 (* This is Khan's algorithm. *)
109 and topological_sort dag =
110 let incoming_map = incoming_map dag in
112 (* Set of all nodes with no incoming edge. *)
113 let q = List.filter (fun node -> not (G.mem node incoming_map)) dag.nodes in
115 let rec loop dag acc im = function
118 let acc = node :: acc in
119 let children = try G.find node dag.edges with Not_found -> [] in
122 fun (dag, q, im) child ->
123 (* There's an arrow from node to child. *)
126 List.filter (fun n -> compare_nodes n node <> 0) dag.nodes;
127 edges = remove_edge dag.edges node child } in
128 let im = remove_edge im child node in
129 let q = if not (G.mem child im) then child :: q else q in
131 ) (dag, q, im) children in
134 let dag, acc = loop dag [] incoming_map q in
136 if not (G.is_empty dag.edges) then raise Cycle_found;
138 (* This builds the topological list in reverse order, but that's
139 * fine because that is the running order.
143 (* The dag structure has an adjacency list, which is a list of outgoing
144 * edges from each node. But for a topological sort what we actually
145 * need is another list of incoming edges, so construct that first.
147 * Note this never returns a mapping node -> [].
149 and incoming_map { edges } =
150 let im = ref G.empty in
152 fun parent children ->
155 (* There is an arrow from parent -> c. *)
156 let xs = try G.find c !im with Not_found -> [] in
157 im := G.add c (parent :: xs) !im
162 (* Remove edge from parent to child returning a new edges map.
163 * Preserves the invariant that there is never a mapping node -> [].
165 and remove_edge edges parent child =
167 let children = G.find parent edges in
169 List.filter (fun n -> compare_nodes n child <> 0) children in
170 if children = [] then
171 G.remove parent edges
173 G.add parent children edges
177 and debug_dag { nodes; edges } =
179 List.iter (fun node -> eprintf " %s\n" (string_of_node node)) nodes;
182 fun parent children ->
183 eprintf " %s ->" (string_of_node parent);
184 List.iter (fun c -> eprintf " %s" (string_of_node c)) children;
188 let rec create env roots =
189 let dag = new_dag () in
190 let dag = add_targets dag env roots in
191 if Cmdline.debug_flag () then debug_dag dag;
192 (* Make actually breaks cycles, but I'm not convinced that this
193 * is a good idea, so this function will fail if any cycle is
194 * found. We may wish to revisit this decision in future.
197 try topological_sort dag
198 with Cycle_found -> failwithf "dependency graph contains cycles" in
199 if Cmdline.debug_flag () then
200 eprintf "dependency order:\n %s\n"
201 (String.concat " <- " (List.map string_of_node sorted));
204 and add_targets dag ?parent env roots =
205 List.fold_left (fun dag root -> add_target dag ?parent env root) dag roots
207 and add_target dag ?parent env = function
208 | Ast.EGoalDefn _ | Ast.EFuncDefn _ | Ast.EPredDefn _ -> assert false
210 (* Call a goal or function. *)
211 | Ast.ECall (loc, name, args) ->
212 let expr = Ast.getvar env loc name in
214 | Ast.EGoalDefn (_, goal) ->
215 add_goal dag ?parent env loc name args goal []
216 | Ast.EFuncDefn (_, func) ->
217 let expr = Eval.call_function env loc name args func in
218 add_target dag ?parent env expr
220 failwithf "%a: tried to call ā%sā which is not a goal or a function"
221 Ast.string_loc loc name
224 (* Call a predicate. *)
225 | Ast.EPredCtor (loc, name, args) ->
226 (* All parameters of predicates must be simple constant expressions
227 * (strings, in future booleans, numbers, etc).
229 let args = List.map (Eval.to_constant env) args in
230 add_pred dag ?parent env loc name args
232 (* If this is a goal then it's the same as calling goal(). If not
233 * then look up the variable and substitute it.
235 | Ast.EVar (loc, name) ->
236 let expr = Ast.getvar env loc name in
238 | Ast.EGoalDefn (loc, ([], _, _, _)) ->
239 add_target dag ?parent env (Ast.ECall (loc, name, []))
241 failwithf "%a: cannot call %s() since this goal has parameters"
242 Ast.string_loc loc name
244 add_target dag ?parent env expr
247 (* Lists are inlined when found as a target. *)
248 | Ast.EList (loc, exprs) ->
249 add_targets dag ?parent env exprs
251 (* A string (with or without substitutions) implies is-file(filename). *)
252 | Ast.ESubsts (loc, str) ->
253 let str = Eval.substitute env loc str in
254 add_pred dag ?parent env loc "is-file" [Ast.CString str]
256 | Ast.EConstant (loc, c) ->
257 add_pred dag ?parent env loc "is-file" [c]
259 (* Add a goal by name. *)
260 and add_goal dag ?parent env loc name args
261 ((params, patterns, deps, code) as goal)
263 (* This is used to print the goal in debug and error messages only. *)
265 sprintf "%s (%s)" name
266 (String.concat ", " (List.map (Ast.string_expr ()) args)) in
267 Cmdline.debug "%a: adding goal %s" Ast.string_loc loc debug_goal;
269 (* This is the point where we evaluate the goal arguments. We must
270 * do this before creating the new environment, because variables
271 * appearing in goal arguments don't refer to goal parameters.
273 let args = List.map (Eval.evaluate_goal_arg env) args in
275 (* Create a new environment which maps the parameter names to
280 try List.combine params args
281 with Invalid_argument _ ->
282 failwithf "%a: calling goal %s with wrong number of arguments, expecting %d args but got %d args"
283 Ast.string_loc loc debug_goal
284 (List.length params) (List.length args) in
285 List.fold_left (fun env (k, v) -> Ast.Env.add k v env) env params in
287 (* Create the node. *)
289 add_node dag ?parent (Goal (env, loc, name, args, goal,
290 extra_deps, debug_goal)) in
292 (* Add all dependencies. *)
293 add_targets dag ~parent:node env (deps @ extra_deps)
295 (* Find the goal which matches the given predicate and add it.
296 * cargs is a list of parameters (all constants).
298 and add_pred dag ?parent env loc pred cargs =
299 (* This is used to print the predicate in debug and error messages only. *)
302 (Ast.EPredCtor (loc, pred,
303 List.map (fun c -> Ast.EConstant (loc, c)) cargs)) in
304 Cmdline.debug "%a: adding predicate %s" Ast.string_loc loc debug_pred;
306 (* Find all goals in the environment. Returns a list of (name, goal). *)
308 let env = Ast.Env.bindings env in
311 | name, Ast.EGoalDefn (loc, goal) -> Some (name, goal)
314 (* Find all patterns. Returns a list of (pattern, name, goal). *)
315 let patterns : (Ast.pattern * Ast.id * Ast.goal) list =
317 (List.map (fun (name, ((_, patterns, _, _) as goal)) ->
318 List.map (fun pattern -> (pattern, name, goal)) patterns) goals) in
320 (* Find any patterns (ie. predicates) which match the one we
321 * are searching for. This returns a binding for the goal args,
322 * so we end up with a list of (pattern, name, goal, args).
324 let patterns : (Ast.pattern * Ast.id * Ast.goal * Ast.expr list) list =
326 fun (pattern, name, ((params, _, _, _) as goal)) ->
327 match matching_pattern env loc pred cargs pattern params with
329 | Some args -> Some (pattern, name, goal, args)
334 (* There's no matching goal, but we don't need one if
335 * the predicate doesn't need to be rebuilt. So create a
336 * special Exists node which will be used to run the predicate
337 * to see if the target needs to be rebuilt, and only fail
338 * if it does need a rebuild.
340 let targs = List.map (function Ast.CString s -> [Ast.SString s]) cargs in
341 let p = Ast.PPred (loc, pred, targs) in
342 let _, dag = add_node dag ?parent (Exists (env, loc, p, debug_pred)) in
345 | [_, name, goal, args] ->
346 (* Single goal matches. *)
347 add_goal dag ?parent env loc name args goal []
350 (* Two or more goals match. Only one must have a CODE section,
351 * and we combine the dependencies into a "supergoal".
353 let with_code, without_code =
355 fun (_, _, (_, _, _, code), _) -> code <> None
358 let (_, name, goal, args), extra_deps =
363 List.map (fun (_, _, (_, _, deps, _), _) -> deps) without_code
368 (* This is OK, it means we'll rebuild all dependencies
369 * but there is no code to run. Pick the first goal
370 * without code and the dependencies from the other goals.
372 let g = List.hd without_code in
375 List.map (fun (_, _, (_, _, deps, _), _) -> deps)
376 (List.tl without_code)
381 failwithf "%a: multiple goals found which match predicate %s, but more than one of these goals have {code} sections which is not allowed"
382 Ast.string_loc loc debug_pred in
384 add_goal dag ?parent env loc name args goal extra_deps
386 (* Test if pattern matches is-predicate(cargs). If it does
387 * then we return Some args where args is the arguments that must
388 * be passed to the matching goal. The params parameter is
389 * the names of the parameters of that goal.
391 and matching_pattern env loc pred cargs pattern params =
393 | Ast.PPred (loc, tpred, targs)
394 when pred <> tpred ||
395 List.length cargs <> List.length targs ->
396 None (* Can't possibly match if predicate name or #args is different. *)
397 | Ast.PPred (loc, tpred, targs) ->
398 (* Do the args match with a possible params binding? *)
399 try Some (matching_params env loc params targs cargs)
400 with Not_found -> None
402 (* Return a possible binding. For example the goal is:
403 * goal compile (name) = "%name.o": "%name.c" {}
404 * which means that params = ["name"] and targs = ["%name.o"].
406 * If we are called with cargs = ["file1.o"], we would
409 * On non-matching this raises Not_found.
411 and matching_params env loc params targs cargs =
412 (* This is going to record the resulting binding. *)
413 let res = ref Ast.Env.empty in
414 List.iter2 (matching_param env loc params res) targs cargs;
416 (* Rearrange the result into goal parameter order. Also this
417 * checks that every parameter got a binding.
421 (* Allow the Not_found exception to escape if no binding for this param. *)
422 fun param -> Ast.Env.find param res
425 (* If targ = "%name.o" and carg = "file.o" then this would set
426 * name => "file" in !res. If they don't match, raises Not_found.
428 and matching_param env loc params res targ carg =
430 | Ast.CString carg ->
431 (* Substitute any non parameters in targ from the environment. *)
435 | Ast.SString _ as s -> s
437 if not (List.mem name params) then (
439 let expr = Ast.getvar env loc name in
440 match Eval.to_constant env expr with
441 | Ast.CString s -> Ast.SString s
442 with Failure _ -> raise Not_found
448 (* Do the actual pattern matching. Any remaining SVar elements
449 * must refer to goal parameters.
451 let carg = ref carg in
452 let rec loop = function
454 (* End of targ, we must have matched all of carg. *)
455 if !carg <> "" then raise Not_found
456 | Ast.SString s :: rest ->
457 (* Does this match the first part of !carg? *)
458 let clen = String.length !carg in
459 let slen = String.length s in
460 if slen > clen || s <> String.sub !carg 0 slen then
462 (* Yes, so continue after the matching prefix. *)
463 carg := String.sub !carg slen (clen-slen);
465 | Ast.SVar name :: Ast.SString s :: rest ->
466 (* This is a goal parameter. Find s later in !carg. *)
467 let i = string_find !carg s in
468 if i = -1 then raise Not_found;
469 (* Set the binding in !res. *)
470 let r = Ast.EConstant (Ast.noloc,
471 Ast.CString (String.sub !carg 0 i)) in
472 res := Ast.Env.add name r !res;
473 (* Continue after the match. *)
474 let skip = i + String.length s in
475 carg := String.sub !carg skip (String.length !carg - skip);
477 | Ast.SVar name :: [] ->
478 (* Matches the whole remainder of the string. *)
479 let r = Ast.EConstant (Ast.noloc, Ast.CString !carg) in
480 res := Ast.Env.add name r !res
481 | Ast.SVar x :: Ast.SVar y :: _ ->
482 (* TODO! We cannot match a target like "%x%y". *)
488 Ast.env -> Ast.loc -> string -> Ast.expr list -> Ast.goal ->
489 Ast.expr list -> string -> unit
491 type exists_runner = Ast.env -> Ast.loc -> Ast.pattern -> string -> unit
495 goal_runner : goal_runner;
496 exists_runner : exists_runner;
498 (* Topologically sorted in build order. When nodes start running
499 * we take them off this list.
501 mutable sorted_nodes : node list;
503 (* List of nodes which completed successfully. Actually for fast
504 * access we store a map node -> true.
506 mutable complete : bool G.t;
508 (* List of nodes which failed. *)
509 mutable failed : bool G.t;
512 let new_state (dag, sorted_nodes) goal_runner exists_runner =
513 { dag; goal_runner; exists_runner; sorted_nodes;
514 complete = G.empty; failed = G.empty }
516 (* Called by [Jobs] when a node completes successfully. We mark
519 let retire_job state node =
520 state.complete <- G.add node true state.complete
522 (* Called by [Jobs] when a node fails. We mark the node as
523 * failed and ensure that anything that depends on it will
524 * also be marked as failed (and never returned by next_job).
526 let fail_job state node =
527 state.failed <- G.add node true state.failed
529 let rec next_job state =
530 (* Find the earliest node in the list which has all its
533 let rec loop acc = function
535 if state.sorted_nodes = [] then Jobs.Complete else Jobs.Not_ready
536 | node :: nodes when node_is_ready_to_run state node ->
537 (* Drop the node from the list of jobs and run it. *)
538 state.sorted_nodes <- List.rev acc @ nodes;
540 | Goal (env, loc, name, args, goal, extra_deps, debug_goal) ->
541 Jobs.Job (node, fun () ->
542 state.goal_runner env loc name args goal
543 extra_deps debug_goal)
544 | Exists (env, loc, p, debug_pred) ->
545 Jobs.Job (node, fun () ->
546 state.exists_runner env loc p debug_pred)
548 | node :: nodes when node_failed state node ->
549 (* Mark it as failed also, and drop it from the list of jobs. *)
551 state.sorted_nodes <- List.rev acc @ nodes;
554 (* All dependencies of this node are neither complete nor failed,
555 * continue down the list.
557 loop (node :: acc) nodes
559 loop [] state.sorted_nodes
561 (* All dependencies of this node are complete. *)
562 and node_is_ready_to_run { dag; complete } node =
563 let parents = try G.find node dag.edges with Not_found -> [] in
564 List.for_all (fun p -> G.mem p complete) parents
566 (* This node or any dependency of this node failed. *)
567 and node_failed { dag; failed } node =
568 G.mem node failed || (
569 let parents = try G.find node dag.edges with Not_found -> [] in
570 List.exists (fun p -> G.mem p failed) parents
573 let string_of_job = string_of_node