1 (* Goalfile dependency DAG
2 * Copyright (C) 2020 Richard W.M. Jones
3 * Copyright (C) 2020 Red Hat Inc.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 | Goal of Ast.env * Ast.loc * string * Ast.expr list *
26 Ast.goal * Ast.expr list * string
27 | Exists of Ast.env * Ast.loc * Ast.pattern * string
29 let string_of_node = function
30 | Goal (_, _, _, _, _, _, debug_goal) -> debug_goal
31 | Exists (_, _, _, debug_pred) -> debug_pred
33 let loc_of_node = function
34 | Goal (_, loc, _, _, _, _, _)
35 | Exists (_, loc, _, _) -> loc
37 let compare_nodes n1 n2 =
39 | Goal _, Exists _ -> -1
40 | Exists _, Goal _ -> 1
41 | Exists (_, _, p1, _), Exists (_, _, p2, _) -> compare p1 p2
42 | Goal (_, _, n1, a1, _, _, _), Goal (_, _, n2, a2, _, _, _) ->
43 compare (n1, a1) (n2, a2)
48 let compare = compare_nodes
58 (* Edges are stored as an adjacency list, which is a map from
59 * a parent node to a list of child nodes. Note that as the
60 * graph does not need to be connected, there may be nodes
61 * in the list above which don't appear in this map.
63 edges : node list G.t;
66 type t = dag * node list
67 (* The final type is a DAG and a topologically sorted list of nodes. *)
69 (* Creates a new DAG. *)
70 let rec new_dag () = { nodes = []; edges = G.empty }
72 (* This will create a new node, unless the node already exists.
73 * If the optional parent parameter is given then it also creates
74 * an edge from parent to the new (or existing) node.
76 and add_node { nodes; edges } ?parent data =
78 try List.find (fun n -> compare_nodes n data = 0) nodes, nodes
79 with Not_found -> data, data :: nodes in
84 let children = try G.find parent edges with Not_found -> [] in
85 if List.mem node children then edges
87 (* We are going to add an edge from parent -> node. To
88 * ensure we don't make a cycle, check there is no path
89 * already from node -> parent.
91 (match exists_path node parent edges with
94 let loc = loc_of_node data in
95 failwithf "%a: dependency cycle: %s -> %s"
97 (String.concat " -> " (List.map string_of_node nodes))
100 G.add parent (node :: children) edges
102 node, { nodes; edges }
104 (* Is there a path from n1 to n2 in edges?
105 * Returns [None] if no path, or [Some nodes] if there is a path.
107 and exists_path n1 n2 edges =
108 if compare_nodes n1 n2 = 0 then Some [n2]
110 let children = try G.find n1 edges with Not_found -> [] in
113 fun n -> exists_path n n2 edges
117 | Some nodes -> Some (n1 :: nodes)
120 (* This is Khan's algorithm. *)
121 and topological_sort dag =
122 let incoming_map = incoming_map dag in
124 (* Set of all nodes with no incoming edge. *)
125 let q = List.filter (fun node -> not (G.mem node incoming_map)) dag.nodes in
127 let rec loop dag acc im = function
130 let acc = node :: acc in
131 let children = try G.find node dag.edges with Not_found -> [] in
134 fun (dag, q, im) child ->
135 (* There's an arrow from node to child. *)
138 List.filter (fun n -> compare_nodes n node <> 0) dag.nodes;
139 edges = remove_edge dag.edges node child } in
140 let im = remove_edge im child node in
141 let q = if not (G.mem child im) then child :: q else q in
143 ) (dag, q, im) children in
146 let dag, acc = loop dag [] incoming_map q in
148 if not (G.is_empty dag.edges) then raise Cycle_found;
150 (* This builds the topological list in reverse order, but that's
151 * fine because that is the running order.
155 (* The dag structure has an adjacency list, which is a list of outgoing
156 * edges from each node. But for a topological sort what we actually
157 * need is another list of incoming edges, so construct that first.
159 * Note this never returns a mapping node -> [].
161 and incoming_map { edges } =
162 let im = ref G.empty in
164 fun parent children ->
167 (* There is an arrow from parent -> c. *)
168 let xs = try G.find c !im with Not_found -> [] in
169 im := G.add c (parent :: xs) !im
174 (* Remove edge from parent to child returning a new edges map.
175 * Preserves the invariant that there is never a mapping node -> [].
177 and remove_edge edges parent child =
179 let children = G.find parent edges in
181 List.filter (fun n -> compare_nodes n child <> 0) children in
182 if children = [] then
183 G.remove parent edges
185 G.add parent children edges
189 and debug_dag { nodes; edges } =
191 List.iter (fun node -> eprintf " %s\n" (string_of_node node)) nodes;
194 fun parent children ->
195 eprintf " %s ->" (string_of_node parent);
196 List.iter (fun c -> eprintf " %s" (string_of_node c)) children;
200 let rec create env roots =
201 let dag = new_dag () in
202 let dag = add_targets dag env roots in
203 if Cmdline.debug_flag () then debug_dag dag;
204 (* Make actually breaks cycles, but I'm not convinced that this
205 * is a good idea, so this function will fail if any cycle is
206 * found. We may wish to revisit this decision in future.
209 try topological_sort dag
210 with Cycle_found -> failwithf "dependency graph contains cycles" in
211 if Cmdline.debug_flag () then
212 eprintf "dependency order:\n %s\n"
213 (String.concat " <- " (List.map string_of_node sorted));
216 and add_targets dag ?parent env roots =
217 List.fold_left (fun dag root -> add_target dag ?parent env root) dag roots
219 and add_target dag ?parent env = function
220 | Ast.EGoalDefn _ | Ast.EFuncDefn _ | Ast.EPredDefn _ -> assert false
222 (* Call a goal or function. *)
223 | Ast.ECall (loc, name, args) ->
224 let expr = Ast.getvar env loc name in
226 | Ast.EGoalDefn (_, goal) ->
227 add_goal dag ?parent env loc name args goal []
228 | Ast.EFuncDefn (_, func) ->
229 let expr = Eval.call_function env loc name args func in
230 add_target dag ?parent env expr
232 failwithf "%a: tried to call ā%sā which is not a goal or a function"
233 Ast.string_loc loc name
236 (* Call a predicate. *)
237 | Ast.EPredCtor (loc, name, args) ->
238 (* All parameters of predicates must be simple constant expressions
239 * (strings, in future booleans, numbers, etc).
241 let args = List.map (Eval.to_constant env) args in
242 add_pred dag ?parent env loc name args
244 (* If this is a goal then it's the same as calling goal(). If not
245 * then look up the variable and substitute it.
247 | Ast.EVar (loc, name) ->
248 let expr = Ast.getvar env loc name in
250 | Ast.EGoalDefn (loc, ([], _, _, _)) ->
251 add_target dag ?parent env (Ast.ECall (loc, name, []))
253 failwithf "%a: cannot call %s() since this goal has parameters"
254 Ast.string_loc loc name
256 add_target dag ?parent env expr
259 (* Lists are inlined when found as a target. *)
260 | Ast.EList (loc, exprs) ->
261 add_targets dag ?parent env exprs
263 (* A string (with or without substitutions) implies is-file(filename). *)
264 | Ast.ESubsts (loc, str) ->
265 let str = Eval.substitute env loc str in
266 add_pred dag ?parent env loc "is-file" [Ast.CString str]
268 | Ast.EConstant (loc, c) ->
269 add_pred dag ?parent env loc "is-file" [c]
271 (* Add a goal by name. *)
272 and add_goal dag ?parent env loc name args
273 ((params, patterns, deps, code) as goal)
275 (* This is used to print the goal in debug and error messages only. *)
277 sprintf "%s (%s)" name
278 (String.concat ", " (List.map (Ast.string_expr ()) args)) in
279 Cmdline.debug "%a: adding goal %s" Ast.string_loc loc debug_goal;
281 (* This is the point where we evaluate the goal arguments. We must
282 * do this before creating the new environment, because variables
283 * appearing in goal arguments don't refer to goal parameters.
285 let args = List.map (Eval.evaluate_goal_arg env) args in
287 (* Create a new environment which maps the parameter names to
292 try List.combine params args
293 with Invalid_argument _ ->
294 failwithf "%a: calling goal %s with wrong number of arguments, expecting %d args but got %d args"
295 Ast.string_loc loc debug_goal
296 (List.length params) (List.length args) in
297 List.fold_left (fun env (k, v) -> Ast.Env.add k v env) env params in
299 (* Create the node. *)
301 add_node dag ?parent (Goal (env, loc, name, args, goal,
302 extra_deps, debug_goal)) in
304 (* Add all dependencies. *)
305 add_targets dag ~parent:node env (deps @ extra_deps)
307 (* Find the goal which matches the given predicate and add it.
308 * cargs is a list of parameters (all constants).
310 and add_pred dag ?parent env loc pred cargs =
311 (* This is used to print the predicate in debug and error messages only. *)
314 (Ast.EPredCtor (loc, pred,
315 List.map (fun c -> Ast.EConstant (loc, c)) cargs)) in
316 Cmdline.debug "%a: adding predicate %s" Ast.string_loc loc debug_pred;
318 (* Find all goals in the environment. Returns a list of (name, goal). *)
320 let env = Ast.Env.bindings env in
323 | name, Ast.EGoalDefn (loc, goal) -> Some (name, goal)
326 (* Find all patterns. Returns a list of (pattern, name, goal). *)
327 let patterns : (Ast.pattern * Ast.id * Ast.goal) list =
329 (List.map (fun (name, ((_, patterns, _, _) as goal)) ->
330 List.map (fun pattern -> (pattern, name, goal)) patterns) goals) in
332 (* Find any patterns (ie. predicates) which match the one we
333 * are searching for. This returns a binding for the goal args,
334 * so we end up with a list of (pattern, name, goal, args).
336 let patterns : (Ast.pattern * Ast.id * Ast.goal * Ast.expr list) list =
338 fun (pattern, name, ((params, _, _, _) as goal)) ->
339 match matching_pattern env loc pred cargs pattern params with
341 | Some args -> Some (pattern, name, goal, args)
346 (* There's no matching goal, but we don't need one if
347 * the predicate doesn't need to be rebuilt. So create a
348 * special Exists node which will be used to run the predicate
349 * to see if the target needs to be rebuilt, and only fail
350 * if it does need a rebuild.
352 let targs = List.map (function Ast.CString s -> [Ast.SString s]) cargs in
353 let p = Ast.PPred (loc, pred, targs) in
354 let _, dag = add_node dag ?parent (Exists (env, loc, p, debug_pred)) in
357 | [_, name, goal, args] ->
358 (* Single goal matches. *)
359 add_goal dag ?parent env loc name args goal []
362 (* Two or more goals match. Only one must have a CODE section,
363 * and we combine the dependencies into a "supergoal".
365 let with_code, without_code =
367 fun (_, _, (_, _, _, code), _) -> code <> None
370 let (_, name, goal, args), extra_deps =
375 List.map (fun (_, _, (_, _, deps, _), _) -> deps) without_code
380 (* This is OK, it means we'll rebuild all dependencies
381 * but there is no code to run. Pick the first goal
382 * without code and the dependencies from the other goals.
384 let g = List.hd without_code in
387 List.map (fun (_, _, (_, _, deps, _), _) -> deps)
388 (List.tl without_code)
393 failwithf "%a: multiple goals found which match predicate %s, but more than one of these goals have {code} sections which is not allowed"
394 Ast.string_loc loc debug_pred in
396 add_goal dag ?parent env loc name args goal extra_deps
398 (* Test if pattern matches is-predicate(cargs). If it does
399 * then we return Some args where args is the arguments that must
400 * be passed to the matching goal. The params parameter is
401 * the names of the parameters of that goal.
403 and matching_pattern env loc pred cargs pattern params =
405 | Ast.PPred (loc, tpred, targs)
406 when pred <> tpred ||
407 List.length cargs <> List.length targs ->
408 None (* Can't possibly match if predicate name or #args is different. *)
409 | Ast.PPred (loc, tpred, targs) ->
410 (* Do the args match with a possible params binding? *)
411 try Some (matching_params env loc params targs cargs)
412 with Not_found -> None
414 (* Return a possible binding. For example the goal is:
415 * goal compile (name) = "%name.o": "%name.c" {}
416 * which means that params = ["name"] and targs = ["%name.o"].
418 * If we are called with cargs = ["file1.o"], we would
421 * On non-matching this raises Not_found.
423 and matching_params env loc params targs cargs =
424 (* This is going to record the resulting binding. *)
425 let res = ref Ast.Env.empty in
426 List.iter2 (matching_param env loc params res) targs cargs;
428 (* Rearrange the result into goal parameter order. Also this
429 * checks that every parameter got a binding.
433 (* Allow the Not_found exception to escape if no binding for this param. *)
434 fun param -> Ast.Env.find param res
437 (* If targ = "%name.o" and carg = "file.o" then this would set
438 * name => "file" in !res. If they don't match, raises Not_found.
440 and matching_param env loc params res targ carg =
442 | Ast.CString carg ->
443 (* Substitute any non parameters in targ from the environment. *)
447 | Ast.SString _ as s -> s
449 if not (List.mem name params) then (
451 let expr = Ast.getvar env loc name in
452 match Eval.to_constant env expr with
453 | Ast.CString s -> Ast.SString s
454 with Failure _ -> raise Not_found
460 (* Do the actual pattern matching. Any remaining SVar elements
461 * must refer to goal parameters.
463 let carg = ref carg in
464 let rec loop = function
466 (* End of targ, we must have matched all of carg. *)
467 if !carg <> "" then raise Not_found
468 | Ast.SString s :: rest ->
469 (* Does this match the first part of !carg? *)
470 let clen = String.length !carg in
471 let slen = String.length s in
472 if slen > clen || s <> String.sub !carg 0 slen then
474 (* Yes, so continue after the matching prefix. *)
475 carg := String.sub !carg slen (clen-slen);
477 | Ast.SVar name :: Ast.SString s :: rest ->
478 (* This is a goal parameter. Find s later in !carg. *)
479 let i = string_find !carg s in
480 if i = -1 then raise Not_found;
481 (* Set the binding in !res. *)
482 let r = Ast.EConstant (Ast.noloc,
483 Ast.CString (String.sub !carg 0 i)) in
484 res := Ast.Env.add name r !res;
485 (* Continue after the match. *)
486 let skip = i + String.length s in
487 carg := String.sub !carg skip (String.length !carg - skip);
489 | Ast.SVar name :: [] ->
490 (* Matches the whole remainder of the string. *)
491 let r = Ast.EConstant (Ast.noloc, Ast.CString !carg) in
492 res := Ast.Env.add name r !res
493 | Ast.SVar x :: Ast.SVar y :: _ ->
494 (* TODO! We cannot match a target like "%x%y". *)
500 Ast.env -> Ast.loc -> string -> Ast.expr list -> Ast.goal ->
501 Ast.expr list -> string -> unit
503 type exists_runner = Ast.env -> Ast.loc -> Ast.pattern -> string -> unit
507 goal_runner : goal_runner;
508 exists_runner : exists_runner;
510 (* Topologically sorted in build order. When nodes start running
511 * we take them off this list.
513 mutable sorted_nodes : node list;
515 (* List of nodes which completed successfully. Actually for fast
516 * access we store a map node -> true.
518 mutable complete : bool G.t;
520 (* List of nodes which failed. *)
521 mutable failed : bool G.t;
524 let new_state (dag, sorted_nodes) goal_runner exists_runner =
525 { dag; goal_runner; exists_runner; sorted_nodes;
526 complete = G.empty; failed = G.empty }
528 (* Called by [Jobs] when a node completes successfully. We mark
531 let retire_job state node =
532 state.complete <- G.add node true state.complete
534 (* Called by [Jobs] when a node fails. We mark the node as
535 * failed and ensure that anything that depends on it will
536 * also be marked as failed (and never returned by next_job).
538 let fail_job state node =
539 state.failed <- G.add node true state.failed
541 let rec next_job state =
542 (* Find the earliest node in the list which has all its
545 let rec loop acc = function
547 if state.sorted_nodes = [] then Jobs.Complete else Jobs.Not_ready
548 | node :: nodes when node_is_ready_to_run state node ->
549 (* Drop the node from the list of jobs and run it. *)
550 state.sorted_nodes <- List.rev acc @ nodes;
552 | Goal (env, loc, name, args, goal, extra_deps, debug_goal) ->
553 Jobs.Job (node, fun () ->
554 state.goal_runner env loc name args goal
555 extra_deps debug_goal)
556 | Exists (env, loc, p, debug_pred) ->
557 Jobs.Job (node, fun () ->
558 state.exists_runner env loc p debug_pred)
560 | node :: nodes when node_failed state node ->
561 (* Mark it as failed also, and drop it from the list of jobs. *)
563 state.sorted_nodes <- List.rev acc @ nodes;
566 (* All dependencies of this node are neither complete nor failed,
567 * continue down the list.
569 loop (node :: acc) nodes
571 loop [] state.sorted_nodes
573 (* All dependencies of this node are complete. *)
574 and node_is_ready_to_run { dag; complete } node =
575 let parents = try G.find node dag.edges with Not_found -> [] in
576 List.for_all (fun p -> G.mem p complete) parents
578 (* This node or any dependency of this node failed. *)
579 and node_failed { dag; failed } node =
580 G.mem node failed || (
581 let parents = try G.find node dag.edges with Not_found -> [] in
582 List.exists (fun p -> G.mem p failed) parents
585 let string_of_job = string_of_node