1 (* Goalfile dependency DAG
2 * Copyright (C) 2020 Richard W.M. Jones
3 * Copyright (C) 2020 Red Hat Inc.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 | Goal of Ast.env * Ast.loc * string * Ast.expr list *
26 Ast.goal * Ast.expr list * string
27 | Exists of Ast.env * Ast.loc * Ast.pattern * string
29 let string_of_node = function
30 | Goal (_, _, _, _, _, _, debug_goal) -> debug_goal
31 | Exists (_, _, _, debug_pred) -> debug_pred
33 let loc_of_node = function
34 | Goal (_, loc, _, _, _, _, _)
35 | Exists (_, loc, _, _) -> loc
37 let compare_nodes n1 n2 =
39 | Goal _, Exists _ -> -1
40 | Exists _, Goal _ -> 1
41 | Exists (_, _, p1, _), Exists (_, _, p2, _) -> compare p1 p2
42 | Goal (_, _, n1, a1, _, _, _), Goal (_, _, n2, a2, _, _, _) ->
43 compare (n1, a1) (n2, a2)
48 let compare = compare_nodes
58 (* Edges are stored as an adjacency list, which is a map from
59 * a parent node to a list of child nodes. Note that as the
60 * graph does not need to be connected, there may be nodes
61 * in the list above which don't appear in this map.
63 edges : node list G.t;
66 type t = dag * node list
67 (* The final type is a DAG and a topologically sorted list of nodes. *)
69 (* Creates a new DAG. *)
70 let rec new_dag () = { nodes = []; edges = G.empty }
72 (* This will create a new node, unless the node already exists.
73 * If the optional parent parameter is given then it also creates
74 * an edge from parent to the new (or existing) node.
76 and add_node { nodes; edges } ?parent data =
78 try List.find (fun n -> compare_nodes n data = 0) nodes, nodes
79 with Not_found -> data, data :: nodes in
84 let children = try G.find parent edges with Not_found -> [] in
85 if List.mem node children then edges
86 else G.add parent (node :: children) edges in
87 (* Doing this checks if we have added a cycle. There may be
88 * cheaper ways to do this, see:
89 * https://stackoverflow.com/questions/20246417/how-to-detect-if-adding-an-edge-to-a-directed-graph-results-in-a-cycle
91 let dag = { nodes; edges } in
92 (try ignore (topological_sort dag)
94 let loc = loc_of_node data in
95 failwithf "%a: adding %s creates a dependency cycle"
96 Ast.string_loc loc (string_of_node data)
100 (* This is Khan's algorithm. *)
101 and topological_sort dag =
102 let incoming_map = incoming_map dag in
104 (* Set of all nodes with no incoming edge. *)
105 let q = List.filter (fun node -> not (G.mem node incoming_map)) dag.nodes in
107 let rec loop dag acc im = function
110 let acc = node :: acc in
111 let children = try G.find node dag.edges with Not_found -> [] in
114 fun (dag, q, im) child ->
115 (* There's an arrow from node to child. *)
118 List.filter (fun n -> compare_nodes n node <> 0) dag.nodes;
119 edges = remove_edge dag.edges node child } in
120 let im = remove_edge im child node in
121 let q = if not (G.mem child im) then child :: q else q in
123 ) (dag, q, im) children in
126 let dag, acc = loop dag [] incoming_map q in
128 if not (G.is_empty dag.edges) then raise Cycle_found;
130 (* This builds the topological list in reverse order, but that's
131 * fine because that is the running order.
135 (* The dag structure has an adjacency list, which is a list of outgoing
136 * edges from each node. But for a topological sort what we actually
137 * need is another list of incoming edges, so construct that first.
139 * Note this never returns a mapping node -> [].
141 and incoming_map { edges } =
142 let im = ref G.empty in
144 fun parent children ->
147 (* There is an arrow from parent -> c. *)
148 let xs = try G.find c !im with Not_found -> [] in
149 im := G.add c (parent :: xs) !im
154 (* Remove edge from parent to child returning a new edges map.
155 * Preserves the invariant that there is never a mapping node -> [].
157 and remove_edge edges parent child =
159 let children = G.find parent edges in
161 List.filter (fun n -> compare_nodes n child <> 0) children in
162 if children = [] then
163 G.remove parent edges
165 G.add parent children edges
169 and debug_dag { nodes; edges } =
171 List.iter (fun node -> eprintf " %s\n" (string_of_node node)) nodes;
174 fun parent children ->
175 eprintf " %s ->" (string_of_node parent);
176 List.iter (fun c -> eprintf " %s" (string_of_node c)) children;
180 let rec create env roots =
181 let dag = new_dag () in
182 let dag = add_targets dag env roots in
183 if Cmdline.debug_flag () then debug_dag dag;
184 (* Make actually breaks cycles, but I'm not convinced that this
185 * is a good idea, so this function will fail if any cycle is
186 * found. We may wish to revisit this decision in future.
189 try topological_sort dag
190 with Cycle_found -> failwithf "dependency graph contains cycles" in
191 if Cmdline.debug_flag () then
192 eprintf "dependency order:\n %s\n"
193 (String.concat " <- " (List.map string_of_node sorted));
196 and add_targets dag ?parent env roots =
197 List.fold_left (fun dag root -> add_target dag ?parent env root) dag roots
199 and add_target dag ?parent env = function
200 | Ast.EGoalDefn _ | Ast.EFuncDefn _ | Ast.EPredDefn _ -> assert false
202 (* Call a goal or function. *)
203 | Ast.ECall (loc, name, args) ->
204 let expr = Ast.getvar env loc name in
206 | Ast.EGoalDefn (_, goal) ->
207 add_goal dag ?parent env loc name args goal []
208 | Ast.EFuncDefn (_, func) ->
209 let expr = Eval.call_function env loc name args func in
210 add_target dag ?parent env expr
212 failwithf "%a: tried to call ā%sā which is not a goal or a function"
213 Ast.string_loc loc name
216 (* Call a predicate. *)
217 | Ast.EPredCtor (loc, name, args) ->
218 (* All parameters of predicates must be simple constant expressions
219 * (strings, in future booleans, numbers, etc).
221 let args = List.map (Eval.to_constant env) args in
222 add_pred dag ?parent env loc name args
224 (* If this is a goal then it's the same as calling goal(). If not
225 * then look up the variable and substitute it.
227 | Ast.EVar (loc, name) ->
228 let expr = Ast.getvar env loc name in
230 | Ast.EGoalDefn (loc, ([], _, _, _)) ->
231 add_target dag ?parent env (Ast.ECall (loc, name, []))
233 failwithf "%a: cannot call %s() since this goal has parameters"
234 Ast.string_loc loc name
236 add_target dag ?parent env expr
239 (* Lists are inlined when found as a target. *)
240 | Ast.EList (loc, exprs) ->
241 add_targets dag ?parent env exprs
243 (* A string (with or without substitutions) implies is-file(filename). *)
244 | Ast.ESubsts (loc, str) ->
245 let str = Eval.substitute env loc str in
246 add_pred dag ?parent env loc "is-file" [Ast.CString str]
248 | Ast.EConstant (loc, c) ->
249 add_pred dag ?parent env loc "is-file" [c]
251 (* Add a goal by name. *)
252 and add_goal dag ?parent env loc name args
253 ((params, patterns, deps, code) as goal)
255 (* This is used to print the goal in debug and error messages only. *)
257 sprintf "%s (%s)" name
258 (String.concat ", " (List.map (Ast.string_expr ()) args)) in
259 Cmdline.debug "%a: adding goal %s" Ast.string_loc loc debug_goal;
261 (* This is the point where we evaluate the goal arguments. We must
262 * do this before creating the new environment, because variables
263 * appearing in goal arguments don't refer to goal parameters.
265 let args = List.map (Eval.evaluate_goal_arg env) args in
267 (* Create a new environment which maps the parameter names to
272 try List.combine params args
273 with Invalid_argument _ ->
274 failwithf "%a: calling goal %s with wrong number of arguments, expecting %d args but got %d args"
275 Ast.string_loc loc debug_goal
276 (List.length params) (List.length args) in
277 List.fold_left (fun env (k, v) -> Ast.Env.add k v env) env params in
279 (* Create the node. *)
281 add_node dag ?parent (Goal (env, loc, name, args, goal,
282 extra_deps, debug_goal)) in
284 (* Add all dependencies. *)
285 add_targets dag ~parent:node env (deps @ extra_deps)
287 (* Find the goal which matches the given predicate and add it.
288 * cargs is a list of parameters (all constants).
290 and add_pred dag ?parent env loc pred cargs =
291 (* This is used to print the predicate in debug and error messages only. *)
294 (Ast.EPredCtor (loc, pred,
295 List.map (fun c -> Ast.EConstant (loc, c)) cargs)) in
296 Cmdline.debug "%a: adding predicate %s" Ast.string_loc loc debug_pred;
298 (* Find all goals in the environment. Returns a list of (name, goal). *)
300 let env = Ast.Env.bindings env in
303 | name, Ast.EGoalDefn (loc, goal) -> Some (name, goal)
306 (* Find all patterns. Returns a list of (pattern, name, goal). *)
307 let patterns : (Ast.pattern * Ast.id * Ast.goal) list =
309 (List.map (fun (name, ((_, patterns, _, _) as goal)) ->
310 List.map (fun pattern -> (pattern, name, goal)) patterns) goals) in
312 (* Find any patterns (ie. predicates) which match the one we
313 * are searching for. This returns a binding for the goal args,
314 * so we end up with a list of (pattern, name, goal, args).
316 let patterns : (Ast.pattern * Ast.id * Ast.goal * Ast.expr list) list =
318 fun (pattern, name, ((params, _, _, _) as goal)) ->
319 match matching_pattern env loc pred cargs pattern params with
321 | Some args -> Some (pattern, name, goal, args)
326 (* There's no matching goal, but we don't need one if
327 * the predicate doesn't need to be rebuilt. So create a
328 * special Exists node which will be used to run the predicate
329 * to see if the target needs to be rebuilt, and only fail
330 * if it does need a rebuild.
332 let targs = List.map (function Ast.CString s -> [Ast.SString s]) cargs in
333 let p = Ast.PPred (loc, pred, targs) in
334 let _, dag = add_node dag ?parent (Exists (env, loc, p, debug_pred)) in
337 | [_, name, goal, args] ->
338 (* Single goal matches. *)
339 add_goal dag ?parent env loc name args goal []
342 (* Two or more goals match. Only one must have a CODE section,
343 * and we combine the dependencies into a "supergoal".
345 let with_code, without_code =
347 fun (_, _, (_, _, _, code), _) -> code <> None
350 let (_, name, goal, args), extra_deps =
355 List.map (fun (_, _, (_, _, deps, _), _) -> deps) without_code
360 (* This is OK, it means we'll rebuild all dependencies
361 * but there is no code to run. Pick the first goal
362 * without code and the dependencies from the other goals.
364 let g = List.hd without_code in
367 List.map (fun (_, _, (_, _, deps, _), _) -> deps)
368 (List.tl without_code)
373 failwithf "%a: multiple goals found which match predicate %s, but more than one of these goals have {code} sections which is not allowed"
374 Ast.string_loc loc debug_pred in
376 add_goal dag ?parent env loc name args goal extra_deps
378 (* Test if pattern matches is-predicate(cargs). If it does
379 * then we return Some args where args is the arguments that must
380 * be passed to the matching goal. The params parameter is
381 * the names of the parameters of that goal.
383 and matching_pattern env loc pred cargs pattern params =
385 | Ast.PPred (loc, tpred, targs)
386 when pred <> tpred ||
387 List.length cargs <> List.length targs ->
388 None (* Can't possibly match if predicate name or #args is different. *)
389 | Ast.PPred (loc, tpred, targs) ->
390 (* Do the args match with a possible params binding? *)
391 try Some (matching_params env loc params targs cargs)
392 with Not_found -> None
394 (* Return a possible binding. For example the goal is:
395 * goal compile (name) = "%name.o": "%name.c" {}
396 * which means that params = ["name"] and targs = ["%name.o"].
398 * If we are called with cargs = ["file1.o"], we would
401 * On non-matching this raises Not_found.
403 and matching_params env loc params targs cargs =
404 (* This is going to record the resulting binding. *)
405 let res = ref Ast.Env.empty in
406 List.iter2 (matching_param env loc params res) targs cargs;
408 (* Rearrange the result into goal parameter order. Also this
409 * checks that every parameter got a binding.
413 (* Allow the Not_found exception to escape if no binding for this param. *)
414 fun param -> Ast.Env.find param res
417 (* If targ = "%name.o" and carg = "file.o" then this would set
418 * name => "file" in !res. If they don't match, raises Not_found.
420 and matching_param env loc params res targ carg =
422 | Ast.CString carg ->
423 (* Substitute any non parameters in targ from the environment. *)
427 | Ast.SString _ as s -> s
429 if not (List.mem name params) then (
431 let expr = Ast.getvar env loc name in
432 match Eval.to_constant env expr with
433 | Ast.CString s -> Ast.SString s
434 with Failure _ -> raise Not_found
440 (* Do the actual pattern matching. Any remaining SVar elements
441 * must refer to goal parameters.
443 let carg = ref carg in
444 let rec loop = function
446 (* End of targ, we must have matched all of carg. *)
447 if !carg <> "" then raise Not_found
448 | Ast.SString s :: rest ->
449 (* Does this match the first part of !carg? *)
450 let clen = String.length !carg in
451 let slen = String.length s in
452 if slen > clen || s <> String.sub !carg 0 slen then
454 (* Yes, so continue after the matching prefix. *)
455 carg := String.sub !carg slen (clen-slen);
457 | Ast.SVar name :: Ast.SString s :: rest ->
458 (* This is a goal parameter. Find s later in !carg. *)
459 let i = string_find !carg s in
460 if i = -1 then raise Not_found;
461 (* Set the binding in !res. *)
462 let r = Ast.EConstant (Ast.noloc,
463 Ast.CString (String.sub !carg 0 i)) in
464 res := Ast.Env.add name r !res;
465 (* Continue after the match. *)
466 let skip = i + String.length s in
467 carg := String.sub !carg skip (String.length !carg - skip);
469 | Ast.SVar name :: [] ->
470 (* Matches the whole remainder of the string. *)
471 let r = Ast.EConstant (Ast.noloc, Ast.CString !carg) in
472 res := Ast.Env.add name r !res
473 | Ast.SVar x :: Ast.SVar y :: _ ->
474 (* TODO! We cannot match a target like "%x%y". *)
480 Ast.env -> Ast.loc -> string -> Ast.expr list -> Ast.goal ->
481 Ast.expr list -> string -> unit
483 type exists_runner = Ast.env -> Ast.loc -> Ast.pattern -> string -> unit
487 goal_runner : goal_runner;
488 exists_runner : exists_runner;
490 (* Topologically sorted in build order. When nodes start running
491 * we take them off this list.
493 mutable sorted_nodes : node list;
495 (* List of nodes which completed successfully. Actually for fast
496 * access we store a map node -> true.
498 mutable complete : bool G.t;
500 (* List of nodes which failed. *)
501 mutable failed : bool G.t;
504 let new_state (dag, sorted_nodes) goal_runner exists_runner =
505 { dag; goal_runner; exists_runner; sorted_nodes;
506 complete = G.empty; failed = G.empty }
508 (* Called by [Jobs] when a node completes successfully. We mark
511 let retire_job state node =
512 state.complete <- G.add node true state.complete
514 (* Called by [Jobs] when a node fails. We mark the node as
515 * failed and ensure that anything that depends on it will
516 * also be marked as failed (and never returned by next_job).
518 let fail_job state node =
519 state.failed <- G.add node true state.failed
521 let rec next_job state =
522 (* Find the earliest node in the list which has all its
525 let rec loop acc = function
527 if state.sorted_nodes = [] then Jobs.Complete else Jobs.Not_ready
528 | node :: nodes when node_is_ready_to_run state node ->
529 (* Drop the node from the list of jobs and run it. *)
530 state.sorted_nodes <- List.rev acc @ nodes;
532 | Goal (env, loc, name, args, goal, extra_deps, debug_goal) ->
533 Jobs.Job (node, fun () ->
534 state.goal_runner env loc name args goal
535 extra_deps debug_goal)
536 | Exists (env, loc, p, debug_pred) ->
537 Jobs.Job (node, fun () ->
538 state.exists_runner env loc p debug_pred)
540 | node :: nodes when node_failed state node ->
541 (* Mark it as failed also, and drop it from the list of jobs. *)
543 state.sorted_nodes <- List.rev acc @ nodes;
546 (* All dependencies of this node are neither complete nor failed,
547 * continue down the list.
549 loop (node :: acc) nodes
551 loop [] state.sorted_nodes
553 (* All dependencies of this node are complete. *)
554 and node_is_ready_to_run { dag; complete } node =
555 let parents = try G.find node dag.edges with Not_found -> [] in
556 List.for_all (fun p -> G.mem p complete) parents
558 (* This node or any dependency of this node failed. *)
559 and node_failed { dag; failed } node =
560 G.mem node failed || (
561 let parents = try G.find node dag.edges with Not_found -> [] in
562 List.exists (fun p -> G.mem p failed) parents
565 let string_of_job = string_of_node