1 (* 'top'-like tool for libvirt domains.
2 (C) Copyright 2007-2017 Richard W.M. Jones, Red Hat Inc.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 module C = Libvirt.Connect
21 module D = Libvirt.Domain
28 (* Hook for XML support (see [opt_xml.ml]). *)
29 let parse_device_xml : (int -> [>`R] D.t -> string list * string list) ref =
34 (* Intermediate "domain + stats" structure that we use to collect
35 * everything we know about a domain within the collect function.
37 type rd_domain = Inactive | Active of rd_active
39 rd_domid : int; (* Domain ID. *)
40 rd_domuuid : Libvirt.uuid; (* Domain UUID. *)
41 rd_dom : [`R] D.t; (* Domain object. *)
42 rd_info : D.info; (* Domain CPU info now. *)
43 rd_block_stats : (string * D.block_stats) list;
44 (* Domain block stats now. *)
45 rd_interface_stats : (string * D.interface_stats) list;
46 (* Domain net stats now. *)
47 rd_prev_info : D.info option; (* Domain CPU info previously. *)
48 rd_prev_block_stats : (string * D.block_stats) list;
49 (* Domain block stats prev. *)
50 rd_prev_interface_stats : (string * D.interface_stats) list;
51 (* Domain interface stats prev. *)
52 (* The following are since the last slice, or 0 if cannot be calculated: *)
53 rd_cpu_time : float; (* CPU time used in nanoseconds. *)
54 rd_percent_cpu : float; (* CPU time as percent of total. *)
55 rd_mem_bytes : int64; (* Memory usage in bytes *)
56 rd_mem_percent: int64; (* Memory usage as percent of total *)
57 (* The following are since the last slice, or None if cannot be calc'd: *)
58 rd_block_rd_reqs : int64 option; (* Number of block device read rqs. *)
59 rd_block_wr_reqs : int64 option; (* Number of block device write rqs. *)
60 rd_block_rd_bytes : int64 option; (* Number of bytes block device read *)
61 rd_block_wr_bytes : int64 option; (* Number of bytes block device write *)
62 rd_net_rx_bytes : int64 option; (* Number of bytes received. *)
63 rd_net_tx_bytes : int64 option; (* Number of bytes transmitted. *)
67 rd_doms : (string * rd_domain) list; (* List of domains. *)
69 rd_printable_time : string;
72 rd_total_cpu_per_pcpu : float;
73 rd_totals : (int * int * int * int * int * int * int * int * int * float *
78 rd_pcpu_doms : (int * string * int *
79 Libvirt.Domain.vcpu_info array * int64 array array *
80 int64 array array * string * int) list;
81 rd_pcpu_pcpus : int64 array array array;
82 rd_pcpu_pcpus_cpu_time : float array
85 (* We cache the list of block devices and interfaces for each domain
86 * here, so we don't need to reparse the XML each time.
88 let devices = Hashtbl.create 13
90 (* Function to get the list of block devices, network interfaces for
91 * a particular domain. Get it from the devices cache, and if not
92 * there then parse the domain XML.
94 let get_devices id dom =
95 try Hashtbl.find devices id
97 let blkdevs, netifs = (!parse_device_xml) id dom in
98 Hashtbl.replace devices id (blkdevs, netifs);
101 (* We save the state of domains across redraws here, which allows us
102 * to deduce %CPU usage from the running total.
104 let last_info = Hashtbl.create 13
105 let last_time = ref (Unix.gettimeofday ())
107 (* Save pcpu_usages structures across redraws too (only for pCPU display). *)
108 let last_pcpu_usages = Hashtbl.create 13
110 let clear_pcpu_display_data () =
111 Hashtbl.clear last_pcpu_usages
113 (* What to get from virConnectGetAllDomainStats. *)
115 D.StatsState; D.StatsCpuTotal; D.StatsBalloon; D.StatsVcpu;
116 D.StatsInterface; D.StatsBlock
118 (* Which domains to get. Empty list means return all domains:
119 * active, inactive, persistent, transient etc.
123 let collect (conn, _, _, _, _, node_info, _, _) =
124 (* Number of physical CPUs (some may be disabled). *)
125 let nr_pcpus = C.maxcpus_of_node_info node_info in
127 (* Get the current time. *)
128 let time = Unix.gettimeofday () in
129 let tm = Unix.localtime time in
131 sprintf "%02d:%02d:%02d" tm.Unix.tm_hour tm.Unix.tm_min tm.Unix.tm_sec in
133 (* What's the total CPU time elapsed since we were last called? (ns) *)
134 let total_cpu_per_pcpu = 1_000_000_000. *. (time -. !last_time) in
135 (* Avoid division by zero. *)
136 let total_cpu_per_pcpu =
137 if total_cpu_per_pcpu <= 0. then 1. else total_cpu_per_pcpu in
138 let total_cpu = float node_info.C.cpus *. total_cpu_per_pcpu in
140 (* Get the domains. Match up with their last_info (if any). *)
142 let doms = D.get_all_domain_stats conn what who in
143 let doms = Array.to_list doms in
145 fun { D.dom_uuid = uuid; D.params = params } ->
146 let nr_params = Array.length params in
149 if i = nr_params then None
150 else if fst params.(i) = name then Some (snd params.(i))
155 let get_param_int name default =
156 match get_param name with
158 | Some (D.TypedFieldInt32 i)
159 | Some (D.TypedFieldUInt32 i) -> Some (Int32.to_int i)
160 | Some (D.TypedFieldInt64 i)
161 | Some (D.TypedFieldUInt64 i) -> Some (Int64.to_int i)
164 let get_param_int64 name default =
165 match get_param name with
167 | Some (D.TypedFieldInt32 i)
168 | Some (D.TypedFieldUInt32 i) -> Some (Int64.of_int32 i)
169 | Some (D.TypedFieldInt64 i)
170 | Some (D.TypedFieldUInt64 i) -> Some i
174 let dom = D.lookup_by_uuid conn uuid in
175 let id = D.get_id dom in
176 let name = D.get_name dom in
177 let state = get_param_int "state.state" None in
179 if state = Some 5 (* VIR_DOMAIN_SHUTOFF *) then
184 (* Synthesize a D.info struct out of the data we have
185 * from virConnectGetAllDomainStats. Doing this is an
186 * artifact from the old APIs we used to use to fetch
187 * stats, we could simplify here, and also return the
192 | None | Some 0 -> D.InfoNoState
193 | Some 1 -> D.InfoRunning
194 | Some 2 -> D.InfoBlocked
195 | Some 3 -> D.InfoPaused
196 | Some 4 -> D.InfoShutdown
197 | Some 5 -> D.InfoShutoff
198 | Some 6 -> D.InfoCrashed
199 | Some 7 -> D.InfoPaused (* XXX really VIR_DOMAIN_PMSUSPENDED *)
200 | _ -> D.InfoNoState in
202 match get_param_int64 "balloon.current" None with
206 match get_param_int "vcpu.current" None with
210 (* NB: libvirt does not return cpu.time for non-root domains. *)
211 match get_param_int64 "cpu.time" None with
216 max_mem = -1_L; (* not used anywhere in virt-top *)
218 nr_virt_cpu = nr_virt_cpu;
223 match get_param_int "block.count" None with
230 match get_param (sprintf "block.%d.name" i) with
231 | None -> sprintf "blk%d" i
232 | Some (D.TypedFieldString s) -> s
233 | _ -> assert false in
236 (match get_param_int64 (sprintf "block.%d.rd.reqs" i) None
237 with None -> 0_L | Some v -> v);
239 (match get_param_int64 (sprintf "block.%d.rd.bytes" i) None
240 with None -> 0_L | Some v -> v);
242 (match get_param_int64 (sprintf "block.%d.wr.reqs" i) None
243 with None -> 0_L | Some v -> v);
245 (match get_param_int64 (sprintf "block.%d.wr.bytes" i) None
246 with None -> 0_L | Some v -> v);
249 ) (range 0 (nr_block_devs-1)) in
251 let nr_interface_devs =
252 match get_param_int "net.count" None with
255 let interface_stats =
259 match get_param (sprintf "net.%d.name" i) with
260 | None -> sprintf "net%d" i
261 | Some (D.TypedFieldString s) -> s
262 | _ -> assert false in
265 (match get_param_int64 (sprintf "net.%d.rx.bytes" i) None
266 with None -> 0_L | Some v -> v);
268 (match get_param_int64 (sprintf "net.%d.rx.pkts" i) None
269 with None -> 0_L | Some v -> v);
271 (match get_param_int64 (sprintf "net.%d.rx.errs" i) None
272 with None -> 0_L | Some v -> v);
274 (match get_param_int64 (sprintf "net.%d.rx.drop" i) None
275 with None -> 0_L | Some v -> v);
277 (match get_param_int64 (sprintf "net.%d.tx.bytes" i) None
278 with None -> 0_L | Some v -> v);
280 (match get_param_int64 (sprintf "net.%d.tx.pkts" i) None
281 with None -> 0_L | Some v -> v);
283 (match get_param_int64 (sprintf "net.%d.tx.errs" i) None
284 with None -> 0_L | Some v -> v);
286 (match get_param_int64 (sprintf "net.%d.tx.drop" i) None
287 with None -> 0_L | Some v -> v);
289 ) (range 0 (nr_interface_devs-1)) in
291 let prev_info, prev_block_stats, prev_interface_stats =
293 let prev_info, prev_block_stats, prev_interface_stats =
294 Hashtbl.find last_info uuid in
295 Some prev_info, prev_block_stats, prev_interface_stats
296 with Not_found -> None, [], [] in
300 rd_domid = id; rd_domuuid = uuid; rd_dom = dom;
302 rd_block_stats = block_stats;
303 rd_interface_stats = interface_stats;
304 rd_prev_info = prev_info;
305 rd_prev_block_stats = prev_block_stats;
306 rd_prev_interface_stats = prev_interface_stats;
307 rd_cpu_time = 0.; rd_percent_cpu = 0.;
308 rd_mem_bytes = 0L; rd_mem_percent = 0L;
309 rd_block_rd_reqs = None; rd_block_wr_reqs = None;
310 rd_block_rd_bytes = None; rd_block_wr_bytes = None;
311 rd_net_rx_bytes = None; rd_net_tx_bytes = None;
316 (* Calculate the CPU time (ns) and %CPU used by each domain. *)
320 (* We have previous CPU info from which to calculate it? *)
321 | name, Active ({ rd_prev_info = Some prev_info } as rd) ->
323 Int64.to_float (rd.rd_info.D.cpu_time -^ prev_info.D.cpu_time) in
324 let percent_cpu = 100. *. cpu_time /. total_cpu in
325 let mem_usage = rd.rd_info.D.memory in
327 100L *^ rd.rd_info.D.memory /^ node_info.C.memory in
329 rd_cpu_time = cpu_time;
330 rd_percent_cpu = percent_cpu;
331 rd_mem_bytes = mem_usage;
332 rd_mem_percent = mem_percent} in
334 (* For all other domains we can't calculate it, so leave as 0 *)
338 (* Calculate the number of block device read/write requests across
339 * all block devices attached to a domain.
344 (* Do we have stats from the previous slice? *)
345 | name, Active ({ rd_prev_block_stats = ((_::_) as prev_block_stats) }
347 let block_stats = rd.rd_block_stats in (* stats now *)
349 (* Add all the devices together. Throw away device names. *)
350 let prev_block_stats =
351 sum_block_stats (List.map snd prev_block_stats) in
353 sum_block_stats (List.map snd block_stats) in
355 (* Calculate increase in read & write requests. *)
357 block_stats.D.rd_req -^ prev_block_stats.D.rd_req in
359 block_stats.D.wr_req -^ prev_block_stats.D.wr_req in
361 block_stats.D.rd_bytes -^ prev_block_stats.D.rd_bytes in
363 block_stats.D.wr_bytes -^ prev_block_stats.D.wr_bytes in
366 rd_block_rd_reqs = Some read_reqs;
367 rd_block_wr_reqs = Some write_reqs;
368 rd_block_rd_bytes = Some read_bytes;
369 rd_block_wr_bytes = Some write_bytes;
372 (* For all other domains we can't calculate it, so leave as None. *)
376 (* Calculate the same as above for network interfaces across
377 * all network interfaces attached to a domain.
382 (* Do we have stats from the previous slice? *)
383 | name, Active ({ rd_prev_interface_stats =
384 ((_::_) as prev_interface_stats) }
386 let interface_stats = rd.rd_interface_stats in (* stats now *)
388 (* Add all the devices together. Throw away device names. *)
389 let prev_interface_stats =
390 sum_interface_stats (List.map snd prev_interface_stats) in
391 let interface_stats =
392 sum_interface_stats (List.map snd interface_stats) in
394 (* Calculate increase in rx & tx bytes. *)
396 interface_stats.D.rx_bytes -^ prev_interface_stats.D.rx_bytes in
398 interface_stats.D.tx_bytes -^ prev_interface_stats.D.tx_bytes in
401 rd_net_rx_bytes = Some rx_bytes;
402 rd_net_tx_bytes = Some tx_bytes } in
404 (* For all other domains we can't calculate it, so leave as None. *)
408 (* Calculate totals. *)
411 fun (count, running, blocked, paused, shutdown, shutoff,
412 crashed, active, inactive,
413 total_cpu_time, total_memory, total_domU_memory) ->
415 | (name, Active rd) ->
416 let test state orig =
417 if rd.rd_info.D.state = state then orig+1 else orig
419 let running = test D.InfoRunning running in
420 let blocked = test D.InfoBlocked blocked in
421 let paused = test D.InfoPaused paused in
422 let shutdown = test D.InfoShutdown shutdown in
423 let shutoff = test D.InfoShutoff shutoff in
424 let crashed = test D.InfoCrashed crashed in
426 let total_cpu_time = total_cpu_time +. rd.rd_cpu_time in
427 let total_memory = total_memory +^ rd.rd_info.D.memory in
428 let total_domU_memory =
430 if rd.rd_domid > 0 then rd.rd_info.D.memory else 0L in
432 (count+1, running, blocked, paused, shutdown, shutoff,
433 crashed, active+1, inactive,
434 total_cpu_time, total_memory, total_domU_memory)
436 | (name, Inactive) -> (* inactive domain *)
437 (count+1, running, blocked, paused, shutdown, shutoff,
438 crashed, active, inactive+1,
439 total_cpu_time, total_memory, total_domU_memory)
440 ) (0,0,0,0,0,0,0,0,0, 0.,0L,0L) doms in
442 (* Update last_time, last_info. *)
444 Hashtbl.clear last_info;
448 let info = rd.rd_info, rd.rd_block_stats, rd.rd_interface_stats in
449 Hashtbl.add last_info rd.rd_domuuid info
455 rd_printable_time = printable_time;
456 rd_nr_pcpus = nr_pcpus;
457 rd_total_cpu = total_cpu;
458 rd_total_cpu_per_pcpu = total_cpu_per_pcpu;
461 (* Collect some extra information in PCPUDisplay display_mode. *)
462 let collect_pcpu { rd_doms = doms; rd_nr_pcpus = nr_pcpus } =
463 (* Get the VCPU info and VCPU->PCPU mappings for active domains.
464 * Also cull some data we don't care about.
469 | (name, Active rd) ->
471 let domid = rd.rd_domid in
472 let maplen = C.cpumaplen nr_pcpus in
473 let cpu_stats = D.get_cpu_stats rd.rd_dom in
475 (* Note the terminology is confusing.
477 * In libvirt, cpu_time is the total time (hypervisor +
478 * vCPU). vcpu_time is the time only taken by the vCPU,
479 * excluding time taken inside the hypervisor.
481 * For each pCPU, libvirt may return either "cpu_time"
482 * or "vcpu_time" or neither or both. This function
483 * returns an array pair [|cpu_time, vcpu_time|];
484 * if either is missing it is returned as 0.
486 let find_cpu_usages params =
487 let rec find_uint64_field name = function
488 | (n, D.TypedFieldUInt64 usage) :: _ when n = name ->
490 | _ :: params -> find_uint64_field name params
493 [| find_uint64_field "cpu_time" params;
494 find_uint64_field "vcpu_time" params |]
497 let pcpu_usages = Array.map find_cpu_usages cpu_stats in
498 let maxinfo = rd.rd_info.D.nr_virt_cpu in
499 let nr_vcpus, vcpu_infos, cpumaps =
500 D.get_vcpus rd.rd_dom maxinfo maplen in
502 (* Got previous pcpu_usages for this domain? *)
503 let prev_pcpu_usages =
504 try Some (Hashtbl.find last_pcpu_usages domid)
505 with Not_found -> None in
506 (* Update last_pcpu_usages. *)
507 Hashtbl.replace last_pcpu_usages domid pcpu_usages;
509 (match prev_pcpu_usages with
510 | Some prev_pcpu_usages
511 when Array.length prev_pcpu_usages = Array.length pcpu_usages ->
512 Some (domid, name, nr_vcpus, vcpu_infos, pcpu_usages,
513 prev_pcpu_usages, cpumaps, maplen)
514 | _ -> None (* ignore missing / unequal length prev_vcpu_infos *)
517 Libvirt.Virterror _ -> None (* ignore transient libvirt errors *)
519 | (_, Inactive) -> None (* ignore inactive doms *)
521 let nr_doms = List.length doms in
523 (* Rearrange the data into a matrix. Major axis (down) is
524 * pCPUs. Minor axis (right) is domains. At each node we store:
525 * cpu_time hypervisor + domain (on this pCPU only, nanosecs),
526 * vcpu_time domain only (on this pCPU only, nanosecs).
528 let make_3d_array dimx dimy dimz e =
529 Array.init dimx (fun _ -> Array.make_matrix dimy dimz e)
531 let pcpus = make_3d_array nr_pcpus nr_doms 2 0L in
534 fun di (domid, name, nr_vcpus, vcpu_infos, pcpu_usages,
535 prev_pcpu_usages, cpumaps, maplen) ->
536 (* Which pCPUs can this dom run on? *)
537 for p = 0 to Array.length pcpu_usages - 1 do
538 pcpus.(p).(di).(0) <-
539 pcpu_usages.(p).(0) -^ prev_pcpu_usages.(p).(0);
540 pcpus.(p).(di).(1) <-
541 pcpu_usages.(p).(1) -^ prev_pcpu_usages.(p).(1)
545 (* Sum the total CPU time used by each pCPU, for the %CPU column. *)
549 let cpu_time = ref 0L in
550 for di = 0 to Array.length row-1 do
551 let t = row.(di).(0) in
552 cpu_time := !cpu_time +^ t
554 Int64.to_float !cpu_time
557 { rd_pcpu_doms = doms;
558 rd_pcpu_pcpus = pcpus;
559 rd_pcpu_pcpus_cpu_time = pcpus_cpu_time }