(* 'top'-like tool for libvirt domains.
   (C) Copyright 2007-2021 Richard W.M. Jones, Red Hat Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*)
20 module C = Libvirt.Connect
21 module D = Libvirt.Domain
(* Intermediate "domain + stats" structure that we use to collect
 * everything we know about a domain within the collect function.
 *
 * A domain is either Inactive (nothing else is recorded about it) or
 * Active, in which case it carries an rd_active record.  The rd_prev_*
 * fields hold the sample remembered from the previous call to collect;
 * the fields after them start out as 0 / None when the record is
 * created and are filled in by the later passes of collect.
 *)
type rd_domain = Inactive | Active of rd_active
and rd_active = {
  rd_domid : int;                       (* Domain ID. *)
  rd_domuuid : Libvirt.uuid;            (* Domain UUID. *)
  rd_dom : [`R] D.t;                    (* Domain object. *)
  rd_info : D.info;                     (* Domain CPU info now. *)
  rd_block_stats : (string * D.block_stats) list;
                                        (* Domain block stats now. *)
  rd_interface_stats : (string * D.interface_stats) list;
                                        (* Domain net stats now. *)
  rd_prev_info : D.info option;         (* Domain CPU info previously. *)
  rd_prev_block_stats : (string * D.block_stats) list;
                                        (* Domain block stats prev. *)
  rd_prev_interface_stats : (string * D.interface_stats) list;
                                        (* Domain interface stats prev. *)

  (* The following are since the last slice, or 0 if cannot be calculated: *)
  rd_cpu_time : float;                  (* CPU time used in nanoseconds. *)
  rd_percent_cpu : float;               (* CPU time as percent of total. *)
  rd_mem_bytes : int64;                 (* Memory usage in bytes *)
  rd_mem_percent: int64;                (* Memory usage as percent of total *)

  (* The following are since the last slice, or None if cannot be calc'd: *)
  rd_block_rd_reqs : int64 option;      (* Number of block device read rqs. *)
  rd_block_wr_reqs : int64 option;      (* Number of block device write rqs. *)
  rd_block_rd_bytes : int64 option;     (* Number of bytes block device read *)
  rd_block_wr_bytes : int64 option;     (* Number of bytes block device write *)
  rd_net_rx_bytes : int64 option;       (* Number of bytes received. *)
  rd_net_tx_bytes : int64 option;       (* Number of bytes transmitted. *)
}
(* NOTE(review): only record fields are present here.  The type headers
 * and closing braces that should surround them are missing from this
 * copy of the file, some fields referenced elsewhere (for example
 * rd_nr_pcpus and rd_total_cpu, both set at the end of collect) are not
 * listed, and the rd_totals tuple type is cut off.  The rd_pcpu_* fields
 * are the ones set in the record returned by collect_pcpu.
 *)
61 rd_doms : (string * rd_domain) list; (* List of domains. *)
63 rd_printable_time : string;
(* rd_printable_time is formatted by collect as hours:minutes:seconds. *)
66 rd_total_cpu_per_pcpu : float;
(* rd_total_cpu_per_pcpu is the time in nanoseconds between two calls of
 * collect, forced to 1 when it would not be positive.
 *)
67 rd_totals : (int * int * int * int * int * int * int * int * int * float *
(* Presumably the totals folded in collect: count, running, blocked,
 * paused, shutdown, shutoff, crashed, active, inactive, total CPU time
 * and the memory totals -- TODO confirm against the missing lines.
 *)
72 rd_pcpu_doms : (int * string * int *
73 Libvirt.Domain.vcpu_info array * int64 array array *
74 int64 array array * string * int) list;
(* Each rd_pcpu_doms entry is built in collect_pcpu as (domid, name,
 * nr_vcpus, vcpu_infos, pcpu_usages, prev_pcpu_usages, cpumaps, maplen).
 *)
75 rd_pcpu_pcpus : int64 array array array;
(* Indexed as pcpus.(pcpu).(domain).(k): k = 0 is the cpu_time delta and
 * k = 1 the vcpu_time delta since the previous sample.
 *)
76 rd_pcpu_pcpus_cpu_time : float array
(* Per physical CPU: the sum over all domains of the cpu_time deltas. *)
(* Cache consulted and filled by get_devices: the key is the id passed
 * to get_devices, the value is the (blkdevs, netifs) pair obtained from
 * Xml.parse_device_xml.
 *)
79 (* We cache the list of block devices and interfaces for each domain
80 * here, so we don't need to reparse the XML each time.
82 let devices = Hashtbl.create 13
(* Function to get the list of block devices, network interfaces for
 * a particular domain.  Get it from the devices cache, and if not
 * there then parse the domain XML.
 *
 * id is the cache key and dom is the domain handed to
 * Xml.parse_device_xml on a cache miss.  Returns the pair
 * (blkdevs, netifs); a freshly parsed pair is stored in the cache
 * before it is returned.
 *)
let get_devices id dom =
  try Hashtbl.find devices id
  with Not_found ->
    (* Cache miss: parse the XML once and remember the result. *)
    let blkdevs, netifs = Xml.parse_device_xml dom in
    Hashtbl.replace devices id (blkdevs, netifs);
    blkdevs, netifs
(* State kept between calls of collect.  last_info is keyed by domain
 * UUID and holds the (info, block stats, interface stats) triple of the
 * previous sample; collect clears and refills it on every call.
 * last_time is read by collect to compute the elapsed time; presumably
 * it is also updated there (that line is not visible here -- TODO
 * confirm).
 *)
95 (* We save the state of domains across redraws here, which allows us
96 * to deduce %CPU usage from the running total.
98 let last_info = Hashtbl.create 13
99 let last_time = ref (Unix.gettimeofday ())
(* Keyed by domain id; the value is the pcpu_usages array computed for
 * that domain by the previous call of collect_pcpu.
 *)
101 (* Save pcpu_usages structures across redraws too (only for pCPU display). *)
102 let last_pcpu_usages = Hashtbl.create 13
(* Forget every remembered pcpu_usages sample.  After this call
 * collect_pcpu finds no previous sample for any domain, so it reports a
 * domain only from its second sample onwards.
 *)
104 let clear_pcpu_display_data () =
105 Hashtbl.clear last_pcpu_usages
(* What to get from virConnectGetAllDomainStats.  This list is passed by
 * collect to D.get_all_domain_stats and selects the groups of
 * statistics (state, total CPU, balloon, vCPU, interface, block) whose
 * parameters collect reads back.
 *)
let what = [
  D.StatsState; D.StatsCpuTotal; D.StatsBalloon; D.StatsVcpu;
  D.StatsInterface; D.StatsBlock
]
(* Which domains to get.  Empty list means return all domains:
 * active, inactive, persistent, transient etc.
 *
 * Passed by collect to D.get_all_domain_stats together with what.
 *)
let who = []
(* Take one sample of statistics for the domains of a connection.
 *
 * The argument is an 8-tuple of which only the first component (the
 * libvirt connection) and the sixth (the node info) are used.
 *
 * Steps visible in this function:
 *  - work out the time elapsed since last_time, in nanoseconds, and the
 *    total CPU time available on the node during that interval;
 *  - fetch the per-domain parameters with D.get_all_domain_stats and
 *    build, for each domain, a synthesized D.info plus the lists of
 *    block and interface statistics, paired with the previous sample
 *    found in last_info (if any);
 *  - derive per-domain values: CPU time and %CPU, memory, block
 *    request/byte deltas and network byte deltas;
 *  - fold the per-state counters and the CPU and memory totals;
 *  - store the new samples in last_info for the next call.
 *
 * NOTE(review): a number of lines of this function are missing from
 * this copy of the file (for example the let-bindings that introduce
 * several of the match expressions, the branch taken for shut-off
 * domains and the beginning of the final record), so the text below
 * does not parse as it stands.
 *)
117 let collect (conn, _, _, _, _, node_info, _, _) =
118 (* Number of physical CPUs (some may be disabled). *)
119 let nr_pcpus = C.maxcpus_of_node_info node_info in
121 (* Get the current time. *)
122 let time = Unix.gettimeofday () in
123 let tm = Unix.localtime time in
125 sprintf "%02d:%02d:%02d" tm.Unix.tm_hour tm.Unix.tm_min tm.Unix.tm_sec in
127 (* What's the total CPU time elapsed since we were last called? (ns) *)
128 let total_cpu_per_pcpu = 1_000_000_000. *. (time -. !last_time) in
129 (* Avoid division by zero. *)
130 let total_cpu_per_pcpu =
131 if total_cpu_per_pcpu <= 0. then 1. else total_cpu_per_pcpu in
132 let total_cpu = float node_info.C.cpus *. total_cpu_per_pcpu in
134 (* Get the domains. Match up with their last_info (if any). *)
136 let doms = D.get_all_domain_stats conn what who in
137 let doms = Array.to_list doms in
139 fun { D.dom_uuid = uuid; D.params = params } ->
140 let nr_params = Array.length params in
143 if i = nr_params then None
144 else if fst params.(i) = name then Some (snd params.(i))
(* Read a named parameter as a native int: 32-bit and 64-bit typed
 * fields, signed or unsigned, are all converted with to_int.
 *)
149 let get_param_int name default =
150 match get_param name with
152 | Some (D.TypedFieldInt32 i)
153 | Some (D.TypedFieldUInt32 i) -> Some (Int32.to_int i)
154 | Some (D.TypedFieldInt64 i)
155 | Some (D.TypedFieldUInt64 i) -> Some (Int64.to_int i)
(* Same lookup, but the value is returned as an int64; 32-bit fields are
 * widened with Int64.of_int32.
 *)
158 let get_param_int64 name default =
159 match get_param name with
161 | Some (D.TypedFieldInt32 i)
162 | Some (D.TypedFieldUInt32 i) -> Some (Int64.of_int32 i)
163 | Some (D.TypedFieldInt64 i)
164 | Some (D.TypedFieldUInt64 i) -> Some i
168 let dom = D.lookup_by_uuid conn uuid in
169 let id = D.get_id dom in
170 let name = D.get_name dom in
171 let state = get_param_int "state.state" None in
173 if state = Some 5 (* VIR_DOMAIN_SHUTOFF *) then
178 (* Synthesize a D.info struct out of the data we have
179 * from virConnectGetAllDomainStats. Doing this is an
180 * artifact from the old APIs we used to use to fetch
181 * stats, we could simplify here, and also return the
186 | None | Some 0 -> D.InfoNoState
187 | Some 1 -> D.InfoRunning
188 | Some 2 -> D.InfoBlocked
189 | Some 3 -> D.InfoPaused
190 | Some 4 -> D.InfoShutdown
191 | Some 5 -> D.InfoShutoff
192 | Some 6 -> D.InfoCrashed
193 | Some 7 -> D.InfoPaused (* XXX really VIR_DOMAIN_PMSUSPENDED *)
194 | _ -> D.InfoNoState in
196 match get_param_int64 "balloon.current" None with
200 match get_param_int "vcpu.current" None with
204 (* NB: libvirt does not return cpu.time for non-root domains. *)
205 match get_param_int64 "cpu.time" None with
210 max_mem = -1_L; (* not used anywhere in virt-top *)
212 nr_virt_cpu = nr_virt_cpu;
(* Block device statistics, one entry per device index below the
 * block.count parameter.  A device whose name is not reported is called
 * blkN, and a counter that is not reported is taken as 0.
 *)
217 match get_param_int "block.count" None with
224 match get_param (sprintf "block.%d.name" i) with
225 | None -> sprintf "blk%d" i
226 | Some (D.TypedFieldString s) -> s
227 | _ -> assert false in
230 (match get_param_int64 (sprintf "block.%d.rd.reqs" i) None
231 with None -> 0_L | Some v -> v);
233 (match get_param_int64 (sprintf "block.%d.rd.bytes" i) None
234 with None -> 0_L | Some v -> v);
236 (match get_param_int64 (sprintf "block.%d.wr.reqs" i) None
237 with None -> 0_L | Some v -> v);
239 (match get_param_int64 (sprintf "block.%d.wr.bytes" i) None
240 with None -> 0_L | Some v -> v);
243 ) (range 0 (nr_block_devs-1)) in
(* Network interface statistics, built the same way from the net.count
 * parameter; an unnamed interface is called netN and a counter that is
 * not reported is taken as 0.
 *)
245 let nr_interface_devs =
246 match get_param_int "net.count" None with
249 let interface_stats =
253 match get_param (sprintf "net.%d.name" i) with
254 | None -> sprintf "net%d" i
255 | Some (D.TypedFieldString s) -> s
256 | _ -> assert false in
259 (match get_param_int64 (sprintf "net.%d.rx.bytes" i) None
260 with None -> 0_L | Some v -> v);
262 (match get_param_int64 (sprintf "net.%d.rx.pkts" i) None
263 with None -> 0_L | Some v -> v);
265 (match get_param_int64 (sprintf "net.%d.rx.errs" i) None
266 with None -> 0_L | Some v -> v);
268 (match get_param_int64 (sprintf "net.%d.rx.drop" i) None
269 with None -> 0_L | Some v -> v);
271 (match get_param_int64 (sprintf "net.%d.tx.bytes" i) None
272 with None -> 0_L | Some v -> v);
274 (match get_param_int64 (sprintf "net.%d.tx.pkts" i) None
275 with None -> 0_L | Some v -> v);
277 (match get_param_int64 (sprintf "net.%d.tx.errs" i) None
278 with None -> 0_L | Some v -> v);
280 (match get_param_int64 (sprintf "net.%d.tx.drop" i) None
281 with None -> 0_L | Some v -> v);
283 ) (range 0 (nr_interface_devs-1)) in
(* Previous sample for this UUID from last_info.  A domain that was not
 * present in the previous sample gets None and two empty lists, which
 * the later passes treat as: deltas cannot be calculated.
 *)
285 let prev_info, prev_block_stats, prev_interface_stats =
287 let prev_info, prev_block_stats, prev_interface_stats =
288 Hashtbl.find last_info uuid in
289 Some prev_info, prev_block_stats, prev_interface_stats
290 with Not_found -> None, [], [] in
294 rd_domid = id; rd_domuuid = uuid; rd_dom = dom;
296 rd_block_stats = block_stats;
297 rd_interface_stats = interface_stats;
298 rd_prev_info = prev_info;
299 rd_prev_block_stats = prev_block_stats;
300 rd_prev_interface_stats = prev_interface_stats;
301 rd_cpu_time = 0.; rd_percent_cpu = 0.;
302 rd_mem_bytes = 0L; rd_mem_percent = 0L;
303 rd_block_rd_reqs = None; rd_block_wr_reqs = None;
304 rd_block_rd_bytes = None; rd_block_wr_bytes = None;
305 rd_net_rx_bytes = None; rd_net_tx_bytes = None;
310 (* Calculate the CPU time (ns) and %CPU used by each domain. *)
314 (* We have previous CPU info from which to calculate it? *)
315 | name, Active ({ rd_prev_info = Some prev_info } as rd) ->
317 Int64.to_float (rd.rd_info.D.cpu_time -^ prev_info.D.cpu_time) in
318 let percent_cpu = 100. *. cpu_time /. total_cpu in
319 let mem_usage = rd.rd_info.D.memory in
321 100L *^ rd.rd_info.D.memory /^ node_info.C.memory in
323 rd_cpu_time = cpu_time;
324 rd_percent_cpu = percent_cpu;
325 rd_mem_bytes = mem_usage;
326 rd_mem_percent = mem_percent} in
328 (* For all other domains we can't calculate it, so leave as 0 *)
332 (* Calculate the number of block device read/write requests across
333 * all block devices attached to a domain.
338 (* Do we have stats from the previous slice? *)
339 | name, Active ({ rd_prev_block_stats = ((_::_) as prev_block_stats) }
341 let block_stats = rd.rd_block_stats in (* stats now *)
343 (* Add all the devices together. Throw away device names. *)
344 let prev_block_stats =
345 sum_block_stats (List.map snd prev_block_stats) in
347 sum_block_stats (List.map snd block_stats) in
349 (* Calculate increase in read & write requests. *)
351 block_stats.D.rd_req -^ prev_block_stats.D.rd_req in
353 block_stats.D.wr_req -^ prev_block_stats.D.wr_req in
355 block_stats.D.rd_bytes -^ prev_block_stats.D.rd_bytes in
357 block_stats.D.wr_bytes -^ prev_block_stats.D.wr_bytes in
360 rd_block_rd_reqs = Some read_reqs;
361 rd_block_wr_reqs = Some write_reqs;
362 rd_block_rd_bytes = Some read_bytes;
363 rd_block_wr_bytes = Some write_bytes;
366 (* For all other domains we can't calculate it, so leave as None. *)
370 (* Calculate the same as above for network interfaces across
371 * all network interfaces attached to a domain.
376 (* Do we have stats from the previous slice? *)
377 | name, Active ({ rd_prev_interface_stats =
378 ((_::_) as prev_interface_stats) }
380 let interface_stats = rd.rd_interface_stats in (* stats now *)
382 (* Add all the devices together. Throw away device names. *)
383 let prev_interface_stats =
384 sum_interface_stats (List.map snd prev_interface_stats) in
385 let interface_stats =
386 sum_interface_stats (List.map snd interface_stats) in
388 (* Calculate increase in rx & tx bytes. *)
390 interface_stats.D.rx_bytes -^ prev_interface_stats.D.rx_bytes in
392 interface_stats.D.tx_bytes -^ prev_interface_stats.D.tx_bytes in
395 rd_net_rx_bytes = Some rx_bytes;
396 rd_net_tx_bytes = Some tx_bytes } in
398 (* For all other domains we can't calculate it, so leave as None. *)
402 (* Calculate totals. *)
405 fun (count, running, blocked, paused, shutdown, shutoff,
406 crashed, active, inactive,
407 total_cpu_time, total_memory, total_domU_memory) ->
409 | (name, Active rd) ->
410 let test state orig =
411 if rd.rd_info.D.state = state then orig+1 else orig
413 let running = test D.InfoRunning running in
414 let blocked = test D.InfoBlocked blocked in
415 let paused = test D.InfoPaused paused in
416 let shutdown = test D.InfoShutdown shutdown in
417 let shutoff = test D.InfoShutoff shutoff in
418 let crashed = test D.InfoCrashed crashed in
420 let total_cpu_time = total_cpu_time +. rd.rd_cpu_time in
421 let total_memory = total_memory +^ rd.rd_info.D.memory in
422 let total_domU_memory =
424 if rd.rd_domid > 0 then rd.rd_info.D.memory else 0L in
426 (count+1, running, blocked, paused, shutdown, shutoff,
427 crashed, active+1, inactive,
428 total_cpu_time, total_memory, total_domU_memory)
430 | (name, Inactive) -> (* inactive domain *)
431 (count+1, running, blocked, paused, shutdown, shutoff,
432 crashed, active, inactive+1,
433 total_cpu_time, total_memory, total_domU_memory)
434 ) (0,0,0,0,0,0,0,0,0, 0.,0L,0L) doms in
436 (* Update last_time, last_info. *)
(* last_info is emptied and refilled from the current sample, so entries
 * for domains that are no longer reported do not linger.
 *)
438 Hashtbl.clear last_info;
442 let info = rd.rd_info, rd.rd_block_stats, rd.rd_interface_stats in
443 Hashtbl.add last_info rd.rd_domuuid info
449 rd_printable_time = printable_time;
450 rd_nr_pcpus = nr_pcpus;
451 rd_total_cpu = total_cpu;
452 rd_total_cpu_per_pcpu = total_cpu_per_pcpu;
(* Collect the additional per-physical-CPU data for the pCPU display.
 *
 * The argument is a record from which only rd_doms and rd_nr_pcpus are
 * read.  For every active domain the function calls D.get_cpu_stats and
 * D.get_vcpus, extracts the cpu_time and vcpu_time values for each
 * pCPU, and remembers them in last_pcpu_usages under the domain id.  A
 * domain is reported only when a previous sample with the same number
 * of entries exists; inactive domains and domains for which libvirt
 * raised Libvirt.Virterror are left out.
 *
 * The result record has three fields: rd_pcpu_doms (the per-domain
 * tuples), rd_pcpu_pcpus (the pCPU x domain matrix of cpu_time and
 * vcpu_time deltas since the previous sample) and
 * rd_pcpu_pcpus_cpu_time (per pCPU, the sum of the cpu_time deltas of
 * all domains, as a float).
 *
 * NOTE(review): several lines of this function are missing from this
 * copy of the file (for example the try that belongs to the Virterror
 * handler and the ends of both loops), so the text below does not parse
 * as it stands.
 *)
455 (* Collect some extra information in PCPUDisplay display_mode. *)
456 let collect_pcpu { rd_doms = doms; rd_nr_pcpus = nr_pcpus } =
457 (* Get the VCPU info and VCPU->PCPU mappings for active domains.
458 * Also cull some data we don't care about.
463 | (name, Active rd) ->
465 let domid = rd.rd_domid in
466 let maplen = C.cpumaplen nr_pcpus in
467 let cpu_stats = D.get_cpu_stats rd.rd_dom in
469 (* Note the terminology is confusing.
471 * In libvirt, cpu_time is the total time (hypervisor +
472 * vCPU). vcpu_time is the time only taken by the vCPU,
473 * excluding time taken inside the hypervisor.
475 * For each pCPU, libvirt may return either "cpu_time"
476 * or "vcpu_time" or neither or both. This function
477 * returns an array pair [|cpu_time, vcpu_time|];
478 * if either is missing it is returned as 0.
480 let find_cpu_usages params =
481 let rec find_uint64_field name = function
482 | (n, D.TypedFieldUInt64 usage) :: _ when n = name ->
484 | _ :: params -> find_uint64_field name params
487 [| find_uint64_field "cpu_time" params;
488 find_uint64_field "vcpu_time" params |]
491 let pcpu_usages = Array.map find_cpu_usages cpu_stats in
492 let maxinfo = rd.rd_info.D.nr_virt_cpu in
493 let nr_vcpus, vcpu_infos, cpumaps =
494 D.get_vcpus rd.rd_dom maxinfo maplen in
496 (* Got previous pcpu_usages for this domain? *)
497 let prev_pcpu_usages =
498 try Some (Hashtbl.find last_pcpu_usages domid)
499 with Not_found -> None in
500 (* Update last_pcpu_usages. *)
501 Hashtbl.replace last_pcpu_usages domid pcpu_usages;
(* The previous sample was read before the table was updated above, so
 * prev_pcpu_usages really is the older one.  The length check keeps the
 * element-wise subtraction done further down within bounds.
 *)
503 (match prev_pcpu_usages with
504 | Some prev_pcpu_usages
505 when Array.length prev_pcpu_usages = Array.length pcpu_usages ->
506 Some (domid, name, nr_vcpus, vcpu_infos, pcpu_usages,
507 prev_pcpu_usages, cpumaps, maplen)
508 | _ -> None (* ignore missing / unequal length prev_vcpu_infos *)
511 Libvirt.Virterror _ -> None (* ignore transient libvirt errors *)
513 | (_, Inactive) -> None (* ignore inactive doms *)
515 let nr_doms = List.length doms in
517 (* Rearrange the data into a matrix. Major axis (down) is
518 * pCPUs. Minor axis (right) is domains. At each node we store:
519 * cpu_time hypervisor + domain (on this pCPU only, nanosecs),
520 * vcpu_time domain only (on this pCPU only, nanosecs).
522 let make_3d_array dimx dimy dimz e =
523 Array.init dimx (fun _ -> Array.make_matrix dimy dimz e)
525 let pcpus = make_3d_array nr_pcpus nr_doms 2 0L in
528 fun di (domid, name, nr_vcpus, vcpu_infos, pcpu_usages,
529 prev_pcpu_usages, cpumaps, maplen) ->
530 (* Which pCPUs can this dom run on? *)
531 for p = 0 to Array.length pcpu_usages - 1 do
532 pcpus.(p).(di).(0) <-
533 pcpu_usages.(p).(0) -^ prev_pcpu_usages.(p).(0);
534 pcpus.(p).(di).(1) <-
535 pcpu_usages.(p).(1) -^ prev_pcpu_usages.(p).(1)
539 (* Sum the total CPU time used by each pCPU, for the %CPU column. *)
543 let cpu_time = ref 0L in
544 for di = 0 to Array.length row-1 do
545 let t = row.(di).(0) in
546 cpu_time := !cpu_time +^ t
548 Int64.to_float !cpu_time
551 { rd_pcpu_doms = doms;
552 rd_pcpu_pcpus = pcpus;
553 rd_pcpu_pcpus_cpu_time = pcpus_cpu_time }