1 (* 'top'-like tool for libvirt domains.
2 (C) Copyright 2007-2021 Richard W.M. Jones, Red Hat Inc.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 module C = Libvirt.Connect
21 module D = Libvirt.Domain
28 (* Intermediate "domain + stats" structure that we use to collect
29 * everything we know about a domain within the collect function.
31 type rd_domain = Inactive | Active of rd_active
33 rd_domid : int; (* Domain ID. *)
34 rd_domuuid : Libvirt.uuid; (* Domain UUID. *)
35 rd_dom : [`R] D.t; (* Domain object. *)
36 rd_info : D.info; (* Domain CPU info now. *)
37 rd_block_stats : (string * D.block_stats) list;
38 (* Domain block stats now. *)
39 rd_interface_stats : (string * D.interface_stats) list;
40 (* Domain net stats now. *)
41 rd_prev_info : D.info option; (* Domain CPU info previously. *)
42 rd_prev_block_stats : (string * D.block_stats) list;
43 (* Domain block stats prev. *)
44 rd_prev_interface_stats : (string * D.interface_stats) list;
45 (* Domain interface stats prev. *)
46 (* The following are since the last slice, or 0 if cannot be calculated: *)
47 rd_cpu_time : float; (* CPU time used in nanoseconds. *)
48 rd_percent_cpu : float; (* CPU time as percent of total. *)
49 rd_mem_bytes : int64; (* Memory usage in bytes *)
50 rd_mem_percent: int64; (* Memory usage as percent of total *)
51 (* The following are since the last slice, or None if cannot be calc'd: *)
52 rd_block_rd_reqs : int64 option; (* Number of block device read rqs. *)
53 rd_block_wr_reqs : int64 option; (* Number of block device write rqs. *)
54 rd_block_rd_bytes : int64 option; (* Number of bytes block device read *)
55 rd_block_wr_bytes : int64 option; (* Number of bytes block device write *)
56 rd_net_rx_bytes : int64 option; (* Number of bytes received. *)
57 rd_net_tx_bytes : int64 option; (* Number of bytes transmitted. *)
61 rd_doms : (string * rd_domain) list; (* List of domains. *)
63 rd_printable_time : string;
66 rd_total_cpu_per_pcpu : float;
67 rd_totals : (int * int * int * int * int * int * int * int * int * float *
72 rd_pcpu_doms : (int * string * int *
73 Libvirt.Domain.vcpu_info array * int64 array array *
74 int64 array array * string * int) list;
75 rd_pcpu_pcpus : int64 array array array;
76 rd_pcpu_pcpus_cpu_time : float array
79 (* We cache the list of block devices and interfaces for each domain
80 * here, so we don't need to reparse the XML each time.
82 let devices = Hashtbl.create 13
84 (* Function to get the list of block devices, network interfaces for
85 * a particular domain. Get it from the devices cache, and if not
86 * there then parse the domain XML.
88 let get_devices id dom =
89 try Hashtbl.find devices id
91 let blkdevs, netifs = Xml.parse_device_xml dom in
92 Hashtbl.replace devices id (blkdevs, netifs);
95 (* We save the state of domains across redraws here, which allows us
96 * to deduce %CPU usage from the running total.
98 let last_info = Hashtbl.create 13
99 let last_time = ref (Unix.gettimeofday ())
101 (* Save pcpu_usages structures across redraws too (only for pCPU display). *)
102 let last_pcpu_usages = Hashtbl.create 13
104 let clear_pcpu_display_data () =
105 Hashtbl.clear last_pcpu_usages
107 (* What to get from virConnectGetAllDomainStats. *)
109 D.StatsState; D.StatsCpuTotal; D.StatsBalloon; D.StatsVcpu;
110 D.StatsInterface; D.StatsBlock
112 (* Which domains to get. Empty list means return all domains:
113 * active, inactive, persistent, transient etc.
117 let collect (conn, _, _, _, _, node_info, _, _) =
118 (* Number of physical CPUs (some may be disabled). *)
119 let nr_pcpus = C.maxcpus_of_node_info node_info in
121 (* Get the current time. *)
122 let time = Unix.gettimeofday () in
123 let tm = Unix.localtime time in
125 sprintf "%02d:%02d:%02d" tm.Unix.tm_hour tm.Unix.tm_min tm.Unix.tm_sec in
127 (* What's the total CPU time elapsed since we were last called? (ns) *)
128 let total_cpu_per_pcpu = 1_000_000_000. *. (time -. !last_time) in
129 (* Avoid division by zero. *)
130 let total_cpu_per_pcpu =
131 if total_cpu_per_pcpu <= 0. then 1. else total_cpu_per_pcpu in
132 let total_cpu = float node_info.C.cpus *. total_cpu_per_pcpu in
134 (* Get the domains. Match up with their last_info (if any). *)
136 let doms = D.get_all_domain_stats conn what who in
137 let doms = Array.to_list doms in
139 fun { D.dom_uuid = uuid; D.params = params } ->
141 let nr_params = Array.length params in
144 if i = nr_params then None
145 else if fst params.(i) = name then Some (snd params.(i))
150 let get_param_int name default =
151 match get_param name with
153 | Some (D.TypedFieldInt32 i)
154 | Some (D.TypedFieldUInt32 i) -> Some (Int32.to_int i)
155 | Some (D.TypedFieldInt64 i)
156 | Some (D.TypedFieldUInt64 i) -> Some (Int64.to_int i)
159 let get_param_int64 name default =
160 match get_param name with
162 | Some (D.TypedFieldInt32 i)
163 | Some (D.TypedFieldUInt32 i) -> Some (Int64.of_int32 i)
164 | Some (D.TypedFieldInt64 i)
165 | Some (D.TypedFieldUInt64 i) -> Some i
169 let dom = D.lookup_by_uuid conn uuid in
170 let id = D.get_id dom in
171 let name = D.get_name dom in
172 let state = get_param_int "state.state" None in
174 if state = Some 5 (* VIR_DOMAIN_SHUTOFF *) then
179 (* Synthesize a D.info struct out of the data we have
180 * from virConnectGetAllDomainStats. Doing this is an
181 * artifact from the old APIs we used to use to fetch
182 * stats, we could simplify here, and also return the
187 | None | Some 0 -> D.InfoNoState
188 | Some 1 -> D.InfoRunning
189 | Some 2 -> D.InfoBlocked
190 | Some 3 -> D.InfoPaused
191 | Some 4 -> D.InfoShutdown
192 | Some 5 -> D.InfoShutoff
193 | Some 6 -> D.InfoCrashed
194 | Some 7 -> D.InfoPaused (* XXX really VIR_DOMAIN_PMSUSPENDED *)
195 | _ -> D.InfoNoState in
197 match get_param_int64 "balloon.current" None with
201 match get_param_int "vcpu.current" None with
205 (* NB: libvirt does not return cpu.time for non-root domains. *)
206 match get_param_int64 "cpu.time" None with
211 max_mem = -1_L; (* not used anywhere in virt-top *)
213 nr_virt_cpu = nr_virt_cpu;
218 match get_param_int "block.count" None with
225 match get_param (sprintf "block.%d.name" i) with
226 | None -> sprintf "blk%d" i
227 | Some (D.TypedFieldString s) -> s
228 | _ -> assert false in
231 (let n = sprintf "block.%d.rd.reqs" i in
232 match get_param_int64 n None
233 with None -> 0_L | Some v -> v);
235 (let n = sprintf "block.%d.rd.bytes" i in
236 match get_param_int64 n None
237 with None -> 0_L | Some v -> v);
239 (let n = sprintf "block.%d.wr.reqs" i in
240 match get_param_int64 n None
241 with None -> 0_L | Some v -> v);
243 (let n = sprintf "block.%d.wr.bytes" i in
244 match get_param_int64 n None
245 with None -> 0_L | Some v -> v);
248 ) (range 0 (nr_block_devs-1)) in
250 let nr_interface_devs =
251 match get_param_int "net.count" None with
254 let interface_stats =
258 match get_param (sprintf "net.%d.name" i) with
259 | None -> sprintf "net%d" i
260 | Some (D.TypedFieldString s) -> s
261 | _ -> assert false in
264 (match get_param_int64 (sprintf "net.%d.rx.bytes" i) None
265 with None -> 0_L | Some v -> v);
267 (match get_param_int64 (sprintf "net.%d.rx.pkts" i) None
268 with None -> 0_L | Some v -> v);
270 (match get_param_int64 (sprintf "net.%d.rx.errs" i) None
271 with None -> 0_L | Some v -> v);
273 (match get_param_int64 (sprintf "net.%d.rx.drop" i) None
274 with None -> 0_L | Some v -> v);
276 (match get_param_int64 (sprintf "net.%d.tx.bytes" i) None
277 with None -> 0_L | Some v -> v);
279 (match get_param_int64 (sprintf "net.%d.tx.pkts" i) None
280 with None -> 0_L | Some v -> v);
282 (match get_param_int64 (sprintf "net.%d.tx.errs" i) None
283 with None -> 0_L | Some v -> v);
285 (match get_param_int64 (sprintf "net.%d.tx.drop" i) None
286 with None -> 0_L | Some v -> v);
288 ) (range 0 (nr_interface_devs-1)) in
290 let prev_info, prev_block_stats, prev_interface_stats =
292 let prev_info, prev_block_stats, prev_interface_stats =
293 Hashtbl.find last_info uuid in
294 Some prev_info, prev_block_stats, prev_interface_stats
295 with Not_found -> None, [], [] in
299 rd_domid = id; rd_domuuid = uuid; rd_dom = dom;
301 rd_block_stats = block_stats;
302 rd_interface_stats = interface_stats;
303 rd_prev_info = prev_info;
304 rd_prev_block_stats = prev_block_stats;
305 rd_prev_interface_stats = prev_interface_stats;
306 rd_cpu_time = 0.; rd_percent_cpu = 0.;
307 rd_mem_bytes = 0L; rd_mem_percent = 0L;
308 rd_block_rd_reqs = None; rd_block_wr_reqs = None;
309 rd_block_rd_bytes = None; rd_block_wr_bytes = None;
310 rd_net_rx_bytes = None; rd_net_tx_bytes = None;
314 Libvirt.Virterror _ ->
315 (* this can happen if a domain goes away while we
316 * are reading it, just report an inactive domain
321 (* Calculate the CPU time (ns) and %CPU used by each domain. *)
325 (* We have previous CPU info from which to calculate it? *)
326 | name, Active ({ rd_prev_info = Some prev_info } as rd) ->
328 Int64.to_float (rd.rd_info.D.cpu_time -^ prev_info.D.cpu_time) in
329 let percent_cpu = 100. *. cpu_time /. total_cpu in
330 let mem_usage = rd.rd_info.D.memory in
332 100L *^ rd.rd_info.D.memory /^ node_info.C.memory in
334 rd_cpu_time = cpu_time;
335 rd_percent_cpu = percent_cpu;
336 rd_mem_bytes = mem_usage;
337 rd_mem_percent = mem_percent} in
339 (* For all other domains we can't calculate it, so leave as 0 *)
343 (* Calculate the number of block device read/write requests across
344 * all block devices attached to a domain.
349 (* Do we have stats from the previous slice? *)
350 | name, Active ({ rd_prev_block_stats = ((_::_) as prev_block_stats) }
352 let block_stats = rd.rd_block_stats in (* stats now *)
354 (* Add all the devices together. Throw away device names. *)
355 let prev_block_stats =
356 sum_block_stats (List.map snd prev_block_stats) in
358 sum_block_stats (List.map snd block_stats) in
360 (* Calculate increase in read & write requests. *)
362 block_stats.D.rd_req -^ prev_block_stats.D.rd_req in
364 block_stats.D.wr_req -^ prev_block_stats.D.wr_req in
366 block_stats.D.rd_bytes -^ prev_block_stats.D.rd_bytes in
368 block_stats.D.wr_bytes -^ prev_block_stats.D.wr_bytes in
371 rd_block_rd_reqs = Some read_reqs;
372 rd_block_wr_reqs = Some write_reqs;
373 rd_block_rd_bytes = Some read_bytes;
374 rd_block_wr_bytes = Some write_bytes;
377 (* For all other domains we can't calculate it, so leave as None. *)
381 (* Calculate the same as above for network interfaces across
382 * all network interfaces attached to a domain.
387 (* Do we have stats from the previous slice? *)
388 | name, Active ({ rd_prev_interface_stats =
389 ((_::_) as prev_interface_stats) }
391 let interface_stats = rd.rd_interface_stats in (* stats now *)
393 (* Add all the devices together. Throw away device names. *)
394 let prev_interface_stats =
395 sum_interface_stats (List.map snd prev_interface_stats) in
396 let interface_stats =
397 sum_interface_stats (List.map snd interface_stats) in
399 (* Calculate increase in rx & tx bytes. *)
401 interface_stats.D.rx_bytes -^ prev_interface_stats.D.rx_bytes in
403 interface_stats.D.tx_bytes -^ prev_interface_stats.D.tx_bytes in
406 rd_net_rx_bytes = Some rx_bytes;
407 rd_net_tx_bytes = Some tx_bytes } in
409 (* For all other domains we can't calculate it, so leave as None. *)
413 (* Calculate totals. *)
416 fun (count, running, blocked, paused, shutdown, shutoff,
417 crashed, active, inactive,
418 total_cpu_time, total_memory, total_domU_memory) ->
420 | (name, Active rd) ->
421 let test state orig =
422 if rd.rd_info.D.state = state then orig+1 else orig
424 let running = test D.InfoRunning running in
425 let blocked = test D.InfoBlocked blocked in
426 let paused = test D.InfoPaused paused in
427 let shutdown = test D.InfoShutdown shutdown in
428 let shutoff = test D.InfoShutoff shutoff in
429 let crashed = test D.InfoCrashed crashed in
431 let total_cpu_time = total_cpu_time +. rd.rd_cpu_time in
432 let total_memory = total_memory +^ rd.rd_info.D.memory in
433 let total_domU_memory =
435 if rd.rd_domid > 0 then rd.rd_info.D.memory else 0L in
437 (count+1, running, blocked, paused, shutdown, shutoff,
438 crashed, active+1, inactive,
439 total_cpu_time, total_memory, total_domU_memory)
441 | (name, Inactive) -> (* inactive domain *)
442 (count+1, running, blocked, paused, shutdown, shutoff,
443 crashed, active, inactive+1,
444 total_cpu_time, total_memory, total_domU_memory)
445 ) (0,0,0,0,0,0,0,0,0, 0.,0L,0L) doms in
447 (* Update last_time, last_info. *)
449 Hashtbl.clear last_info;
453 let info = rd.rd_info, rd.rd_block_stats, rd.rd_interface_stats in
454 Hashtbl.add last_info rd.rd_domuuid info
460 rd_printable_time = printable_time;
461 rd_nr_pcpus = nr_pcpus;
462 rd_total_cpu = total_cpu;
463 rd_total_cpu_per_pcpu = total_cpu_per_pcpu;
466 (* Collect some extra information in PCPUDisplay display_mode. *)
467 let collect_pcpu { rd_doms = doms; rd_nr_pcpus = nr_pcpus } =
468 (* Get the VCPU info and VCPU->PCPU mappings for active domains.
469 * Also cull some data we don't care about.
474 | (name, Active rd) ->
476 let domid = rd.rd_domid in
477 let maplen = C.cpumaplen nr_pcpus in
478 let cpu_stats = D.get_cpu_stats rd.rd_dom in
480 (* Note the terminology is confusing.
482 * In libvirt, cpu_time is the total time (hypervisor +
483 * vCPU). vcpu_time is the time only taken by the vCPU,
484 * excluding time taken inside the hypervisor.
486 * For each pCPU, libvirt may return either "cpu_time"
487 * or "vcpu_time" or neither or both. This function
488 * returns an array pair [|cpu_time, vcpu_time|];
489 * if either is missing it is returned as 0.
491 let find_cpu_usages params =
492 let rec find_uint64_field name = function
493 | (n, D.TypedFieldUInt64 usage) :: _ when n = name ->
495 | _ :: params -> find_uint64_field name params
498 [| find_uint64_field "cpu_time" params;
499 find_uint64_field "vcpu_time" params |]
502 let pcpu_usages = Array.map find_cpu_usages cpu_stats in
503 let maxinfo = rd.rd_info.D.nr_virt_cpu in
504 let nr_vcpus, vcpu_infos, cpumaps =
505 D.get_vcpus rd.rd_dom maxinfo maplen in
507 (* Got previous pcpu_usages for this domain? *)
508 let prev_pcpu_usages =
509 try Some (Hashtbl.find last_pcpu_usages domid)
510 with Not_found -> None in
511 (* Update last_pcpu_usages. *)
512 Hashtbl.replace last_pcpu_usages domid pcpu_usages;
514 (match prev_pcpu_usages with
515 | Some prev_pcpu_usages
516 when Array.length prev_pcpu_usages = Array.length pcpu_usages ->
517 Some (domid, name, nr_vcpus, vcpu_infos, pcpu_usages,
518 prev_pcpu_usages, cpumaps, maplen)
519 | _ -> None (* ignore missing / unequal length prev_vcpu_infos *)
522 Libvirt.Virterror _ -> None (* ignore transient libvirt errors *)
524 | (_, Inactive) -> None (* ignore inactive doms *)
526 let nr_doms = List.length doms in
528 (* Rearrange the data into a matrix. Major axis (down) is
529 * pCPUs. Minor axis (right) is domains. At each node we store:
530 * cpu_time hypervisor + domain (on this pCPU only, nanosecs),
531 * vcpu_time domain only (on this pCPU only, nanosecs).
533 let make_3d_array dimx dimy dimz e =
534 Array.init dimx (fun _ -> Array.make_matrix dimy dimz e)
536 let pcpus = make_3d_array nr_pcpus nr_doms 2 0L in
539 fun di (domid, name, nr_vcpus, vcpu_infos, pcpu_usages,
540 prev_pcpu_usages, cpumaps, maplen) ->
541 (* Which pCPUs can this dom run on? *)
542 for p = 0 to Array.length pcpu_usages - 1 do
543 pcpus.(p).(di).(0) <-
544 pcpu_usages.(p).(0) -^ prev_pcpu_usages.(p).(0);
545 pcpus.(p).(di).(1) <-
546 pcpu_usages.(p).(1) -^ prev_pcpu_usages.(p).(1)
550 (* Sum the total CPU time used by each pCPU, for the %CPU column. *)
554 let cpu_time = ref 0L in
555 for di = 0 to Array.length row-1 do
556 let t = row.(di).(0) in
557 cpu_time := !cpu_time +^ t
559 Int64.to_float !cpu_time
562 { rd_pcpu_doms = doms;
563 rd_pcpu_pcpus = pcpus;
564 rd_pcpu_pcpus_cpu_time = pcpus_cpu_time }