1 (* 'top'-like tool for libvirt domains.
2 (C) Copyright 2007-2017 Richard W.M. Jones, Red Hat Inc.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 module C = Libvirt.Connect
21 module D = Libvirt.Domain
29 (* Hook for XML support (see [opt_xml.ml]). *)
30 let parse_device_xml : (int -> [>`R] D.t -> string list * string list) ref =
35 (* Intermediate "domain + stats" structure that we use to collect
36 * everything we know about a domain within the collect function.
38 type rd_domain = Inactive | Active of rd_active
40 rd_domid : int; (* Domain ID. *)
41 rd_domuuid : Libvirt.uuid; (* Domain UUID. *)
42 rd_dom : [`R] D.t; (* Domain object. *)
43 rd_info : D.info; (* Domain CPU info now. *)
44 rd_block_stats : (string * D.block_stats) list;
45 (* Domain block stats now. *)
46 rd_interface_stats : (string * D.interface_stats) list;
47 (* Domain net stats now. *)
48 rd_prev_info : D.info option; (* Domain CPU info previously. *)
49 rd_prev_block_stats : (string * D.block_stats) list;
50 (* Domain block stats prev. *)
51 rd_prev_interface_stats : (string * D.interface_stats) list;
52 (* Domain interface stats prev. *)
53 (* The following are since the last slice, or 0 if cannot be calculated: *)
54 rd_cpu_time : float; (* CPU time used in nanoseconds. *)
55 rd_percent_cpu : float; (* CPU time as percent of total. *)
56 rd_mem_bytes : int64; (* Memory usage in bytes *)
57 rd_mem_percent: int64; (* Memory usage as percent of total *)
58 (* The following are since the last slice, or None if cannot be calc'd: *)
59 rd_block_rd_reqs : int64 option; (* Number of block device read rqs. *)
60 rd_block_wr_reqs : int64 option; (* Number of block device write rqs. *)
61 rd_block_rd_bytes : int64 option; (* Number of bytes block device read *)
62 rd_block_wr_bytes : int64 option; (* Number of bytes block device write *)
63 rd_net_rx_bytes : int64 option; (* Number of bytes received. *)
64 rd_net_tx_bytes : int64 option; (* Number of bytes transmitted. *)
68 rd_doms : (string * rd_domain) list; (* List of domains. *)
70 rd_printable_time : string;
73 rd_total_cpu_per_pcpu : float;
74 rd_totals : (int * int * int * int * int * int * int * int * int * float *
79 rd_pcpu_doms : (int * string * int *
80 Libvirt.Domain.vcpu_info array * int64 array array *
81 int64 array array * string * int) list;
82 rd_pcpu_pcpus : int64 array array array;
83 rd_pcpu_pcpus_cpu_time : float array
86 (* We cache the list of block devices and interfaces for each domain
87 * here, so we don't need to reparse the XML each time.
89 let devices = Hashtbl.create 13
91 (* Function to get the list of block devices, network interfaces for
92 * a particular domain. Get it from the devices cache, and if not
93 * there then parse the domain XML.
95 let get_devices id dom =
96 try Hashtbl.find devices id
98 let blkdevs, netifs = (!parse_device_xml) id dom in
99 Hashtbl.replace devices id (blkdevs, netifs);
102 (* We save the state of domains across redraws here, which allows us
103 * to deduce %CPU usage from the running total.
105 let last_info = Hashtbl.create 13
106 let last_time = ref (Unix.gettimeofday ())
108 (* Save pcpu_usages structures across redraws too (only for pCPU display). *)
109 let last_pcpu_usages = Hashtbl.create 13
111 let clear_pcpu_display_data () =
112 Hashtbl.clear last_pcpu_usages
114 (* What to get from virConnectGetAllDomainStats. *)
116 D.StatsState; D.StatsCpuTotal; D.StatsBalloon; D.StatsVcpu;
117 D.StatsInterface; D.StatsBlock
119 (* Which domains to get. Empty list means return all domains:
120 * active, inactive, persistent, transient etc.
124 let collect (conn, _, _, _, _, node_info, _, _) =
125 (* Number of physical CPUs (some may be disabled). *)
126 let nr_pcpus = C.maxcpus_of_node_info node_info in
128 (* Get the current time. *)
129 let time = Unix.gettimeofday () in
130 let tm = Unix.localtime time in
132 sprintf "%02d:%02d:%02d" tm.Unix.tm_hour tm.Unix.tm_min tm.Unix.tm_sec in
134 (* What's the total CPU time elapsed since we were last called? (ns) *)
135 let total_cpu_per_pcpu = 1_000_000_000. *. (time -. !last_time) in
136 (* Avoid division by zero. *)
137 let total_cpu_per_pcpu =
138 if total_cpu_per_pcpu <= 0. then 1. else total_cpu_per_pcpu in
139 let total_cpu = float node_info.C.cpus *. total_cpu_per_pcpu in
141 (* Get the domains. Match up with their last_info (if any). *)
143 let doms = D.get_all_domain_stats conn what who in
144 let doms = Array.to_list doms in
146 fun { D.dom_uuid = uuid; D.params = params } ->
147 let nr_params = Array.length params in
150 if i = nr_params then None
151 else if fst params.(i) = name then Some (snd params.(i))
156 let get_param_int name default =
157 match get_param name with
159 | Some (D.TypedFieldInt32 i)
160 | Some (D.TypedFieldUInt32 i) -> Some (Int32.to_int i)
161 | Some (D.TypedFieldInt64 i)
162 | Some (D.TypedFieldUInt64 i) -> Some (Int64.to_int i)
165 let get_param_int64 name default =
166 match get_param name with
168 | Some (D.TypedFieldInt32 i)
169 | Some (D.TypedFieldUInt32 i) -> Some (Int64.of_int32 i)
170 | Some (D.TypedFieldInt64 i)
171 | Some (D.TypedFieldUInt64 i) -> Some i
175 let dom = D.lookup_by_uuid conn uuid in
176 let id = D.get_id dom in
177 let name = D.get_name dom in
178 let state = get_param_int "state.state" None in
180 if state = Some 5 (* VIR_DOMAIN_SHUTOFF *) then
185 (* Synthesize a D.info struct out of the data we have
186 * from virConnectGetAllDomainStats. Doing this is an
187 * artifact from the old APIs we used to use to fetch
188 * stats, we could simplify here, and also return the
193 | None | Some 0 -> D.InfoNoState
194 | Some 1 -> D.InfoRunning
195 | Some 2 -> D.InfoBlocked
196 | Some 3 -> D.InfoPaused
197 | Some 4 -> D.InfoShutdown
198 | Some 5 -> D.InfoShutoff
199 | Some 6 -> D.InfoCrashed
200 | Some 7 -> D.InfoPaused (* XXX really VIR_DOMAIN_PMSUSPENDED *)
201 | _ -> D.InfoNoState in
203 match get_param_int64 "balloon.current" None with
207 match get_param_int "vcpu.current" None with
211 (* NB: libvirt does not return cpu.time for non-root domains. *)
212 match get_param_int64 "cpu.time" None with
217 max_mem = -1_L; (* not used anywhere in virt-top *)
219 nr_virt_cpu = nr_virt_cpu;
224 match get_param_int "block.count" None with
231 match get_param (sprintf "block.%d.name" i) with
232 | None -> sprintf "blk%d" i
233 | Some (D.TypedFieldString s) -> s
234 | _ -> assert false in
237 (match get_param_int64 (sprintf "block.%d.rd.reqs" i) None
238 with None -> 0_L | Some v -> v);
240 (match get_param_int64 (sprintf "block.%d.rd.bytes" i) None
241 with None -> 0_L | Some v -> v);
243 (match get_param_int64 (sprintf "block.%d.wr.reqs" i) None
244 with None -> 0_L | Some v -> v);
246 (match get_param_int64 (sprintf "block.%d.wr.bytes" i) None
247 with None -> 0_L | Some v -> v);
250 ) (range 0 (nr_block_devs-1)) in
252 let nr_interface_devs =
253 match get_param_int "net.count" None with
256 let interface_stats =
260 match get_param (sprintf "net.%d.name" i) with
261 | None -> sprintf "net%d" i
262 | Some (D.TypedFieldString s) -> s
263 | _ -> assert false in
266 (match get_param_int64 (sprintf "net.%d.rx.bytes" i) None
267 with None -> 0_L | Some v -> v);
269 (match get_param_int64 (sprintf "net.%d.rx.pkts" i) None
270 with None -> 0_L | Some v -> v);
272 (match get_param_int64 (sprintf "net.%d.rx.errs" i) None
273 with None -> 0_L | Some v -> v);
275 (match get_param_int64 (sprintf "net.%d.rx.drop" i) None
276 with None -> 0_L | Some v -> v);
278 (match get_param_int64 (sprintf "net.%d.tx.bytes" i) None
279 with None -> 0_L | Some v -> v);
281 (match get_param_int64 (sprintf "net.%d.tx.pkts" i) None
282 with None -> 0_L | Some v -> v);
284 (match get_param_int64 (sprintf "net.%d.tx.errs" i) None
285 with None -> 0_L | Some v -> v);
287 (match get_param_int64 (sprintf "net.%d.tx.drop" i) None
288 with None -> 0_L | Some v -> v);
290 ) (range 0 (nr_interface_devs-1)) in
292 let prev_info, prev_block_stats, prev_interface_stats =
294 let prev_info, prev_block_stats, prev_interface_stats =
295 Hashtbl.find last_info uuid in
296 Some prev_info, prev_block_stats, prev_interface_stats
297 with Not_found -> None, [], [] in
301 rd_domid = id; rd_domuuid = uuid; rd_dom = dom;
303 rd_block_stats = block_stats;
304 rd_interface_stats = interface_stats;
305 rd_prev_info = prev_info;
306 rd_prev_block_stats = prev_block_stats;
307 rd_prev_interface_stats = prev_interface_stats;
308 rd_cpu_time = 0.; rd_percent_cpu = 0.;
309 rd_mem_bytes = 0L; rd_mem_percent = 0L;
310 rd_block_rd_reqs = None; rd_block_wr_reqs = None;
311 rd_block_rd_bytes = None; rd_block_wr_bytes = None;
312 rd_net_rx_bytes = None; rd_net_tx_bytes = None;
317 (* Calculate the CPU time (ns) and %CPU used by each domain. *)
321 (* We have previous CPU info from which to calculate it? *)
322 | name, Active ({ rd_prev_info = Some prev_info } as rd) ->
324 Int64.to_float (rd.rd_info.D.cpu_time -^ prev_info.D.cpu_time) in
325 let percent_cpu = 100. *. cpu_time /. total_cpu in
326 let mem_usage = rd.rd_info.D.memory in
328 100L *^ rd.rd_info.D.memory /^ node_info.C.memory in
330 rd_cpu_time = cpu_time;
331 rd_percent_cpu = percent_cpu;
332 rd_mem_bytes = mem_usage;
333 rd_mem_percent = mem_percent} in
335 (* For all other domains we can't calculate it, so leave as 0 *)
339 (* Calculate the number of block device read/write requests across
340 * all block devices attached to a domain.
345 (* Do we have stats from the previous slice? *)
346 | name, Active ({ rd_prev_block_stats = ((_::_) as prev_block_stats) }
348 let block_stats = rd.rd_block_stats in (* stats now *)
350 (* Add all the devices together. Throw away device names. *)
351 let prev_block_stats =
352 sum_block_stats (List.map snd prev_block_stats) in
354 sum_block_stats (List.map snd block_stats) in
356 (* Calculate increase in read & write requests. *)
358 block_stats.D.rd_req -^ prev_block_stats.D.rd_req in
360 block_stats.D.wr_req -^ prev_block_stats.D.wr_req in
362 block_stats.D.rd_bytes -^ prev_block_stats.D.rd_bytes in
364 block_stats.D.wr_bytes -^ prev_block_stats.D.wr_bytes in
367 rd_block_rd_reqs = Some read_reqs;
368 rd_block_wr_reqs = Some write_reqs;
369 rd_block_rd_bytes = Some read_bytes;
370 rd_block_wr_bytes = Some write_bytes;
373 (* For all other domains we can't calculate it, so leave as None. *)
377 (* Calculate the same as above for network interfaces across
378 * all network interfaces attached to a domain.
383 (* Do we have stats from the previous slice? *)
384 | name, Active ({ rd_prev_interface_stats =
385 ((_::_) as prev_interface_stats) }
387 let interface_stats = rd.rd_interface_stats in (* stats now *)
389 (* Add all the devices together. Throw away device names. *)
390 let prev_interface_stats =
391 sum_interface_stats (List.map snd prev_interface_stats) in
392 let interface_stats =
393 sum_interface_stats (List.map snd interface_stats) in
395 (* Calculate increase in rx & tx bytes. *)
397 interface_stats.D.rx_bytes -^ prev_interface_stats.D.rx_bytes in
399 interface_stats.D.tx_bytes -^ prev_interface_stats.D.tx_bytes in
402 rd_net_rx_bytes = Some rx_bytes;
403 rd_net_tx_bytes = Some tx_bytes } in
405 (* For all other domains we can't calculate it, so leave as None. *)
409 (* Calculate totals. *)
412 fun (count, running, blocked, paused, shutdown, shutoff,
413 crashed, active, inactive,
414 total_cpu_time, total_memory, total_domU_memory) ->
416 | (name, Active rd) ->
417 let test state orig =
418 if rd.rd_info.D.state = state then orig+1 else orig
420 let running = test D.InfoRunning running in
421 let blocked = test D.InfoBlocked blocked in
422 let paused = test D.InfoPaused paused in
423 let shutdown = test D.InfoShutdown shutdown in
424 let shutoff = test D.InfoShutoff shutoff in
425 let crashed = test D.InfoCrashed crashed in
427 let total_cpu_time = total_cpu_time +. rd.rd_cpu_time in
428 let total_memory = total_memory +^ rd.rd_info.D.memory in
429 let total_domU_memory =
431 if rd.rd_domid > 0 then rd.rd_info.D.memory else 0L in
433 (count+1, running, blocked, paused, shutdown, shutoff,
434 crashed, active+1, inactive,
435 total_cpu_time, total_memory, total_domU_memory)
437 | (name, Inactive) -> (* inactive domain *)
438 (count+1, running, blocked, paused, shutdown, shutoff,
439 crashed, active, inactive+1,
440 total_cpu_time, total_memory, total_domU_memory)
441 ) (0,0,0,0,0,0,0,0,0, 0.,0L,0L) doms in
443 (* Update last_time, last_info. *)
445 Hashtbl.clear last_info;
449 let info = rd.rd_info, rd.rd_block_stats, rd.rd_interface_stats in
450 Hashtbl.add last_info rd.rd_domuuid info
456 rd_printable_time = printable_time;
457 rd_nr_pcpus = nr_pcpus;
458 rd_total_cpu = total_cpu;
459 rd_total_cpu_per_pcpu = total_cpu_per_pcpu;
462 (* Collect some extra information in PCPUDisplay display_mode. *)
463 let collect_pcpu { rd_doms = doms; rd_nr_pcpus = nr_pcpus } =
464 (* Get the VCPU info and VCPU->PCPU mappings for active domains.
465 * Also cull some data we don't care about.
470 | (name, Active rd) ->
472 let domid = rd.rd_domid in
473 let maplen = C.cpumaplen nr_pcpus in
474 let cpu_stats = D.get_cpu_stats rd.rd_dom in
476 (* Note the terminology is confusing.
478 * In libvirt, cpu_time is the total time (hypervisor +
479 * vCPU). vcpu_time is the time only taken by the vCPU,
480 * excluding time taken inside the hypervisor.
482 * For each pCPU, libvirt may return either "cpu_time"
483 * or "vcpu_time" or neither or both. This function
484 * returns an array pair [|cpu_time, vcpu_time|];
485 * if either is missing it is returned as 0.
487 let find_cpu_usages params =
488 let rec find_uint64_field name = function
489 | (n, D.TypedFieldUInt64 usage) :: _ when n = name ->
491 | _ :: params -> find_uint64_field name params
494 [| find_uint64_field "cpu_time" params;
495 find_uint64_field "vcpu_time" params |]
498 let pcpu_usages = Array.map find_cpu_usages cpu_stats in
499 let maxinfo = rd.rd_info.D.nr_virt_cpu in
500 let nr_vcpus, vcpu_infos, cpumaps =
501 D.get_vcpus rd.rd_dom maxinfo maplen in
503 (* Got previous pcpu_usages for this domain? *)
504 let prev_pcpu_usages =
505 try Some (Hashtbl.find last_pcpu_usages domid)
506 with Not_found -> None in
507 (* Update last_pcpu_usages. *)
508 Hashtbl.replace last_pcpu_usages domid pcpu_usages;
510 (match prev_pcpu_usages with
511 | Some prev_pcpu_usages
512 when Array.length prev_pcpu_usages = Array.length pcpu_usages ->
513 Some (domid, name, nr_vcpus, vcpu_infos, pcpu_usages,
514 prev_pcpu_usages, cpumaps, maplen)
515 | _ -> None (* ignore missing / unequal length prev_vcpu_infos *)
518 Libvirt.Virterror _ -> None (* ignore transient libvirt errors *)
520 | (_, Inactive) -> None (* ignore inactive doms *)
522 let nr_doms = List.length doms in
524 (* Rearrange the data into a matrix. Major axis (down) is
525 * pCPUs. Minor axis (right) is domains. At each node we store:
526 * cpu_time hypervisor + domain (on this pCPU only, nanosecs),
527 * vcpu_time domain only (on this pCPU only, nanosecs).
529 let make_3d_array dimx dimy dimz e =
530 Array.init dimx (fun _ -> Array.make_matrix dimy dimz e)
532 let pcpus = make_3d_array nr_pcpus nr_doms 2 0L in
535 fun di (domid, name, nr_vcpus, vcpu_infos, pcpu_usages,
536 prev_pcpu_usages, cpumaps, maplen) ->
537 (* Which pCPUs can this dom run on? *)
538 for p = 0 to Array.length pcpu_usages - 1 do
539 pcpus.(p).(di).(0) <-
540 pcpu_usages.(p).(0) -^ prev_pcpu_usages.(p).(0);
541 pcpus.(p).(di).(1) <-
542 pcpu_usages.(p).(1) -^ prev_pcpu_usages.(p).(1)
546 (* Sum the total CPU time used by each pCPU, for the %CPU column. *)
550 let cpu_time = ref 0L in
551 for di = 0 to Array.length row-1 do
552 let t = row.(di).(0) in
553 cpu_time := !cpu_time +^ t
555 Int64.to_float !cpu_time
558 { rd_pcpu_doms = doms;
559 rd_pcpu_pcpus = pcpus;
560 rd_pcpu_pcpus_cpu_time = pcpus_cpu_time }