1 (* 'top'-like tool for libvirt domains.
2 (C) Copyright 2007-2017 Richard W.M. Jones, Red Hat Inc.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 module C = Libvirt.Connect
21 module D = Libvirt.Domain
29 (* Hook for XML support (see [opt_xml.ml]). *)
30 let parse_device_xml : (int -> [>`R] D.t -> string list * string list) ref =
35 (* Intermediate "domain + stats" structure that we use to collect
36 * everything we know about a domain within the collect function.
38 type rd_domain = Inactive | Active of rd_active
40 rd_domid : int; (* Domain ID. *)
41 rd_dom : [`R] D.t; (* Domain object. *)
42 rd_info : D.info; (* Domain CPU info now. *)
43 rd_block_stats : (string * D.block_stats) list;
44 (* Domain block stats now. *)
45 rd_interface_stats : (string * D.interface_stats) list;
46 (* Domain net stats now. *)
47 rd_prev_info : D.info option; (* Domain CPU info previously. *)
48 rd_prev_block_stats : (string * D.block_stats) list;
49 (* Domain block stats prev. *)
50 rd_prev_interface_stats : (string * D.interface_stats) list;
51 (* Domain interface stats prev. *)
52 (* The following are since the last slice, or 0 if cannot be calculated: *)
53 rd_cpu_time : float; (* CPU time used in nanoseconds. *)
54 rd_percent_cpu : float; (* CPU time as percent of total. *)
55 rd_mem_bytes : int64; (* Memory usage in bytes *)
56 rd_mem_percent: int64; (* Memory usage as percent of total *)
57 (* The following are since the last slice, or None if cannot be calc'd: *)
58 rd_block_rd_reqs : int64 option; (* Number of block device read rqs. *)
59 rd_block_wr_reqs : int64 option; (* Number of block device write rqs. *)
60 rd_block_rd_bytes : int64 option; (* Number of bytes block device read *)
61 rd_block_wr_bytes : int64 option; (* Number of bytes block device write *)
62 (* _info fields includes the number considering --block_in_bytes option *)
63 rd_block_rd_info : int64 option; (* Block device read info for user *)
64 rd_block_wr_info : int64 option; (* Block device read info for user *)
66 rd_net_rx_bytes : int64 option; (* Number of bytes received. *)
67 rd_net_tx_bytes : int64 option; (* Number of bytes transmitted. *)
71 rd_doms : (string * rd_domain) list; (* List of domains. *)
73 rd_printable_time : string;
76 rd_total_cpu_per_pcpu : float;
77 rd_totals : (int * int * int * int * int * int * int * int * int * float *
82 rd_pcpu_doms : (int * string * int *
83 Libvirt.Domain.vcpu_info array * int64 array array *
84 int64 array array * string * int) list;
85 rd_pcpu_pcpus : int64 array array array;
86 rd_pcpu_pcpus_cpu_time : float array
89 (* We cache the list of block devices and interfaces for each domain
90 * here, so we don't need to reparse the XML each time.
92 let devices = Hashtbl.create 13
94 (* Function to get the list of block devices, network interfaces for
95 * a particular domain. Get it from the devices cache, and if not
96 * there then parse the domain XML.
98 let get_devices id dom =
99 try Hashtbl.find devices id
101 let blkdevs, netifs = (!parse_device_xml) id dom in
102 Hashtbl.replace devices id (blkdevs, netifs);
105 (* We save the state of domains across redraws here, which allows us
106 * to deduce %CPU usage from the running total.
108 let last_info = Hashtbl.create 13
109 let last_time = ref (Unix.gettimeofday ())
111 (* Save pcpu_usages structures across redraws too (only for pCPU display). *)
112 let last_pcpu_usages = Hashtbl.create 13
114 let clear_pcpu_display_data () =
115 Hashtbl.clear last_pcpu_usages
117 let collect (conn, _, _, _, _, node_info, _, _) block_in_bytes =
118 (* Number of physical CPUs (some may be disabled). *)
119 let nr_pcpus = C.maxcpus_of_node_info node_info in
121 (* Get the current time. *)
122 let time = Unix.gettimeofday () in
123 let tm = Unix.localtime time in
125 sprintf "%02d:%02d:%02d" tm.Unix.tm_hour tm.Unix.tm_min tm.Unix.tm_sec in
127 (* What's the total CPU time elapsed since we were last called? (ns) *)
128 let total_cpu_per_pcpu = 1_000_000_000. *. (time -. !last_time) in
129 (* Avoid division by zero. *)
130 let total_cpu_per_pcpu =
131 if total_cpu_per_pcpu <= 0. then 1. else total_cpu_per_pcpu in
132 let total_cpu = float node_info.C.cpus *. total_cpu_per_pcpu in
134 (* Get the domains. Match up with their last_info (if any). *)
136 (* Active domains. *)
137 let n = C.num_of_domains conn in
139 if n > 0 then Array.to_list (C.list_domains conn n)
145 let dom = D.lookup_by_id conn id in
146 let name = D.get_name dom in
147 let blkdevs, netifs = get_devices id dom in
149 (* Get current CPU, block and network stats. *)
150 let info = D.get_info dom in
152 try List.map (fun dev -> dev, D.block_stats dom dev) blkdevs
154 | Libvirt.Not_supported "virDomainBlockStats"
155 | Libvirt.Virterror _ -> [] in
156 let interface_stats =
157 try List.map (fun dev -> dev, D.interface_stats dom dev) netifs
159 | Libvirt.Not_supported "virDomainInterfaceStats"
160 | Libvirt.Virterror _ -> [] in
162 let prev_info, prev_block_stats, prev_interface_stats =
164 let prev_info, prev_block_stats, prev_interface_stats =
165 Hashtbl.find last_info id in
166 Some prev_info, prev_block_stats, prev_interface_stats
167 with Not_found -> None, [], [] in
171 rd_domid = id; rd_dom = dom; rd_info = info;
172 rd_block_stats = block_stats;
173 rd_interface_stats = interface_stats;
174 rd_prev_info = prev_info;
175 rd_prev_block_stats = prev_block_stats;
176 rd_prev_interface_stats = prev_interface_stats;
177 rd_cpu_time = 0.; rd_percent_cpu = 0.;
178 rd_mem_bytes = 0L; rd_mem_percent = 0L;
179 rd_block_rd_reqs = None; rd_block_wr_reqs = None;
180 rd_block_rd_bytes = None; rd_block_wr_bytes = None;
181 rd_block_rd_info = None; rd_block_wr_info = None;
182 rd_net_rx_bytes = None; rd_net_tx_bytes = None;
185 Libvirt.Virterror _ -> None (* ignore transient error *)
188 (* Inactive domains. *)
191 let n = C.num_of_defined_domains conn in
193 if n > 0 then Array.to_list (C.list_defined_domains conn n)
195 List.map (fun name -> name, Inactive) names
197 (* Ignore transient errors, in particular errors from
198 * num_of_defined_domains if it cannot contact xend.
200 | Libvirt.Virterror _ -> [] in
202 doms @ doms_inactive in
204 (* Calculate the CPU time (ns) and %CPU used by each domain. *)
208 (* We have previous CPU info from which to calculate it? *)
209 | name, Active ({ rd_prev_info = Some prev_info } as rd) ->
211 Int64.to_float (rd.rd_info.D.cpu_time -^ prev_info.D.cpu_time) in
212 let percent_cpu = 100. *. cpu_time /. total_cpu in
213 let mem_usage = rd.rd_info.D.memory in
215 100L *^ rd.rd_info.D.memory /^ node_info.C.memory in
217 rd_cpu_time = cpu_time;
218 rd_percent_cpu = percent_cpu;
219 rd_mem_bytes = mem_usage;
220 rd_mem_percent = mem_percent} in
222 (* For all other domains we can't calculate it, so leave as 0 *)
226 (* Calculate the number of block device read/write requests across
227 * all block devices attached to a domain.
232 (* Do we have stats from the previous slice? *)
233 | name, Active ({ rd_prev_block_stats = ((_::_) as prev_block_stats) }
235 let block_stats = rd.rd_block_stats in (* stats now *)
237 (* Add all the devices together. Throw away device names. *)
238 let prev_block_stats =
239 sum_block_stats (List.map snd prev_block_stats) in
241 sum_block_stats (List.map snd block_stats) in
243 (* Calculate increase in read & write requests. *)
245 block_stats.D.rd_req -^ prev_block_stats.D.rd_req in
247 block_stats.D.wr_req -^ prev_block_stats.D.wr_req in
249 block_stats.D.rd_bytes -^ prev_block_stats.D.rd_bytes in
251 block_stats.D.wr_bytes -^ prev_block_stats.D.wr_bytes in
254 rd_block_rd_reqs = Some read_reqs;
255 rd_block_wr_reqs = Some write_reqs;
256 rd_block_rd_bytes = Some read_bytes;
257 rd_block_wr_bytes = Some write_bytes;
261 if block_in_bytes then
262 rd.rd_block_rd_bytes else rd.rd_block_rd_reqs;
264 if block_in_bytes then
265 rd.rd_block_wr_bytes else rd.rd_block_wr_reqs;
268 (* For all other domains we can't calculate it, so leave as None. *)
272 (* Calculate the same as above for network interfaces across
273 * all network interfaces attached to a domain.
278 (* Do we have stats from the previous slice? *)
279 | name, Active ({ rd_prev_interface_stats =
280 ((_::_) as prev_interface_stats) }
282 let interface_stats = rd.rd_interface_stats in (* stats now *)
284 (* Add all the devices together. Throw away device names. *)
285 let prev_interface_stats =
286 sum_interface_stats (List.map snd prev_interface_stats) in
287 let interface_stats =
288 sum_interface_stats (List.map snd interface_stats) in
290 (* Calculate increase in rx & tx bytes. *)
292 interface_stats.D.rx_bytes -^ prev_interface_stats.D.rx_bytes in
294 interface_stats.D.tx_bytes -^ prev_interface_stats.D.tx_bytes in
297 rd_net_rx_bytes = Some rx_bytes;
298 rd_net_tx_bytes = Some tx_bytes } in
300 (* For all other domains we can't calculate it, so leave as None. *)
304 (* Calculate totals. *)
307 fun (count, running, blocked, paused, shutdown, shutoff,
308 crashed, active, inactive,
309 total_cpu_time, total_memory, total_domU_memory) ->
311 | (name, Active rd) ->
312 let test state orig =
313 if rd.rd_info.D.state = state then orig+1 else orig
315 let running = test D.InfoRunning running in
316 let blocked = test D.InfoBlocked blocked in
317 let paused = test D.InfoPaused paused in
318 let shutdown = test D.InfoShutdown shutdown in
319 let shutoff = test D.InfoShutoff shutoff in
320 let crashed = test D.InfoCrashed crashed in
322 let total_cpu_time = total_cpu_time +. rd.rd_cpu_time in
323 let total_memory = total_memory +^ rd.rd_info.D.memory in
324 let total_domU_memory =
326 if rd.rd_domid > 0 then rd.rd_info.D.memory else 0L in
328 (count+1, running, blocked, paused, shutdown, shutoff,
329 crashed, active+1, inactive,
330 total_cpu_time, total_memory, total_domU_memory)
332 | (name, Inactive) -> (* inactive domain *)
333 (count+1, running, blocked, paused, shutdown, shutoff,
334 crashed, active, inactive+1,
335 total_cpu_time, total_memory, total_domU_memory)
336 ) (0,0,0,0,0,0,0,0,0, 0.,0L,0L) doms in
338 (* Update last_time, last_info. *)
340 Hashtbl.clear last_info;
344 let info = rd.rd_info, rd.rd_block_stats, rd.rd_interface_stats in
345 Hashtbl.add last_info rd.rd_domid info
351 rd_printable_time = printable_time;
352 rd_nr_pcpus = nr_pcpus;
353 rd_total_cpu = total_cpu;
354 rd_total_cpu_per_pcpu = total_cpu_per_pcpu;
357 (* Collect some extra information in PCPUDisplay display_mode. *)
358 let collect_pcpu { rd_doms = doms; rd_nr_pcpus = nr_pcpus } =
359 (* Get the VCPU info and VCPU->PCPU mappings for active domains.
360 * Also cull some data we don't care about.
365 | (name, Active rd) ->
367 let domid = rd.rd_domid in
368 let maplen = C.cpumaplen nr_pcpus in
369 let cpu_stats = D.get_cpu_stats rd.rd_dom in
371 (* Note the terminology is confusing.
373 * In libvirt, cpu_time is the total time (hypervisor +
374 * vCPU). vcpu_time is the time only taken by the vCPU,
375 * excluding time taken inside the hypervisor.
377 * For each pCPU, libvirt may return either "cpu_time"
378 * or "vcpu_time" or neither or both. This function
379 * returns an array pair [|cpu_time, vcpu_time|];
380 * if either is missing it is returned as 0.
382 let find_cpu_usages params =
383 let rec find_uint64_field name = function
384 | (n, D.TypedFieldUInt64 usage) :: _ when n = name ->
386 | _ :: params -> find_uint64_field name params
389 [| find_uint64_field "cpu_time" params;
390 find_uint64_field "vcpu_time" params |]
393 let pcpu_usages = Array.map find_cpu_usages cpu_stats in
394 let maxinfo = rd.rd_info.D.nr_virt_cpu in
395 let nr_vcpus, vcpu_infos, cpumaps =
396 D.get_vcpus rd.rd_dom maxinfo maplen in
398 (* Got previous pcpu_usages for this domain? *)
399 let prev_pcpu_usages =
400 try Some (Hashtbl.find last_pcpu_usages domid)
401 with Not_found -> None in
402 (* Update last_pcpu_usages. *)
403 Hashtbl.replace last_pcpu_usages domid pcpu_usages;
405 (match prev_pcpu_usages with
406 | Some prev_pcpu_usages
407 when Array.length prev_pcpu_usages = Array.length pcpu_usages ->
408 Some (domid, name, nr_vcpus, vcpu_infos, pcpu_usages,
409 prev_pcpu_usages, cpumaps, maplen)
410 | _ -> None (* ignore missing / unequal length prev_vcpu_infos *)
413 Libvirt.Virterror _ -> None (* ignore transient libvirt errors *)
415 | (_, Inactive) -> None (* ignore inactive doms *)
417 let nr_doms = List.length doms in
419 (* Rearrange the data into a matrix. Major axis (down) is
420 * pCPUs. Minor axis (right) is domains. At each node we store:
421 * cpu_time hypervisor + domain (on this pCPU only, nanosecs),
422 * vcpu_time domain only (on this pCPU only, nanosecs).
424 let make_3d_array dimx dimy dimz e =
425 Array.init dimx (fun _ -> Array.make_matrix dimy dimz e)
427 let pcpus = make_3d_array nr_pcpus nr_doms 2 0L in
430 fun di (domid, name, nr_vcpus, vcpu_infos, pcpu_usages,
431 prev_pcpu_usages, cpumaps, maplen) ->
432 (* Which pCPUs can this dom run on? *)
433 for p = 0 to Array.length pcpu_usages - 1 do
434 pcpus.(p).(di).(0) <-
435 pcpu_usages.(p).(0) -^ prev_pcpu_usages.(p).(0);
436 pcpus.(p).(di).(1) <-
437 pcpu_usages.(p).(1) -^ prev_pcpu_usages.(p).(1)
441 (* Sum the total CPU time used by each pCPU, for the %CPU column. *)
445 let cpu_time = ref 0L in
446 for di = 0 to Array.length row-1 do
447 let t = row.(di).(0) in
448 cpu_time := !cpu_time +^ t
450 Int64.to_float !cpu_time
453 { rd_pcpu_doms = doms;
454 rd_pcpu_pcpus = pcpus;
455 rd_pcpu_pcpus_cpu_time = pcpus_cpu_time }