X-Git-Url: http://git.annexia.org/?p=virt-top.git;a=blobdiff_plain;f=src%2Fcollect.ml;fp=src%2Fcollect.ml;h=f856067230cc1958e26723a145b8bb69d6d09410;hp=0000000000000000000000000000000000000000;hb=90d14bc151e488972d33eefaac2242d9a6e07578;hpb=3e9ed9c0fe49c3d4e4a8e467d521f676769c485a diff --git a/src/collect.ml b/src/collect.ml new file mode 100644 index 0000000..f856067 --- /dev/null +++ b/src/collect.ml @@ -0,0 +1,455 @@ +(* 'top'-like tool for libvirt domains. + (C) Copyright 2007-2017 Richard W.M. Jones, Red Hat Inc. + http://libvirt.org/ + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*) + +module C = Libvirt.Connect +module D = Libvirt.Domain + +open Printf +open ExtList + +open Utils +open Types + +(* Hook for XML support (see [opt_xml.ml]). *) +let parse_device_xml : (int -> [>`R] D.t -> string list * string list) ref = + ref ( + fun _ _ -> [], [] + ) + +(* Intermediate "domain + stats" structure that we use to collect + * everything we know about a domain within the collect function. + *) +type rd_domain = Inactive | Active of rd_active +and rd_active = { + rd_domid : int; (* Domain ID. *) + rd_dom : [`R] D.t; (* Domain object. *) + rd_info : D.info; (* Domain CPU info now. *) + rd_block_stats : (string * D.block_stats) list; + (* Domain block stats now. *) + rd_interface_stats : (string * D.interface_stats) list; + (* Domain net stats now. *) + rd_prev_info : D.info option; (* Domain CPU info previously. *) + rd_prev_block_stats : (string * D.block_stats) list; + (* Domain block stats prev. *) + rd_prev_interface_stats : (string * D.interface_stats) list; + (* Domain interface stats prev. *) + (* The following are since the last slice, or 0 if cannot be calculated: *) + rd_cpu_time : float; (* CPU time used in nanoseconds. *) + rd_percent_cpu : float; (* CPU time as percent of total. *) + rd_mem_bytes : int64; (* Memory usage in bytes *) + rd_mem_percent: int64; (* Memory usage as percent of total *) + (* The following are since the last slice, or None if cannot be calc'd: *) + rd_block_rd_reqs : int64 option; (* Number of block device read rqs. *) + rd_block_wr_reqs : int64 option; (* Number of block device write rqs. *) + rd_block_rd_bytes : int64 option; (* Number of bytes block device read *) + rd_block_wr_bytes : int64 option; (* Number of bytes block device write *) + (* _info fields includes the number considering --block_in_bytes option *) + rd_block_rd_info : int64 option; (* Block device read info for user *) + rd_block_wr_info : int64 option; (* Block device read info for user *) + + rd_net_rx_bytes : int64 option; (* Number of bytes received. *) + rd_net_tx_bytes : int64 option; (* Number of bytes transmitted. *) +} + +type stats = { + rd_doms : (string * rd_domain) list; (* List of domains. *) + rd_time : float; + rd_printable_time : string; + rd_nr_pcpus : int; + rd_total_cpu : float; + rd_total_cpu_per_pcpu : float; + rd_totals : (int * int * int * int * int * int * int * int * int * float * + int64 * int64); +} + +type pcpu_stats = { + rd_pcpu_doms : (int * string * int * + Libvirt.Domain.vcpu_info array * int64 array array * + int64 array array * string * int) list; + rd_pcpu_pcpus : int64 array array array; + rd_pcpu_pcpus_cpu_time : float array +} + +(* We cache the list of block devices and interfaces for each domain + * here, so we don't need to reparse the XML each time. + *) +let devices = Hashtbl.create 13 + +(* Function to get the list of block devices, network interfaces for + * a particular domain. Get it from the devices cache, and if not + * there then parse the domain XML. + *) +let get_devices id dom = + try Hashtbl.find devices id + with Not_found -> + let blkdevs, netifs = (!parse_device_xml) id dom in + Hashtbl.replace devices id (blkdevs, netifs); + blkdevs, netifs + +(* We save the state of domains across redraws here, which allows us + * to deduce %CPU usage from the running total. + *) +let last_info = Hashtbl.create 13 +let last_time = ref (Unix.gettimeofday ()) + +(* Save pcpu_usages structures across redraws too (only for pCPU display). *) +let last_pcpu_usages = Hashtbl.create 13 + +let clear_pcpu_display_data () = + Hashtbl.clear last_pcpu_usages + +let collect (conn, _, _, _, _, node_info, _, _) block_in_bytes = + (* Number of physical CPUs (some may be disabled). *) + let nr_pcpus = C.maxcpus_of_node_info node_info in + + (* Get the current time. *) + let time = Unix.gettimeofday () in + let tm = Unix.localtime time in + let printable_time = + sprintf "%02d:%02d:%02d" tm.Unix.tm_hour tm.Unix.tm_min tm.Unix.tm_sec in + + (* What's the total CPU time elapsed since we were last called? (ns) *) + let total_cpu_per_pcpu = 1_000_000_000. *. (time -. !last_time) in + (* Avoid division by zero. *) + let total_cpu_per_pcpu = + if total_cpu_per_pcpu <= 0. then 1. else total_cpu_per_pcpu in + let total_cpu = float node_info.C.cpus *. total_cpu_per_pcpu in + + (* Get the domains. Match up with their last_info (if any). *) + let doms = + (* Active domains. *) + let n = C.num_of_domains conn in + let ids = + if n > 0 then Array.to_list (C.list_domains conn n) + else [] in + let doms = + List.filter_map ( + fun id -> + try + let dom = D.lookup_by_id conn id in + let name = D.get_name dom in + let blkdevs, netifs = get_devices id dom in + + (* Get current CPU, block and network stats. *) + let info = D.get_info dom in + let block_stats = + try List.map (fun dev -> dev, D.block_stats dom dev) blkdevs + with + | Libvirt.Not_supported "virDomainBlockStats" + | Libvirt.Virterror _ -> [] in + let interface_stats = + try List.map (fun dev -> dev, D.interface_stats dom dev) netifs + with + | Libvirt.Not_supported "virDomainInterfaceStats" + | Libvirt.Virterror _ -> [] in + + let prev_info, prev_block_stats, prev_interface_stats = + try + let prev_info, prev_block_stats, prev_interface_stats = + Hashtbl.find last_info id in + Some prev_info, prev_block_stats, prev_interface_stats + with Not_found -> None, [], [] in + + Some (name, + Active { + rd_domid = id; rd_dom = dom; rd_info = info; + rd_block_stats = block_stats; + rd_interface_stats = interface_stats; + rd_prev_info = prev_info; + rd_prev_block_stats = prev_block_stats; + rd_prev_interface_stats = prev_interface_stats; + rd_cpu_time = 0.; rd_percent_cpu = 0.; + rd_mem_bytes = 0L; rd_mem_percent = 0L; + rd_block_rd_reqs = None; rd_block_wr_reqs = None; + rd_block_rd_bytes = None; rd_block_wr_bytes = None; + rd_block_rd_info = None; rd_block_wr_info = None; + rd_net_rx_bytes = None; rd_net_tx_bytes = None; + }) + with + Libvirt.Virterror _ -> None (* ignore transient error *) + ) ids in + + (* Inactive domains. *) + let doms_inactive = + try + let n = C.num_of_defined_domains conn in + let names = + if n > 0 then Array.to_list (C.list_defined_domains conn n) + else [] in + List.map (fun name -> name, Inactive) names + with + (* Ignore transient errors, in particular errors from + * num_of_defined_domains if it cannot contact xend. + *) + | Libvirt.Virterror _ -> [] in + + doms @ doms_inactive in + + (* Calculate the CPU time (ns) and %CPU used by each domain. *) + let doms = + List.map ( + function + (* We have previous CPU info from which to calculate it? *) + | name, Active ({ rd_prev_info = Some prev_info } as rd) -> + let cpu_time = + Int64.to_float (rd.rd_info.D.cpu_time -^ prev_info.D.cpu_time) in + let percent_cpu = 100. *. cpu_time /. total_cpu in + let mem_usage = rd.rd_info.D.memory in + let mem_percent = + 100L *^ rd.rd_info.D.memory /^ node_info.C.memory in + let rd = { rd with + rd_cpu_time = cpu_time; + rd_percent_cpu = percent_cpu; + rd_mem_bytes = mem_usage; + rd_mem_percent = mem_percent} in + name, Active rd + (* For all other domains we can't calculate it, so leave as 0 *) + | rd -> rd + ) doms in + + (* Calculate the number of block device read/write requests across + * all block devices attached to a domain. + *) + let doms = + List.map ( + function + (* Do we have stats from the previous slice? *) + | name, Active ({ rd_prev_block_stats = ((_::_) as prev_block_stats) } + as rd) -> + let block_stats = rd.rd_block_stats in (* stats now *) + + (* Add all the devices together. Throw away device names. *) + let prev_block_stats = + sum_block_stats (List.map snd prev_block_stats) in + let block_stats = + sum_block_stats (List.map snd block_stats) in + + (* Calculate increase in read & write requests. *) + let read_reqs = + block_stats.D.rd_req -^ prev_block_stats.D.rd_req in + let write_reqs = + block_stats.D.wr_req -^ prev_block_stats.D.wr_req in + let read_bytes = + block_stats.D.rd_bytes -^ prev_block_stats.D.rd_bytes in + let write_bytes = + block_stats.D.wr_bytes -^ prev_block_stats.D.wr_bytes in + + let rd = { rd with + rd_block_rd_reqs = Some read_reqs; + rd_block_wr_reqs = Some write_reqs; + rd_block_rd_bytes = Some read_bytes; + rd_block_wr_bytes = Some write_bytes; + } in + let rd = { rd with + rd_block_rd_info = + if block_in_bytes then + rd.rd_block_rd_bytes else rd.rd_block_rd_reqs; + rd_block_wr_info = + if block_in_bytes then + rd.rd_block_wr_bytes else rd.rd_block_wr_reqs; + } in + name, Active rd + (* For all other domains we can't calculate it, so leave as None. *) + | rd -> rd + ) doms in + + (* Calculate the same as above for network interfaces across + * all network interfaces attached to a domain. + *) + let doms = + List.map ( + function + (* Do we have stats from the previous slice? *) + | name, Active ({ rd_prev_interface_stats = + ((_::_) as prev_interface_stats) } + as rd) -> + let interface_stats = rd.rd_interface_stats in (* stats now *) + + (* Add all the devices together. Throw away device names. *) + let prev_interface_stats = + sum_interface_stats (List.map snd prev_interface_stats) in + let interface_stats = + sum_interface_stats (List.map snd interface_stats) in + + (* Calculate increase in rx & tx bytes. *) + let rx_bytes = + interface_stats.D.rx_bytes -^ prev_interface_stats.D.rx_bytes in + let tx_bytes = + interface_stats.D.tx_bytes -^ prev_interface_stats.D.tx_bytes in + + let rd = { rd with + rd_net_rx_bytes = Some rx_bytes; + rd_net_tx_bytes = Some tx_bytes } in + name, Active rd + (* For all other domains we can't calculate it, so leave as None. *) + | rd -> rd + ) doms in + + (* Calculate totals. *) + let totals = + List.fold_left ( + fun (count, running, blocked, paused, shutdown, shutoff, + crashed, active, inactive, + total_cpu_time, total_memory, total_domU_memory) -> + function + | (name, Active rd) -> + let test state orig = + if rd.rd_info.D.state = state then orig+1 else orig + in + let running = test D.InfoRunning running in + let blocked = test D.InfoBlocked blocked in + let paused = test D.InfoPaused paused in + let shutdown = test D.InfoShutdown shutdown in + let shutoff = test D.InfoShutoff shutoff in + let crashed = test D.InfoCrashed crashed in + + let total_cpu_time = total_cpu_time +. rd.rd_cpu_time in + let total_memory = total_memory +^ rd.rd_info.D.memory in + let total_domU_memory = + total_domU_memory +^ + if rd.rd_domid > 0 then rd.rd_info.D.memory else 0L in + + (count+1, running, blocked, paused, shutdown, shutoff, + crashed, active+1, inactive, + total_cpu_time, total_memory, total_domU_memory) + + | (name, Inactive) -> (* inactive domain *) + (count+1, running, blocked, paused, shutdown, shutoff, + crashed, active, inactive+1, + total_cpu_time, total_memory, total_domU_memory) + ) (0,0,0,0,0,0,0,0,0, 0.,0L,0L) doms in + + (* Update last_time, last_info. *) + last_time := time; + Hashtbl.clear last_info; + List.iter ( + function + | (_, Active rd) -> + let info = rd.rd_info, rd.rd_block_stats, rd.rd_interface_stats in + Hashtbl.add last_info rd.rd_domid info + | _ -> () + ) doms; + + { rd_doms = doms; + rd_time = time; + rd_printable_time = printable_time; + rd_nr_pcpus = nr_pcpus; + rd_total_cpu = total_cpu; + rd_total_cpu_per_pcpu = total_cpu_per_pcpu; + rd_totals = totals } + +(* Collect some extra information in PCPUDisplay display_mode. *) +let collect_pcpu { rd_doms = doms; rd_nr_pcpus = nr_pcpus } = + (* Get the VCPU info and VCPU->PCPU mappings for active domains. + * Also cull some data we don't care about. + *) + let doms = + List.filter_map ( + function + | (name, Active rd) -> + (try + let domid = rd.rd_domid in + let maplen = C.cpumaplen nr_pcpus in + let cpu_stats = D.get_cpu_stats rd.rd_dom in + + (* Note the terminology is confusing. + * + * In libvirt, cpu_time is the total time (hypervisor + + * vCPU). vcpu_time is the time only taken by the vCPU, + * excluding time taken inside the hypervisor. + * + * For each pCPU, libvirt may return either "cpu_time" + * or "vcpu_time" or neither or both. This function + * returns an array pair [|cpu_time, vcpu_time|]; + * if either is missing it is returned as 0. + *) + let find_cpu_usages params = + let rec find_uint64_field name = function + | (n, D.TypedFieldUInt64 usage) :: _ when n = name -> + usage + | _ :: params -> find_uint64_field name params + | [] -> 0L + in + [| find_uint64_field "cpu_time" params; + find_uint64_field "vcpu_time" params |] + in + + let pcpu_usages = Array.map find_cpu_usages cpu_stats in + let maxinfo = rd.rd_info.D.nr_virt_cpu in + let nr_vcpus, vcpu_infos, cpumaps = + D.get_vcpus rd.rd_dom maxinfo maplen in + + (* Got previous pcpu_usages for this domain? *) + let prev_pcpu_usages = + try Some (Hashtbl.find last_pcpu_usages domid) + with Not_found -> None in + (* Update last_pcpu_usages. *) + Hashtbl.replace last_pcpu_usages domid pcpu_usages; + + (match prev_pcpu_usages with + | Some prev_pcpu_usages + when Array.length prev_pcpu_usages = Array.length pcpu_usages -> + Some (domid, name, nr_vcpus, vcpu_infos, pcpu_usages, + prev_pcpu_usages, cpumaps, maplen) + | _ -> None (* ignore missing / unequal length prev_vcpu_infos *) + ); + with + Libvirt.Virterror _ -> None (* ignore transient libvirt errors *) + ) + | (_, Inactive) -> None (* ignore inactive doms *) + ) doms in + let nr_doms = List.length doms in + + (* Rearrange the data into a matrix. Major axis (down) is + * pCPUs. Minor axis (right) is domains. At each node we store: + * cpu_time hypervisor + domain (on this pCPU only, nanosecs), + * vcpu_time domain only (on this pCPU only, nanosecs). + *) + let make_3d_array dimx dimy dimz e = + Array.init dimx (fun _ -> Array.make_matrix dimy dimz e) + in + let pcpus = make_3d_array nr_pcpus nr_doms 2 0L in + + List.iteri ( + fun di (domid, name, nr_vcpus, vcpu_infos, pcpu_usages, + prev_pcpu_usages, cpumaps, maplen) -> + (* Which pCPUs can this dom run on? *) + for p = 0 to Array.length pcpu_usages - 1 do + pcpus.(p).(di).(0) <- + pcpu_usages.(p).(0) -^ prev_pcpu_usages.(p).(0); + pcpus.(p).(di).(1) <- + pcpu_usages.(p).(1) -^ prev_pcpu_usages.(p).(1) + done + ) doms; + + (* Sum the total CPU time used by each pCPU, for the %CPU column. *) + let pcpus_cpu_time = + Array.map ( + fun row -> + let cpu_time = ref 0L in + for di = 0 to Array.length row-1 do + let t = row.(di).(0) in + cpu_time := !cpu_time +^ t + done; + Int64.to_float !cpu_time + ) pcpus in + + { rd_pcpu_doms = doms; + rd_pcpu_pcpus = pcpus; + rd_pcpu_pcpus_cpu_time = pcpus_cpu_time }