(* virt-dmesg
 * (C) Copyright 2008-2011 Red Hat Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *)

open Printf
open Utils

(* C functions, see [c_utils.c] file. *)
external str_mapping : int64 -> int64 -> int -> int = "virt_dmesg_str_mapping" "noalloc"
external strstr_from : string -> string -> int -> int = "virt_dmesg_strstr_from"
external strstr_from_aligned : string -> string -> int -> int = "virt_dmesg_strstr_from_aligned"
external str_get_le32 : string -> int -> int64 = "virt_dmesg_str_get_le32"
external str_get_be32 : string -> int -> int64 = "virt_dmesg_str_get_be32"
external str_get_le64 : string -> int -> int64 = "virt_dmesg_str_get_le64"
external str_get_be64 : string -> int -> int64 = "virt_dmesg_str_get_be64"
external str_of_le32 : int64 -> string = "virt_dmesg_str_of_le32"
external str_of_be32 : int64 -> string = "virt_dmesg_str_of_be32"
external str_of_le64 : int64 -> string = "virt_dmesg_str_of_le64"
external str_of_be64 : int64 -> string = "virt_dmesg_str_of_be64"
external get_asciiz : string -> int -> string = "virt_dmesg_get_asciiz"
external is_C_ident : string -> int -> bool = "virt_dmesg_is_C_ident"
external crc32_of_string : string -> int32 = "virt_dmesg_crc32_of_string"
external addr_compare : int64 -> int64 -> int = "virt_dmesg_addr_compare" "noalloc"

(* An in-memory copy of a guest kernel image together with the
 * endianness and word size that were guessed while loading it.
 *)
type t = {
  (* Note that 'base_addr' is the guest virtual address of the first
   * byte in the 'data' string.
   *)
  data : string;
  base_addr : int64;
  endian : endian;
  wordsize : wordsize;
}
and endian = BigEndian | LittleEndian
and wordsize = Word32 | Word64

(* Human-readable name of an endianness. *)
let string_of_endian = function
  | BigEndian -> "big endian"
  | LittleEndian -> "little endian"

(* Human-readable name of a word size. *)
let string_of_wordsize = function
  | Word32 -> "32 bit"
  | Word64 -> "64 bit"

(* [succ_word k addr] is [addr] advanced by one machine word of [k]
 * (4 bytes for Word32, 8 bytes for Word64).
 *)
let succ_word { wordsize = wordsize } addr =
  match wordsize with
  | Word32 -> addr +^ 4L
  | Word64 -> addr +^ 8L

(* [pred_word k addr] is [addr] moved back by one machine word of [k]. *)
let pred_word { wordsize = wordsize } addr =
  match wordsize with
  | Word32 -> addr -^ 4L
  | Word64 -> addr -^ 8L

(* [succ_align k addr] rounds [addr] up to the next word-aligned
 * address of [k]; an already aligned address is returned unchanged.
 *)
let succ_align { wordsize = wordsize } addr =
  let mask =
    match wordsize with
    | Word32 -> 3L
    | Word64 -> 7L in
  (addr +^ mask) &^ (Int64.lognot mask)

(* Size in bytes of one machine word of the given kernel image. *)
let bytes_of_wordsize = function
  | { wordsize = Word32 } -> 4
  | { wordsize = Word64 } -> 8

(* [create base_addr f] loads a fixed-size kernel image starting at
 * guest virtual address [base_addr].
 *
 * [f addr] is called repeatedly and must return the next block of
 * memory starting at [addr]; every block length must be a multiple
 * of 8 (this is asserted), and the blocks must add up to exactly the
 * image size (also asserted).
 *
 * Raises [Not_found] if the data read does not look like a kernel,
 * or looks like the same block repeated over and over.
 *
 * NOTE(review): if [f] ever returns an empty string the load loop
 * below makes no progress and never terminates - confirm that
 * callers cannot do this.
 *)
let create base_addr f =
  (* XXX Make kernel size configurable. *)
  let len = 16 * 1024 * 1024 - 65536 in
  let top_addr = base_addr +^ Int64.of_int len in

  (* As we're loading the kernel, keep running stats on the number of
   * valid (or "probably valid") pointers found that are:
   *   - 32 bit, aligned, little endian
   *   - 32 bit, aligned, big endian
   *   - 64 bit, aligned, little endian
   *   - 64 bit, aligned, big endian
   * and the total amount of data read.  Use these stats to heuristically
   * determine endianness and word size, and also reject the kernel
   * early by raising [Not_found] if it doesn't look like a kernel.
   *
   * Also a characteristic of qemu is that if you ask for an invalid
   * virtual address, it returns the same set of blocks of random data
   * over and over.  Detect this by keeping a count of the checksum
   * of each block.
   *)
  let update_stats, get_endian_wordsize =
    let blocks = ref 0 in
    let size = ref 0 in
    let le32 = ref 0 and be32 = ref 0 and le64 = ref 0 and be64 = ref 0 in
    let max_le32_pc = ref 0. and max_be32_pc = ref 0.
    and max_le64_pc = ref 0. and max_be64_pc = ref 0. in
    let checksums = Counter.create () in

    let update_stats data =
      incr blocks;
      let n = String.length data in
      assert (n mod 8 = 0);
      size := !size + n;

      (* Scan every aligned 32 bit word.  OCaml [for] bounds are
       * inclusive, so the last index must be n/4 - 1: index n/4
       * would read 4 bytes starting at offset n, past the end of
       * the block.
       *)
      for i = 0 to n/4 - 1 do
        let p = str_get_le32 data (i*4) in
        if p >= base_addr && p < top_addr then incr le32;
        let p = str_get_be32 data (i*4) in
        if p >= base_addr && p < top_addr then incr be32;
      done;

      (* Likewise for every aligned 64 bit word: last index is n/8 - 1. *)
      for i = 0 to n/8 - 1 do
        let p = str_get_le64 data (i*8) in
        if p >= base_addr && p < top_addr then incr le64;
        let p = str_get_be64 data (i*8) in
        if p >= base_addr && p < top_addr then incr be64;
      done;

      (* Because the kernel is likely to be smaller than the memory we
       * are reading, keep track of the running maximum of the
       * percentage of memory which contains pointers.  The percentages
       * will decline once we run off the end of the kernel.
       *)
      if !size > 1_000_000 then (
        let percent v w = float v *. 100. /. (float (!size / w)) in
        max_le32_pc := max !max_le32_pc (percent !le32 4);
        max_be32_pc := max !max_be32_pc (percent !be32 4);
        max_le64_pc := max !max_le64_pc (percent !le64 8);
        max_be64_pc := max !max_be64_pc (percent !be64 8);

        (* If these are all < 0.1% even after millions of bytes have
         * been read, then we should bail.  It's not a kernel.
         *)
        if !size > 8_000_000 &&
          !max_le32_pc < 0.1 && !max_be32_pc < 0.1 &&
          !max_le64_pc < 0.1 && !max_be64_pc < 0.1 then (
            debug "doesn't look like a kernel: le32 %g%% be32 %g%% le64 %g%% be64 %g%%"
              !max_le32_pc !max_be32_pc !max_le64_pc !max_be64_pc;
            raise Not_found
          )
      );

      (* Update block CRC.  Bail if qemu is just giving us the same
       * block of data over and over again.  However note that most
       * kernels are only about 10 MB in size, and because we read a
       * fixed amount (usually 16 MB) at the end of this read we'll be
       * reading unmapped memory which can trip this heuristic.
       * Therefore only run this heuristic for the first 4 MB of
       * memory.
       *)
      if !size > 2 * 1024 * 1024 && !size <= 4 * 1024 * 1024 then (
        let crc = crc32_of_string data in
        Counter.incr checksums crc;
        if Counter.get checksums crc > !blocks/4 then (
          debug "looks like unmapped memory";
          raise Not_found
        )
      )

    (* Pick the (endianness, word size) pair whose peak pointer
     * percentage is strictly the largest; on ties the later
     * candidates in the chain win, ending at big endian 64 bit.
     *)
    and get_endian_wordsize () =
      if !max_le32_pc > !max_be32_pc &&
        !max_le32_pc > !max_le64_pc &&
        !max_le32_pc > !max_be64_pc then
        LittleEndian, Word32
      else if !max_be32_pc > !max_le64_pc &&
        !max_be32_pc > !max_be64_pc then
        BigEndian, Word32
      else if !max_le64_pc > !max_be64_pc then
        LittleEndian, Word64
      else
        BigEndian, Word64
    in
    update_stats, get_endian_wordsize
  in

  (* Loop loading the kernel. *)
  let rec loop i acc =
    if i < len then (
      let str = f (base_addr +^ Int64.of_int i) in
      update_stats str;
      loop (i + String.length str) (str :: acc)
    )
    else
      List.rev acc
  in
  let strs = loop 0 [] in
  let data = String.concat "" strs in
  assert (String.length data = len);

  let endian, wordsize = get_endian_wordsize () in

  { data = data; base_addr = base_addr;
    endian = endian; wordsize = wordsize }

(* [find_first k str] is the guest address of the first occurrence of
 * [str] in the image, searching from the base address.  See
 * [find_from] for the failure behaviour.
 *)
let rec find_first k str =
  find_from k k.base_addr str

(* [find_all k str] is the list, in ascending address order, of the
 * guest addresses of every occurrence of [str] in the image
 * (overlapping occurrences included, since the search restarts one
 * byte after each hit).  Returns [] if there are none.
 *)
and find_all k str =
  (* The [try] wraps only the single search so that the recursive
   * call is a true tail call; recursing inside the [try] would grow
   * the stack by one frame (plus a handler) per match.
   *)
  let rec loop addr acc =
    let found =
      try Some (find_from k addr str)
      with Not_found -> None in
    match found with
    | Some a -> loop (a +^ 1L) (a :: acc)
    | None -> List.rev acc
  in
  loop k.base_addr []

(* [find_from k addr str] searches for [str] starting at guest address
 * [addr] and returns the guest address of the match.  [addr] must map
 * into the image (asserted).  Callers in this file rely on the search
 * raising [Not_found] when there is no match - presumably raised by
 * the C function [strstr_from]; confirm in [c_utils.c].
 *)
and find_from { data = data; base_addr = base_addr } addr str =
  let off = str_mapping base_addr addr (String.length data) in
  assert (off >= 0);
  let found_off = strstr_from data str off in
  base_addr +^ Int64.of_int found_off

(* [find_all_pointers k ptr] is the list, in ascending address order,
 * of guest addresses at which the value [ptr] is found, encoded with
 * the endianness and word size of [k].  The search uses
 * [strstr_from_aligned] and advances by one word after each hit.
 *)
let find_all_pointers ({ endian = endian; wordsize = wordsize } as k) ptr =
  (* Byte representation of the pointer as it would appear in memory. *)
  let str =
    match endian, wordsize with
    | LittleEndian, Word32 -> str_of_le32 ptr
    | BigEndian, Word32 -> str_of_be32 ptr
    | LittleEndian, Word64 -> str_of_le64 ptr
    | BigEndian, Word64 -> str_of_be64 ptr in
  let find_from_aligned addr =
    let off = str_mapping k.base_addr addr (String.length k.data) in
    assert (off >= 0);
    let found_off = strstr_from_aligned k.data str off in
    k.base_addr +^ Int64.of_int found_off
  in
  (* As in [find_all], keep the [try] around the single search only so
   * the recursion is a tail call.
   *)
  let rec loop addr acc =
    let found =
      try Some (find_from_aligned addr)
      with Not_found -> None in
    match found with
    | Some a -> loop (succ_word k a) (a :: acc)
    | None -> List.rev acc
  in
  loop k.base_addr []

(* [follow_pointer k addr] reads one machine word at guest address
 * [addr], decoded with the endianness and word size of [k].  [addr]
 * must map into the image (asserted).
 *
 * NOTE(review): unlike [get_int32] / [get_int64] the mapping bound
 * here is the full data length rather than the length minus the word
 * size, so an address within the last few bytes of the image may read
 * past the end of the data - confirm against [str_mapping].
 *)
let follow_pointer k addr =
  let off = str_mapping k.base_addr addr (String.length k.data) in
  assert (off >= 0);
  match k.endian, k.wordsize with
  | LittleEndian, Word32 -> str_get_le32 k.data off
  | BigEndian, Word32 -> str_get_be32 k.data off
  | LittleEndian, Word64 -> str_get_le64 k.data off
  | BigEndian, Word64 -> str_get_be64 k.data off

(* [is_mapped k addr] is true iff [str_mapping] yields a non-negative
 * offset for [addr] within the image.
 *)
let is_mapped k addr =
  let off = str_mapping k.base_addr addr (String.length k.data) in
  off >= 0

(* [get_memory k addr len] is a copy of [len] bytes of the image
 * starting at guest address [addr].  The whole range must lie inside
 * the image and [len] must be non-negative (both asserted).
 *)
let get_memory k addr len =
  let off = str_mapping k.base_addr addr (String.length k.data) in
  assert (off >= 0);
  assert (len >= 0);
  assert (len + off <= String.length k.data);
  String.sub k.data off len

(* [get_byte k addr] is the byte (0..255) at guest address [addr],
 * which must map into the image (asserted).  The unchecked string
 * access relies on that assertion for bounds safety.
 *)
let get_byte k addr =
  let off = str_mapping k.base_addr addr (String.length k.data) in
  assert (off >= 0);
  Char.code (String.unsafe_get k.data off)

(* [get_int32 k addr] reads a 32 bit value at guest address [addr]
 * using the endianness of [k].  The mapping bound is shortened by 4
 * so that the read stays inside the data; [addr] must map within
 * that bound (asserted).
 *)
let get_int32 k addr =
  let off = str_mapping k.base_addr addr (String.length k.data - 4) in
  assert (off >= 0);
  match k.endian with
  | LittleEndian -> str_get_le32 k.data off
  | BigEndian -> str_get_be32 k.data off

(* [get_int64 k addr] reads a 64 bit value at guest address [addr]
 * using the endianness of [k].  The mapping bound is shortened by 8
 * so that the read stays inside the data; [addr] must map within
 * that bound (asserted).
 *)
let get_int64 k addr =
  let off = str_mapping k.base_addr addr (String.length k.data - 8) in
  assert (off >= 0);
  match k.endian with
  | LittleEndian -> str_get_le64 k.data off
  | BigEndian -> str_get_be64 k.data off

(* [get_string k addr] returns the string found at guest address
 * [addr] via the C function [get_asciiz] (presumably the
 * NUL-terminated string starting there - confirm in [c_utils.c]).
 * [addr] must map into the image (asserted).
 *)
let get_string k addr =
  let off = str_mapping k.base_addr addr (String.length k.data) in
  assert (off >= 0);
  get_asciiz k.data off

(* [is_C_identifier k addr] is the result of the C function
 * [is_C_ident] at guest address [addr], or false if [addr] does not
 * map into the image.
 *)
let is_C_identifier k addr =
  let off = str_mapping k.base_addr addr (String.length k.data) in
  if off >= 0 then is_C_ident k.data off
  else false