From 8289aa1ad68ec94c87fc4d538f638d8816052d92 Mon Sep 17 00:00:00 2001 From: Richard Jones Date: Wed, 28 Jul 2010 15:40:42 +0100 Subject: [PATCH] New APIs for guest inspection. This commit converts (some of) the Perl inspection code to C and makes it available through core APIs. The new APIs are: inspect-os - Does the inspection, returns list of OSes inspect-get-* - Get results of the inspection where '*' is one of: type - 'windows' or 'linux' distro - Linux distro arch - architecture product-name - long product name string major-version minor-version - major.minor version of OS mountpoints - get a list of the mountpoints filesystems - get all filesystems associated with the OS This works for all existing supported Linux and Windows OSes. --- src/Makefile.am | 3 +- src/generator.ml | 221 +++++++++++ src/guestfs-internal.h | 54 +++ src/guestfs.c | 2 + src/guestfs.pod | 68 +++- src/inspect.c | 1017 ++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 1360 insertions(+), 5 deletions(-) diff --git a/src/Makefile.am b/src/Makefile.am index 61cec04..cc01459 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -131,7 +131,7 @@ libguestfs_la_SOURCES = \ proto.c \ libguestfs.syms -libguestfs_la_LIBADD = $(LIBPCRE) $(LIBMAGIC) $(LTLIBTHREAD) ../gnulib/lib/libgnu.la +libguestfs_la_LIBADD = $(HIVEX_LIBS) $(LIBPCRE) $(LIBMAGIC) $(LTLIBTHREAD) ../gnulib/lib/libgnu.la # Make libguestfs include the convenience library. noinst_LTLIBRARIES = libprotocol.la @@ -139,6 +139,7 @@ libguestfs_la_LIBADD += libprotocol.la libguestfs_la_CFLAGS = \ -DGUESTFS_DEFAULT_PATH='"$(libdir)/guestfs"' \ + $(HIVEX_CFLAGS) \ $(WARN_CFLAGS) $(WERROR_CFLAGS) libguestfs_la_CPPFLAGS = -I$(top_srcdir)/gnulib/lib diff --git a/src/generator.ml b/src/generator.ml index 4ce49fe..a879806 100755 --- a/src/generator.ml +++ b/src/generator.ml @@ -1068,6 +1068,227 @@ initrd or kernel module(s) instead. 
=back"); + ("inspect_os", (RStringList "roots", []), -1, [], + [], + "inspect disk and return list of operating systems found", + "\ +This function uses other libguestfs functions and certain +heuristics to inspect the disk(s) (usually disks belonging to +a virtual machine), looking for operating systems. + +The list returned is empty if no operating systems were found. + +If one operating system was found, then this returns a list with +a single element, which is the name of the root filesystem of +this operating system. It is also possible for this function +to return a list containing more than one element, indicating +a dual-boot or multi-boot virtual machine, with each element being +the root filesystem of one of the operating systems. + +You can pass the root string(s) returned to other +C functions in order to query further +information about each operating system, such as the name +and version. + +This function uses other libguestfs features such as +C and C in order to mount +and unmount filesystems and look at the contents. This should +be called with no disks currently mounted. The function may also +use Augeas, so any existing Augeas handle will be closed. + +This function cannot decrypt encrypted disks. The caller +must do that first (supplying the necessary keys) if the +disk is encrypted. + +Please read L for more details."); + + ("inspect_get_type", (RString "name", [Device "root"]), -1, [], + [], + "get type of inspected operating system", + "\ +This function should only be called with a root device string +as returned by C. + +This returns the type of the inspected operating system. +Currently defined types are: + +=over 4 + +=item \"linux\" + +Any Linux-based operating system. + +=item \"windows\" + +Any Microsoft Windows operating system. + +=item \"unknown\" + +The operating system type could not be determined. + +=back + +Future versions of libguestfs may return other strings here. +The caller should be prepared to handle any string. 
+ +Please read L for more details."); + + ("inspect_get_arch", (RString "arch", [Device "root"]), -1, [], + [], + "get architecture of inspected operating system", + "\ +This function should only be called with a root device string +as returned by C. + +This returns the architecture of the inspected operating system. +The possible return values are listed under +C. + +If the architecture could not be determined, then the +string C is returned. + +Please read L for more details."); + + ("inspect_get_distro", (RString "distro", [Device "root"]), -1, [], + [], + "get distro of inspected operating system", + "\ +This function should only be called with a root device string +as returned by C. + +This returns the distro (distribution) of the inspected operating +system. + +Currently defined distros are: + +=over 4 + +=item \"debian\" + +Debian or a Debian-derived distro such as Ubuntu. + +=item \"fedora\" + +Fedora. + +=item \"redhat-based\" + +Some Red Hat-derived distro. + +=item \"rhel\" + +Red Hat Enterprise Linux and some derivatives. + +=item \"windows\" + +Windows does not have distributions. This string is +returned if the OS type is Windows. + +=item \"unknown\" + +The distro could not be determined. + +=back + +Future versions of libguestfs may return other strings here. +The caller should be prepared to handle any string. + +Please read L for more details."); + + ("inspect_get_major_version", (RInt "major", [Device "root"]), -1, [], + [], + "get major version of inspected operating system", + "\ +This function should only be called with a root device string +as returned by C. + +This returns the major version number of the inspected operating +system. + +Windows uses a consistent versioning scheme which is I +reflected in the popular public names used by the operating system. +Notably the operating system known as \"Windows 7\" is really +version 6.1 (ie. major = 6, minor = 1). 
You can find out the +real versions corresponding to releases of Windows by consulting +Wikipedia or MSDN. + +If the version could not be determined, then C<0> is returned. + +Please read L for more details."); + + ("inspect_get_minor_version", (RInt "minor", [Device "root"]), -1, [], + [], + "get minor version of inspected operating system", + "\ +This function should only be called with a root device string +as returned by C. + +This returns the minor version number of the inspected operating +system. + +If the version could not be determined, then C<0> is returned. + +Please read L for more details. +See also C."); + + ("inspect_get_product_name", (RString "product", [Device "root"]), -1, [], + [], + "get product name of inspected operating system", + "\ +This function should only be called with a root device string +as returned by C. + +This returns the product name of the inspected operating +system. The product name is generally some freeform string +which can be displayed to the user, but should not be +parsed by programs. + +If the product name could not be determined, then the +string C is returned. + +Please read L for more details."); + + ("inspect_get_mountpoints", (RHashtable "mountpoints", [Device "root"]), -1, [], + [], + "get mountpoints of inspected operating system", + "\ +This function should only be called with a root device string +as returned by C. + +This returns a hash of where we think the filesystems +associated with this operating system should be mounted. +Callers should note that this is at best an educated guess +made by reading configuration files such as C. + +Each element in the returned hashtable has a key which +is the path of the mountpoint (eg. C) and a value +which is the filesystem that would be mounted there +(eg. C). + +Non-mounted devices such as swap devices are I +returned in this list. + +Please read L for more details. 
+See also C."); + + ("inspect_get_filesystems", (RStringList "filesystems", [Device "root"]), -1, [], + [], + "get filesystems associated with inspected operating system", + "\ +This function should only be called with a root device string +as returned by C. + +This returns a list of all the filesystems that we think +are associated with this operating system. This includes +the root filesystem, other ordinary filesystems, and +non-mounted devices like swap partitions. + +In the case of a multi-boot virtual machine, it is possible +for a filesystem to be shared between operating systems. + +Please read L for more details. +See also C."); + ] (* daemon_functions are any functions which cause some action diff --git a/src/guestfs-internal.h b/src/guestfs-internal.h index d0fc3ca..9bf76da 100644 --- a/src/guestfs-internal.h +++ b/src/guestfs-internal.h @@ -131,6 +131,59 @@ struct guestfs_h void * close_cb_data; int msg_next_serial; + + /* Information gathered by inspect_os. Must be freed by calling + * guestfs___free_inspect_info. + */ + struct inspect_fs *fses; + size_t nr_fses; +}; + +/* Per-filesystem data stored for inspect_os. 
*/ +enum inspect_fs_content { + FS_CONTENT_UNKNOWN = 0, + FS_CONTENT_LINUX_ROOT, + FS_CONTENT_WINDOWS_ROOT, + FS_CONTENT_LINUX_BOOT, + FS_CONTENT_LINUX_USR, + FS_CONTENT_LINUX_USR_LOCAL, + FS_CONTENT_LINUX_VAR, +}; + +enum inspect_os_type { + OS_TYPE_UNKNOWN = 0, + OS_TYPE_LINUX, + OS_TYPE_WINDOWS, +}; + +enum inspect_os_distro { + OS_DISTRO_UNKNOWN = 0, + OS_DISTRO_DEBIAN, + OS_DISTRO_FEDORA, + OS_DISTRO_REDHAT_BASED, + OS_DISTRO_RHEL, + OS_DISTRO_WINDOWS, +}; + +struct inspect_fs { + int is_root; + char *device; + int is_mountable; + int is_swap; + enum inspect_fs_content content; + enum inspect_os_type type; + enum inspect_os_distro distro; + char *product_name; + int major_version; + int minor_version; + char *arch; + struct inspect_fstab_entry *fstab; + size_t nr_fstab; +}; + +struct inspect_fstab_entry { + char *device; + char *mountpoint; }; struct guestfs_message_header; @@ -143,6 +196,7 @@ extern void *guestfs_safe_realloc (guestfs_h *g, void *ptr, int nbytes); extern char *guestfs_safe_strdup (guestfs_h *g, const char *str); extern char *guestfs_safe_strndup (guestfs_h *g, const char *str, size_t n); extern void *guestfs_safe_memdup (guestfs_h *g, void *ptr, size_t size); +extern void guestfs___free_inspect_info (guestfs_h *g); extern int guestfs___set_busy (guestfs_h *g); extern int guestfs___end_busy (guestfs_h *g); extern int guestfs___send (guestfs_h *g, int proc_nr, xdrproc_t xdrp, char *args); diff --git a/src/guestfs.c b/src/guestfs.c index 871d713..cef80db 100644 --- a/src/guestfs.c +++ b/src/guestfs.c @@ -184,6 +184,8 @@ guestfs_close (guestfs_h *g) if (g->close_cb) g->close_cb (g, g->close_cb_data); + guestfs___free_inspect_info (g); + /* Try to sync if autosync flag is set. */ if (g->autosync && g->state == READY) { guestfs_umount_all (g); diff --git a/src/guestfs.pod b/src/guestfs.pod index 5a2e7a5..5deccb5 100644 --- a/src/guestfs.pod +++ b/src/guestfs.pod @@ -160,9 +160,10 @@ you have to find out. 
Libguestfs can do that too: use L</guestfs_list_partitions> and L</guestfs_lvs> to list possible partitions and LVs, and either try mounting each to see what is mountable, or else examine them with L</guestfs_vfs_type> or -L</guestfs_file>. But you might find it easier to look at higher level -programs built on top of libguestfs, in particular -L<virt-inspector(1)>. +L</guestfs_file>. Libguestfs also has a set of APIs for inspection of +disk images (see L</INSPECTION> below). But you might find it easier +to look at higher level programs built on top of libguestfs, in +particular L<virt-inspector(1)>. To mount a disk image read-only, use L</guestfs_mount_ro>. There are several other variations of the C<guestfs_mount_*> call. @@ -481,6 +482,65 @@ Then close the mapper device by calling L</guestfs_luks_close> on the C</dev/mapper/mapname> device (I<not> the underlying encrypted block device). +=head2 INSPECTION + +Libguestfs has APIs for inspecting an unknown disk image to find out +if it contains operating systems. (These APIs used to be in a +separate Perl-only library called L<Sys::Guestfs::Lib(3)> but since +version 1.5.3 the most frequently used part of this library has been +rewritten in C and moved into the core code). + +Add all disks belonging to the unknown virtual machine and call +L</guestfs_launch> in the usual way. + +Then call L</guestfs_inspect_os>. This function uses other libguestfs +calls and certain heuristics, and returns a list of operating systems +that were found. An empty list means none were found. A single +element is the root filesystem of the operating system. For dual- or +multi-boot guests, multiple roots can be returned, each one +corresponding to a separate operating system. (Multi-boot virtual +machines are extremely rare in the world of virtualization, but since +this scenario can happen, we have built libguestfs to deal with it.) + +For each root, you can then call various C<guestfs_inspect_get_*> +functions to get additional details about that operating system. For +example, call L</guestfs_inspect_get_type> to return the string +C<windows> or C<linux> for Windows and Linux-based operating systems +respectively. 
+ +Un*x-like and Linux-based operating systems usually consist of several +filesystems which are mounted at boot time (for example, a separate +boot partition mounted on C</boot>). The inspection rules are able to +detect how filesystems correspond to mount points. Call +C<guestfs_inspect_get_mountpoints> to get this mapping. It might +return a hash table like this example: + + /boot => /dev/sda1 + / => /dev/vg_guest/lv_root + /usr => /dev/vg_guest/lv_usr + +The caller can then make calls to L</guestfs_mount_options> to +mount the filesystems as suggested. + +Be careful to mount filesystems in the right order (eg. C</> before +C</usr>). Sorting the keys of the hash by length, shortest first, +should work. + +Inspection currently only works for some common operating systems. +Contributors are welcome to send patches for other operating systems +that we currently cannot detect. + +Encrypted disks must be opened before inspection. See +L</ENCRYPTED DISKS> for more details. The L</guestfs_inspect_os> +function just ignores any encrypted devices. + +A note on the implementation: The call L</guestfs_inspect_os> performs +inspection and caches the results in the guest handle. Subsequent +calls to C<guestfs_inspect_get_*> return this cached information, but +I<do not> re-read the disks. If you change the content of the guest +disks, you can redo inspection by calling L</guestfs_inspect_os> +again. + =head2 SPECIAL CONSIDERATIONS FOR WINDOWS GUESTS Libguestfs can mount NTFS partitions. It does this using the @@ -495,7 +555,7 @@ that directory might be referred to as C</WINDOWS/System32>. Drive letter mappings are outside the scope of libguestfs. You have to use libguestfs to read the appropriate Windows Registry and configuration files, to determine yourself how drives are mapped (see -also L<virt-inspector(1)>). +also L<hivex(3)> and L<virt-inspector(1)>). Replacing backslash characters with forward slash characters is also outside the scope of libguestfs, but something that you can easily do. 
diff --git a/src/inspect.c b/src/inspect.c index d19e23b..d1bb7bb 100644 --- a/src/inspect.c +++ b/src/inspect.c @@ -28,8 +28,11 @@ #include #include +#include +#include #include "ignore-value.h" +#include "xstrtol.h" #include "guestfs.h" #include "guestfs-internal.h" @@ -44,6 +47,14 @@ static pcre *re_file_elf; static pcre *re_file_win64; static pcre *re_elf_ppc64; +static pcre *re_fedora; +static pcre *re_rhel_old; +static pcre *re_rhel; +static pcre *re_rhel_no_minor; +static pcre *re_debian; +static pcre *re_aug_seq; +static pcre *re_xdev; +static pcre *re_windows_version; static void compile_regexps (void) __attribute__((constructor)); static void @@ -64,6 +75,17 @@ compile_regexps (void) COMPILE (re_file_elf, "ELF.*(?:executable|shared object|relocatable), (.+?),", 0); COMPILE (re_elf_ppc64, "64.*PowerPC", 0); + COMPILE (re_fedora, "Fedora release (\\d+)", 0); + COMPILE (re_rhel_old, + "(?:Red Hat Enterprise Linux|CentOS|Scientific Linux).*release (\\d+).*Update (\\d+)", 0); + COMPILE (re_rhel, + "(?:Red Hat Enterprise Linux|CentOS|Scientific Linux).*release (\\d+)\\.(\\d+)", 0); + COMPILE (re_rhel_no_minor, + "(?:Red Hat Enterprise Linux|CentOS|Scientific Linux).*release (\\d+)", 0); + COMPILE (re_debian, "(\\d+)\\.(\\d+)", 0); + COMPILE (re_aug_seq, "/\\d+$", 0); + COMPILE (re_xdev, "^/dev/(?:h|s|v|xv)d([a-z]\\d*)$", 0); + COMPILE (re_windows_version, "^(\\d+)\\.(\\d+)", 0); } /* Match a regular expression which contains no captures. Returns @@ -111,6 +133,29 @@ match1 (guestfs_h *g, const char *str, const pcre *re) return safe_strndup (g, &str[vec[2]], vec[3]-vec[2]); } +/* Match a regular expression which contains exactly two captures. */ +static int +match2 (guestfs_h *g, const char *str, const pcre *re, char **ret1, char **ret2) +{ + size_t len = strlen (str); + int vec[30], r; + + r = pcre_exec (re, NULL, str, len, 0, 0, vec, 30); + if (r == PCRE_ERROR_NOMATCH) + return 0; + if (r != 3) { + /* Internal error -- should not happen. 
*/ + fprintf (stderr, "libguestfs: %s: %s: internal error: pcre_exec returned unexpected error code %d when matching against the string \"%s\"\n", + __FILE__, __func__, r, str); + return 0; + } + + *ret1 = safe_strndup (g, &str[vec[2]], vec[3]-vec[2]); + *ret2 = safe_strndup (g, &str[vec[4]], vec[5]-vec[4]); + + return 1; +} + /* Convert output from 'file' command on ELF files to the canonical * architecture string. Caller must free the result. */ @@ -278,3 +323,975 @@ guestfs__file_architecture (guestfs_h *g, const char *path) free (elf_arch); return ret; /* caller frees */ } + +/* The main inspection code. */ +static int feature_available (guestfs_h *g, const char *feature); +static void free_string_list (char **); +static int check_for_filesystem_on (guestfs_h *g, const char *device); + +char ** +guestfs__inspect_os (guestfs_h *g) +{ + /* Remove any information previously stored in the handle. */ + guestfs___free_inspect_info (g); + + if (guestfs_umount_all (g) == -1) + return NULL; + + /* Iterate over all possible devices. Try to mount each + * (read-only). Examine ones which contain filesystems and add that + * information to the handle. + */ + /* Look to see if any devices directly contain filesystems (RHBZ#590167). */ + char **devices; + devices = guestfs_list_devices (g); + if (devices == NULL) + return NULL; + + size_t i; + for (i = 0; devices[i] != NULL; ++i) { + if (check_for_filesystem_on (g, devices[i]) == -1) { + free_string_list (devices); + guestfs___free_inspect_info (g); + return NULL; + } + } + free_string_list (devices); + + /* Look at all partitions. */ + char **partitions; + partitions = guestfs_list_partitions (g); + if (partitions == NULL) { + guestfs___free_inspect_info (g); + return NULL; + } + + for (i = 0; partitions[i] != NULL; ++i) { + if (check_for_filesystem_on (g, partitions[i]) == -1) { + free_string_list (partitions); + guestfs___free_inspect_info (g); + return NULL; + } + } + free_string_list (partitions); + + /* Look at all LVs. 
*/ + if (feature_available (g, "lvm2")) { + char **lvs; + lvs = guestfs_lvs (g); + if (lvs == NULL) { + guestfs___free_inspect_info (g); + return NULL; + } + + for (i = 0; lvs[i] != NULL; ++i) { + if (check_for_filesystem_on (g, lvs[i]) == -1) { + free_string_list (lvs); + guestfs___free_inspect_info (g); + return NULL; + } + } + free_string_list (lvs); + } + + /* At this point we have, in the handle, a list of all filesystems + * found and data about each one. Now we assemble the list of + * filesystems which are root devices and return that to the user. + */ + size_t count = 0; + for (i = 0; i < g->nr_fses; ++i) + if (g->fses[i].is_root) + count++; + + char **ret = calloc (count+1, sizeof (char *)); + if (ret == NULL) { + perrorf (g, "calloc"); + guestfs___free_inspect_info (g); + return NULL; + } + + count = 0; + for (i = 0; i < g->nr_fses; ++i) { + if (g->fses[i].is_root) { + ret[count] = safe_strdup (g, g->fses[i].device); + count++; + } + } + ret[count] = NULL; + + return ret; +} + +void +guestfs___free_inspect_info (guestfs_h *g) +{ + size_t i; + for (i = 0; i < g->nr_fses; ++i) { + free (g->fses[i].device); + free (g->fses[i].product_name); + free (g->fses[i].arch); + size_t j; + for (j = 0; j < g->fses[i].nr_fstab; ++j) { + free (g->fses[i].fstab[j].device); + free (g->fses[i].fstab[j].mountpoint); + } + free (g->fses[i].fstab); + } + free (g->fses); + g->nr_fses = 0; + g->fses = NULL; +} + +static void +free_string_list (char **argv) +{ + size_t i; + for (i = 0; argv[i] != NULL; ++i) + free (argv[i]); + free (argv); +} + +/* In the Perl code this is a public function. */ +static int +feature_available (guestfs_h *g, const char *feature) +{ + /* If there's an error we should ignore it, so to do that we have to + * temporarily replace the error handler with a null one. 
+ */ + guestfs_error_handler_cb old_error_cb = g->error_cb; + g->error_cb = NULL; + + const char *groups[] = { feature, NULL }; + int r = guestfs_available (g, (char * const *) groups); + + g->error_cb = old_error_cb; + + return r == 0 ? 1 : 0; +} + +/* Find out if 'device' contains a filesystem. If it does, add + * another entry in g->fses. + */ +static int check_filesystem (guestfs_h *g, const char *device); +static int check_linux_root (guestfs_h *g, struct inspect_fs *fs); +static int check_fstab (guestfs_h *g, struct inspect_fs *fs); +static int check_windows_root (guestfs_h *g, struct inspect_fs *fs); +static int check_windows_arch (guestfs_h *g, struct inspect_fs *fs, + const char *systemroot); +static int check_windows_registry (guestfs_h *g, struct inspect_fs *fs, + const char *systemroot); +static char *resolve_windows_path_silently (guestfs_h *g, const char *); +static int extend_fses (guestfs_h *g); +static int parse_unsigned_int (guestfs_h *g, const char *str); +static int add_fstab_entry (guestfs_h *g, struct inspect_fs *fs, + const char *spec, const char *mp); +static char *resolve_fstab_device (guestfs_h *g, const char *spec); + +static int +check_for_filesystem_on (guestfs_h *g, const char *device) +{ + /* Get vfs-type in order to check if it's a Linux(?) swap device. + * If there's an error we should ignore it, so to do that we have to + * temporarily replace the error handler with a null one. + */ + guestfs_error_handler_cb old_error_cb = g->error_cb; + g->error_cb = NULL; + char *vfs_type = guestfs_vfs_type (g, device); + g->error_cb = old_error_cb; + + int is_swap = vfs_type && STREQ (vfs_type, "swap"); + + if (g->verbose) + fprintf (stderr, "check_for_filesystem_on: %s (%s)\n", + device, vfs_type ? vfs_type : "failed to get vfs type"); + + if (is_swap) { + free (vfs_type); + if (extend_fses (g) == -1) + return -1; + g->fses[g->nr_fses-1].is_swap = 1; + return 0; + } + + /* Try mounting the device. As above, ignore errors. 
*/ + g->error_cb = NULL; + int r = guestfs_mount_ro (g, device, "/"); + if (r == -1 && vfs_type && STREQ (vfs_type, "ufs")) /* Hack for the *BSDs. */ + r = guestfs_mount_vfs (g, "ro,ufstype=ufs2", "ufs", device, "/"); + free (vfs_type); + g->error_cb = old_error_cb; + if (r == -1) + return 0; + + /* Do the rest of the checks. */ + r = check_filesystem (g, device); + + /* Unmount the filesystem. */ + if (guestfs_umount_all (g) == -1) + return -1; + + return r; +} + +static int +check_filesystem (guestfs_h *g, const char *device) +{ + if (extend_fses (g) == -1) + return -1; + + struct inspect_fs *fs = &g->fses[g->nr_fses-1]; + + fs->device = safe_strdup (g, device); + fs->is_mountable = 1; + + /* Grub /boot? */ + if (guestfs_is_file (g, "/grub/menu.lst") > 0 || + guestfs_is_file (g, "/grub/grub.conf") > 0) + fs->content = FS_CONTENT_LINUX_BOOT; + /* Linux root? */ + else if (guestfs_is_dir (g, "/etc") > 0 && + guestfs_is_dir (g, "/bin") > 0 && + guestfs_is_file (g, "/etc/fstab") > 0) { + fs->is_root = 1; + fs->content = FS_CONTENT_LINUX_ROOT; + if (check_linux_root (g, fs) == -1) + return -1; + } + /* Linux /usr/local? */ + else if (guestfs_is_dir (g, "/etc") > 0 && + guestfs_is_dir (g, "/bin") > 0 && + guestfs_is_dir (g, "/share") > 0 && + guestfs_exists (g, "/local") == 0 && + guestfs_is_file (g, "/etc/fstab") == 0) + fs->content = FS_CONTENT_LINUX_USR_LOCAL; + /* Linux /usr? */ + else if (guestfs_is_dir (g, "/etc") > 0 && + guestfs_is_dir (g, "/bin") > 0 && + guestfs_is_dir (g, "/share") > 0 && + guestfs_exists (g, "/local") > 0 && + guestfs_is_file (g, "/etc/fstab") == 0) + fs->content = FS_CONTENT_LINUX_USR; + /* Linux /var? */ + else if (guestfs_is_dir (g, "/log") > 0 && + guestfs_is_dir (g, "/run") > 0 && + guestfs_is_dir (g, "/spool") > 0) + fs->content = FS_CONTENT_LINUX_VAR; + /* Windows root? 
*/ + else if (guestfs_is_file (g, "/AUTOEXEC.BAT") > 0 || + guestfs_is_file (g, "/autoexec.bat") > 0 || + guestfs_is_dir (g, "/Program Files") > 0 || + guestfs_is_dir (g, "/WINDOWS") > 0 || + guestfs_is_dir (g, "/Windows") > 0 || + guestfs_is_dir (g, "/windows") > 0 || + guestfs_is_dir (g, "/WIN32") > 0 || + guestfs_is_dir (g, "/Win32") > 0 || + guestfs_is_dir (g, "/WINNT") > 0 || + guestfs_is_file (g, "/boot.ini") > 0 || + guestfs_is_file (g, "/ntldr") > 0) { + fs->is_root = 1; + fs->content = FS_CONTENT_WINDOWS_ROOT; + if (check_windows_root (g, fs) == -1) + return -1; + } + + return 0; +} + +/* The currently mounted device is known to be a Linux root. Try to + * determine from this the distro, version, etc. Also parse + * /etc/fstab to determine the arrangement of mountpoints and + * associated devices. + */ +static int +check_linux_root (guestfs_h *g, struct inspect_fs *fs) +{ + fs->type = OS_TYPE_LINUX; + + if (guestfs_exists (g, "/etc/redhat-release") > 0) { + fs->distro = OS_DISTRO_REDHAT_BASED; /* Something generic Red Hat-like. */ + + char **product_name = guestfs_head_n (g, 1, "/etc/redhat-release"); + if (product_name == NULL) + return -1; + if (product_name[0] == NULL) { + error (g, "/etc/redhat-release file is empty"); + free_string_list (product_name); + return -1; + } + + /* Note that this string becomes owned by the handle and will + * be freed by guestfs___free_inspect_info. 
+ */ + fs->product_name = product_name[0]; + free (product_name); + + char *major, *minor; + if ((major = match1 (g, fs->product_name, re_fedora)) != NULL) { + fs->distro = OS_DISTRO_FEDORA; + fs->major_version = parse_unsigned_int (g, major); + free (major); + if (fs->major_version == -1) + return -1; + } + else if (match2 (g, fs->product_name, re_rhel_old, &major, &minor) || + match2 (g, fs->product_name, re_rhel, &major, &minor)) { + fs->distro = OS_DISTRO_RHEL; + fs->major_version = parse_unsigned_int (g, major); + free (major); + if (fs->major_version == -1) { + free (minor); + return -1; + } + fs->minor_version = parse_unsigned_int (g, minor); + free (minor); + if (fs->minor_version == -1) + return -1; + } + else if ((major = match1 (g, fs->product_name, re_rhel_no_minor)) != NULL) { + fs->distro = OS_DISTRO_RHEL; + fs->major_version = parse_unsigned_int (g, major); + free (major); + if (fs->major_version == -1) + return -1; + fs->minor_version = 0; + } + } + else if (guestfs_exists (g, "/etc/debian_version") > 0) { + fs->distro = OS_DISTRO_DEBIAN; + + char **product_name = guestfs_head_n (g, 1, "/etc/debian_version"); + if (product_name == NULL) + return -1; + if (product_name[0] == NULL) { + error (g, "/etc/debian_version file is empty"); + free_string_list (product_name); + return -1; + } + + /* Note that this string becomes owned by the handle and will + * be freed by guestfs___free_inspect_info. + */ + fs->product_name = product_name[0]; + free (product_name); + + char *major, *minor; + if (match2 (g, fs->product_name, re_debian, &major, &minor)) { + fs->major_version = parse_unsigned_int (g, major); + free (major); + if (fs->major_version == -1) { + free (minor); + return -1; + } + fs->minor_version = parse_unsigned_int (g, minor); + free (minor); + if (fs->minor_version == -1) + return -1; + } + } + + /* Determine the architecture. 
*/ + const char *binaries[] = + { "/bin/bash", "/bin/ls", "/bin/echo", "/bin/rm", "/bin/sh" }; + size_t i; + for (i = 0; i < sizeof binaries / sizeof binaries[0]; ++i) { + if (guestfs_is_file (g, binaries[i]) > 0) { + /* Ignore errors from file_architecture call. */ + guestfs_error_handler_cb old_error_cb = g->error_cb; + g->error_cb = NULL; + char *arch = guestfs_file_architecture (g, binaries[i]); + g->error_cb = old_error_cb; + + if (arch) { + /* String will be owned by handle, freed by + * guestfs___free_inspect_info. + */ + fs->arch = arch; + break; + } + } + } + + /* We already know /etc/fstab exists because it's part of the test + * for Linux root above. We must now parse this file to determine + * which filesystems are used by the operating system and how they + * are mounted. + * XXX What if !feature_available (g, "augeas")? + */ + if (guestfs_aug_init (g, "/", AUG_NO_LOAD|AUG_SAVE_NOOP) == -1) + return -1; + + /* Tell Augeas to only load /etc/fstab (thanks Raphaël Pinson). */ + guestfs_aug_rm (g, "/augeas/load//incl[. != \"/etc/fstab\"]"); + guestfs_aug_load (g); + + int r = check_fstab (g, fs); + guestfs_aug_close (g); + if (r == -1) + return -1; + + return 0; +} + +static int +check_fstab (guestfs_h *g, struct inspect_fs *fs) +{ + char **lines = guestfs_aug_ls (g, "/files/etc/fstab"); + if (lines == NULL) + return -1; + + if (lines[0] == NULL) { + error (g, "could not parse /etc/fstab or empty file"); + free_string_list (lines); + return -1; + } + + size_t i; + char augpath[256]; + for (i = 0; lines[i] != NULL; ++i) { + /* Ignore comments. Only care about sequence lines which + * match m{/\d+$}. 
+ */ + if (match (g, lines[i], re_aug_seq)) { + snprintf (augpath, sizeof augpath, "%s/spec", lines[i]); + char *spec = guestfs_aug_get (g, augpath); + if (spec == NULL) { + free_string_list (lines); + return -1; + } + + snprintf (augpath, sizeof augpath, "%s/file", lines[i]); + char *mp = guestfs_aug_get (g, augpath); + if (mp == NULL) { + free_string_list (lines); + free (spec); + return -1; + } + + int r = add_fstab_entry (g, fs, spec, mp); + free (spec); + free (mp); + + if (r == -1) { + free_string_list (lines); + return -1; + } + } + } + + free_string_list (lines); + return 0; +} + +/* Add a filesystem and possibly a mountpoint entry for + * the root filesystem 'fs'. + * + * 'spec' is the fstab spec field, which might be a device name or a + * pseudodevice or 'UUID=...' or 'LABEL=...'. + * + * 'mp' is the mount point, which could also be 'swap' or 'none'. + */ +static int +add_fstab_entry (guestfs_h *g, struct inspect_fs *fs, + const char *spec, const char *mp) +{ + /* Ignore certain mountpoints. */ + if (STRPREFIX (mp, "/dev/") || + STREQ (mp, "/dev") || + STRPREFIX (mp, "/media/") || + STRPREFIX (mp, "/proc/") || + STREQ (mp, "/proc") || + STRPREFIX (mp, "/selinux/") || + STREQ (mp, "/selinux") || + STRPREFIX (mp, "/sys/") || + STREQ (mp, "/sys")) + return 0; + + /* Resolve UUID= and LABEL= to the actual device. */ + char *device = NULL; + if (STRPREFIX (spec, "UUID=")) + device = guestfs_findfs_uuid (g, &spec[5]); + else if (STRPREFIX (spec, "LABEL=")) + device = guestfs_findfs_label (g, &spec[6]); + /* Resolve guest block device names. */ + else if (spec[0] == '/') + device = resolve_fstab_device (g, spec); + /* Also ignore pseudo-devices completely, like spec == "tmpfs". + * If we haven't resolved the device successfully by this point, + * we don't care, just ignore it. + */ + if (device == NULL) + return 0; + + char *mountpoint = safe_strdup (g, mp); + + /* Add this to the fstab entry in 'fs'. 
+ * Note these are further filtered by guestfs_inspect_get_mountpoints + * and guestfs_inspect_get_filesystems. + */ + size_t n = fs->nr_fstab + 1; + struct inspect_fstab_entry *p; + + p = realloc (fs->fstab, n * sizeof (struct inspect_fstab_entry)); + if (p == NULL) { + perrorf (g, "realloc"); + free (device); + free (mountpoint); + return -1; + } + + fs->fstab = p; + fs->nr_fstab = n; + + /* These are owned by the handle and freed by guestfs___free_inspect_info. */ + fs->fstab[n-1].device = device; + fs->fstab[n-1].mountpoint = mountpoint; + + if (g->verbose) + fprintf (stderr, "fstab: device=%s mountpoint=%s\n", device, mountpoint); + + return 0; +} + +/* Resolve block device name to the libguestfs device name, eg. + * /dev/xvdb1 => /dev/vdb1. This assumes that disks were added in the + * same order as they appear to the real VM, which is a reasonable + * assumption to make. Return things like LV names unchanged (or + * anything we don't recognize). + */ +static char * +resolve_fstab_device (guestfs_h *g, const char *spec) +{ + char **devices = guestfs_list_devices (g); + if (devices == NULL) + return NULL; + + size_t count; + for (count = 0; devices[count] != NULL; count++) + ; + + char *device = NULL; + char *a1 = match1 (g, spec, re_xdev); + if (a1) { + size_t i = a1[0] - 'a'; /* a1[0] is always [a-z] because of regex. */ + if (i < count) { + size_t len = strlen (devices[i]) + strlen (a1) + 16; + device = safe_malloc (g, len); + snprintf (device, len, "%s%s", devices[i], &a1[1]); + } + } else { + /* Didn't match device pattern, return original spec unchanged. */ + device = safe_strdup (g, spec); + } + + free (a1); + free_string_list (devices); + + return device; +} + +/* XXX Handling of boot.ini in the Perl version was pretty broken. It + * essentially didn't do anything for modern Windows guests. + * Therefore I've omitted all that code. 
+ */
+/* Fill in 'fs' for a filesystem believed to be a Windows root: mark
+ * the type and distro as Windows, locate %SYSTEMROOT% by trying a few
+ * well-known directory names, then collect the architecture and the
+ * registry details.
+ *
+ * Returns 0 on success, or -1 if %SYSTEMROOT% cannot be resolved or
+ * one of the helper checks fails.
+ */
+static int
+check_windows_root (guestfs_h *g, struct inspect_fs *fs)
+{
+  fs->type = OS_TYPE_WINDOWS;
+  fs->distro = OS_DISTRO_WINDOWS;
+
+  /* Try to find Windows systemroot using some common locations. */
+  const char *systemroots[] =
+    { "/windows", "/winnt", "/win32", "/win" };
+  size_t i;
+  char *systemroot = NULL;
+  for (i = 0;
+       systemroot == NULL && i < sizeof systemroots / sizeof systemroots[0];
+       ++i) {
+    systemroot = resolve_windows_path_silently (g, systemroots[i]);
+  }
+
+  if (!systemroot) {
+    error (g, _("cannot resolve Windows %%SYSTEMROOT%%"));
+    return -1;
+  }
+
+  /* XXX There is a case for exposing systemroot and many variables
+   * from the registry through the libguestfs API.
+   */
+
+  /* Terminate the line so that later stderr output starts on a
+   * fresh line.
+   */
+  if (g->verbose)
+    fprintf (stderr, "windows %%SYSTEMROOT%% = %s\n", systemroot);
+
+  /* The registry check is skipped if the architecture check fails;
+   * systemroot is released on every path.
+   */
+  int ret = 0;
+  if (check_windows_arch (g, fs, systemroot) == -1 ||
+      check_windows_registry (g, fs, systemroot) == -1)
+    ret = -1;
+
+  free (systemroot);
+  return ret;
+}
+
+/* Determine the guest architecture from <systemroot>/system32/cmd.exe
+ * and store it in fs->arch.
+ *
+ * This is best-effort: if cmd.exe cannot be resolved, or its
+ * architecture cannot be determined, fs->arch is left untouched.
+ * Always returns 0.
+ */
+static int
+check_windows_arch (guestfs_h *g, struct inspect_fs *fs,
+                    const char *systemroot)
+{
+  /* 32 bytes of slack is enough for "/system32/cmd.exe" and the
+   * terminating NUL.
+   */
+  size_t len = strlen (systemroot) + 32;
+  char cmd_exe[len];
+  snprintf (cmd_exe, len, "%s/system32/cmd.exe", systemroot);
+
+  char *cmd_exe_path = resolve_windows_path_silently (g, cmd_exe);
+  if (!cmd_exe_path)
+    return 0;
+
+  char *arch = guestfs_file_architecture (g, cmd_exe_path);
+  free (cmd_exe_path);
+
+  if (arch)
+    fs->arch = arch;        /* freed by guestfs___free_inspect_info */
+
+  return 0;
+}
+
+/* At the moment, pull just the ProductName and version numbers from
+ * the registry.  In future there is a case for making many more
+ * registry fields available to callers.
+ */
+/* The software hive is downloaded from <systemroot>/system32/config
+ * into a private temporary directory and opened with hivex.  The
+ * values under Microsoft\Windows NT\CurrentVersion are scanned:
+ * "ProductName" is stored in fs->product_name and "CurrentVersion"
+ * is split into fs->major_version / fs->minor_version.
+ *
+ * Returns 0 on success, or if the software hive does not exist in
+ * the guest.  Returns -1 on error.  Every exit after the hive path
+ * has been resolved goes through the 'out' label so that the hive
+ * handle, the value list, the path and the temporary directory are
+ * always released.
+ */
+static int
+check_windows_registry (guestfs_h *g, struct inspect_fs *fs,
+                        const char *systemroot)
+{
+  size_t len = strlen (systemroot) + 64;
+  char software[len];
+  snprintf (software, len, "%s/system32/config/software", systemroot);
+
+  char *software_path = resolve_windows_path_silently (g, software);
+  if (!software_path)
+    /* If the software hive doesn't exist, just accept that we cannot
+     * find product_name etc.
+     */
+    return 0;
+
+  int ret = -1;
+  hive_h *h = NULL;
+  hive_value_h *values = NULL;
+  int dir_created = 0;      /* set once mkdtemp has succeeded */
+
+  char dir[] = "/tmp/winreg.XXXXXX";
+#define dir_len 18
+  if (mkdtemp (dir) == NULL) {
+    perrorf (g, "mkdtemp");
+    goto out;
+  }
+  dir_created = 1;
+
+  char software_hive[dir_len + 16];
+  snprintf (software_hive, dir_len + 16, "%s/software", dir);
+
+  if (guestfs_download (g, software_path, software_hive) == -1)
+    goto out;
+
+  h = hivex_open (software_hive, g->verbose ? HIVEX_OPEN_VERBOSE : 0);
+  if (h == NULL) {
+    perrorf (g, "hivex_open");
+    goto out;
+  }
+
+  /* Walk down from the root node one path component at a time,
+   * stopping early if a component is missing.
+   */
+  hive_node_h node = hivex_root (h);
+  const char *hivepath[] =
+    { "Microsoft", "Windows NT", "CurrentVersion" };
+  size_t i;
+  for (i = 0;
+       node != 0 && i < sizeof hivepath / sizeof hivepath[0];
+       ++i) {
+    node = hivex_node_get_child (h, node, hivepath[i]);
+  }
+
+  if (node == 0) {
+    perrorf (g, "hivex: cannot locate HKLM\\SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion");
+    goto out;
+  }
+
+  /* The list must be checked before it is indexed below. */
+  values = hivex_node_values (h, node);
+  if (values == NULL) {
+    perrorf (g, "hivex_node_values");
+    goto out;
+  }
+
+  for (i = 0; values[i] != 0; ++i) {
+    char *key = hivex_value_key (h, values[i]);
+    if (key == NULL) {
+      perrorf (g, "hivex_value_key");
+      goto out;
+    }
+
+    if (STRCASEEQ (key, "ProductName")) {
+      fs->product_name = hivex_value_string (h, values[i]);
+      if (!fs->product_name) {
+        perrorf (g, "hivex_value_string");
+        free (key);
+        goto out;
+      }
+    }
+    else if (STRCASEEQ (key, "CurrentVersion")) {
+      char *version = hivex_value_string (h, values[i]);
+      if (!version) {
+        perrorf (g, "hivex_value_string");
+        free (key);
+        goto out;
+      }
+      char *major, *minor;
+      if (match2 (g, version, re_windows_version, &major, &minor)) {
+        fs->major_version = parse_unsigned_int (g, major);
+        free (major);
+        if (fs->major_version == -1) {
+          free (minor);
+          free (key);
+          free (version);
+          goto out;
+        }
+        fs->minor_version = parse_unsigned_int (g, minor);
+        free (minor);
+        if (fs->minor_version == -1) {
+          free (key);
+          free (version);
+          /* Must not return directly here, the hive and the
+           * temporary directory still have to be cleaned up.
+           */
+          goto out;
+        }
+      }
+
+      free (version);
+    }
+
+    free (key);
+  }
+
+  ret = 0;
+
+ out:
+  if (h) hivex_close (h);
+  free (values);
+  free (software_path);
+
+  /* Free up the temporary directory, but only if mkdtemp actually
+   * created it.  Note the directory name cannot contain shell
+   * meta-characters because of the way it was constructed above.
+   */
+  if (dir_created) {
+    char cmd[dir_len + 16];
+    snprintf (cmd, dir_len + 16, "rm -rf %s", dir);
+    ignore_value (system (cmd));
+  }
+#undef dir_len
+
+  return ret;
+}
+
+/* Call guestfs_case_sensitive_path on 'path' with the handle's error
+ * callback temporarily cleared, then restore the callback.  Returns
+ * whatever guestfs_case_sensitive_path returned.
+ */
+static char *
+resolve_windows_path_silently (guestfs_h *g, const char *path)
+{
+  guestfs_error_handler_cb old_error_cb = g->error_cb;
+  g->error_cb = NULL;
+  char *ret = guestfs_case_sensitive_path (g, path);
+  g->error_cb = old_error_cb;
+  return ret;
+}
+
+/* Grow g->fses by one element and zero the new last element.
+ * Returns 0 on success; on realloc failure returns -1 and leaves
+ * g->fses and g->nr_fses as they were.
+ */
+static int
+extend_fses (guestfs_h *g)
+{
+  size_t n = g->nr_fses + 1;
+  struct inspect_fs *p;
+
+  p = realloc (g->fses, n * sizeof (struct inspect_fs));
+  if (p == NULL) {
+    perrorf (g, "realloc");
+    return -1;
+  }
+
+  g->fses = p;
+  g->nr_fses = n;
+
+  memset (&g->fses[n-1], 0, sizeof (struct inspect_fs));
+
+  return 0;
+}
+
+/* Parse small, unsigned ints, as used in version numbers.
*/ +static int +parse_unsigned_int (guestfs_h *g, const char *str) +{ + long ret; + int r = xstrtol (str, NULL, 10, &ret, ""); + if (r != LONGINT_OK) { + error (g, "could not parse integer in version number: %s", str); + return -1; + } + return ret; +} + +static struct inspect_fs * +search_for_root (guestfs_h *g, const char *root) +{ + if (g->nr_fses == 0) { + error (g, _("no inspection data: call guestfs_inspect_os first")); + return NULL; + } + + size_t i; + struct inspect_fs *fs; + for (i = 0; i < g->nr_fses; ++i) { + fs = &g->fses[i]; + if (fs->is_root && STREQ (root, fs->device)) + return fs; + } + + error (g, _("%s: root device not found: only call this function with a root device previously returned by guestfs_inspect_os"), + root); + return NULL; +} + +char * +guestfs__inspect_get_type (guestfs_h *g, const char *root) +{ + struct inspect_fs *fs = search_for_root (g, root); + if (!fs) + return NULL; + + char *ret; + switch (fs->type) { + case OS_TYPE_LINUX: ret = safe_strdup (g, "linux"); break; + case OS_TYPE_WINDOWS: ret = safe_strdup (g, "windows"); break; + case OS_TYPE_UNKNOWN: default: ret = safe_strdup (g, "unknown"); break; + } + + return ret; +} + +char * +guestfs__inspect_get_arch (guestfs_h *g, const char *root) +{ + struct inspect_fs *fs = search_for_root (g, root); + if (!fs) + return NULL; + + return safe_strdup (g, fs->arch ? 
: "unknown"); +} + +char * +guestfs__inspect_get_distro (guestfs_h *g, const char *root) +{ + struct inspect_fs *fs = search_for_root (g, root); + if (!fs) + return NULL; + + char *ret; + switch (fs->distro) { + case OS_DISTRO_DEBIAN: ret = safe_strdup (g, "debian"); break; + case OS_DISTRO_FEDORA: ret = safe_strdup (g, "fedora"); break; + case OS_DISTRO_REDHAT_BASED: ret = safe_strdup (g, "redhat-based"); break; + case OS_DISTRO_RHEL: ret = safe_strdup (g, "rhel"); break; + case OS_DISTRO_WINDOWS: ret = safe_strdup (g, "windows"); break; + case OS_DISTRO_UNKNOWN: default: ret = safe_strdup (g, "unknown"); break; + } + + return ret; +} + +int +guestfs__inspect_get_major_version (guestfs_h *g, const char *root) +{ + struct inspect_fs *fs = search_for_root (g, root); + if (!fs) + return -1; + + return fs->major_version; +} + +int +guestfs__inspect_get_minor_version (guestfs_h *g, const char *root) +{ + struct inspect_fs *fs = search_for_root (g, root); + if (!fs) + return -1; + + return fs->minor_version; +} + +char * +guestfs__inspect_get_product_name (guestfs_h *g, const char *root) +{ + struct inspect_fs *fs = search_for_root (g, root); + if (!fs) + return NULL; + + return safe_strdup (g, fs->product_name ? : "unknown"); +} + +char ** +guestfs__inspect_get_mountpoints (guestfs_h *g, const char *root) +{ + struct inspect_fs *fs = search_for_root (g, root); + if (!fs) + return NULL; + + char **ret; + + /* If no fstab information (Windows) return just the root. */ + if (fs->nr_fstab == 0) { + ret = calloc (3, sizeof (char *)); + ret[0] = safe_strdup (g, "/"); + ret[1] = safe_strdup (g, root); + ret[2] = NULL; + return ret; + } + +#define CRITERION fs->fstab[i].mountpoint[0] == '/' + size_t i, count = 0; + for (i = 0; i < fs->nr_fstab; ++i) + if (CRITERION) + count++; + + /* Hashtables have 2N+1 entries. 
*/ + ret = calloc (2*count+1, sizeof (char *)); + if (ret == NULL) { + perrorf (g, "calloc"); + return NULL; + } + + count = 0; + for (i = 0; i < fs->nr_fstab; ++i) + if (CRITERION) { + ret[2*count] = safe_strdup (g, fs->fstab[i].mountpoint); + ret[2*count+1] = safe_strdup (g, fs->fstab[i].device); + count++; + } +#undef CRITERION + + return ret; +} + +char ** +guestfs__inspect_get_filesystems (guestfs_h *g, const char *root) +{ + struct inspect_fs *fs = search_for_root (g, root); + if (!fs) + return NULL; + + char **ret; + + /* If no fstab information (Windows) return just the root. */ + if (fs->nr_fstab == 0) { + ret = calloc (2, sizeof (char *)); + ret[0] = safe_strdup (g, root); + ret[1] = NULL; + return ret; + } + + ret = calloc (fs->nr_fstab + 1, sizeof (char *)); + if (ret == NULL) { + perrorf (g, "calloc"); + return NULL; + } + + size_t i; + for (i = 0; i < fs->nr_fstab; ++i) + ret[i] = safe_strdup (g, fs->fstab[i].device); + + return ret; +} -- 1.8.3.1