commit xen for openSUSE:Factory
Hello community, here is the log from the commit of package xen for openSUSE:Factory checked in at 2014-07-08 16:58:56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/xen (Old) and /work/SRC/openSUSE:Factory/.xen.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Package is "xen" Changes: -------- --- /work/SRC/openSUSE:Factory/xen/xen.changes 2014-06-19 13:19:50.000000000 +0200 +++ /work/SRC/openSUSE:Factory/.xen.new/xen.changes 2014-07-08 16:59:04.000000000 +0200 @@ -1,0 +2,34 @@ +Fri Jun 27 12:21:47 MDT 2014 - carnold@suse.com + +- bnc#882127 - Xen kernel panics on booting SLES12 Beta 8 + 53a199d7-x86-EFI-allow-FPU-XMM-use-in-runtime-service-functions.patch +- Upstream patches from Jan + 538c338f-x86-amd_ucode-flip-revision-numbers-in-printk.patch + 538ee637-ACPI-Prevent-acpi_table_entries-from-falling-into-a-infinite-loop.patch + 5390917a-VT-d-honor-APEI-firmware-first-mode-in-XSA-59-workaround-code.patch + 53909259-x86-domctl-two-functional-fixes-to-XEN_DOMCTL_-gs-etvcpuextstate.patch + 5390927f-x86-fix-reboot-shutdown-with-running-HVM-guests.patch + 5396d818-avoid-crash-on-HVM-domain-destroy-with-PCI-passthrough.patch + 5396e805-x86-HVM-refine-SMEP-test-in-HVM_CR4_GUEST_RESERVED_BITS.patch + 539ebe62-x86-EFI-improve-boot-time-diagnostics.patch + 539ec004-x86-mce-don-t-spam-the-console-with-CPUx-Temperature-z.patch + 53a040c6-page-alloc-scrub-pages-used-by-hypervisor-upon-freeing.patch (replaces xsa100.patch) + 53a1990a-IOMMU-prevent-VT-d-device-IOTLB-operations-on-wrong-IOMMU.patch + +------------------------------------------------------------------- +Tue Jun 24 16:11:08 MDT 2014 - jfehlig@suse.com + +- Replace 'domUloader' with 'pygrub' when converting or importing + Xen domains into libvirt with xen2libvirt. domUloader is no + longer provided in xen-tools. 
+ Modified: xen2libvirt.py + +------------------------------------------------------------------- +Thu Jun 13 15:50:19 MDT 2014 - cyliu@suse.com + +- fate#310956: Support Direct Kernel Boot for FV guests + patches would go to upstream: + qemu side: qemu-support-xen-hvm-direct-kernel-boot.patch + xen side: xen-pass-kernel-initrd-to-qemu.patch + +------------------------------------------------------------------- @@ -9,0 +44,3 @@ +- bnc#880751 - VUL-0: xen: Hypervisor heap contents leaked to + guests + xsa100.patch New: ---- 538c338f-x86-amd_ucode-flip-revision-numbers-in-printk.patch 538ee637-ACPI-Prevent-acpi_table_entries-from-falling-into-a-infinite-loop.patch 5390917a-VT-d-honor-APEI-firmware-first-mode-in-XSA-59-workaround-code.patch 53909259-x86-domctl-two-functional-fixes-to-XEN_DOMCTL_-gs-etvcpuextstate.patch 5390927f-x86-fix-reboot-shutdown-with-running-HVM-guests.patch 5396d818-avoid-crash-on-HVM-domain-destroy-with-PCI-passthrough.patch 5396e805-x86-HVM-refine-SMEP-test-in-HVM_CR4_GUEST_RESERVED_BITS.patch 539ebe62-x86-EFI-improve-boot-time-diagnostics.patch 539ec004-x86-mce-don-t-spam-the-console-with-CPUx-Temperature-z.patch 53a040c6-page-alloc-scrub-pages-used-by-hypervisor-upon-freeing.patch 53a1990a-IOMMU-prevent-VT-d-device-IOTLB-operations-on-wrong-IOMMU.patch 53a199d7-x86-EFI-allow-FPU-XMM-use-in-runtime-service-functions.patch qemu-support-xen-hvm-direct-kernel-boot.patch xen-pass-kernel-initrd-to-qemu.patch ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ xen.spec ++++++ --- /var/tmp/diff_new_pack.6kvNQC/_old 2014-07-08 16:59:08.000000000 +0200 +++ /var/tmp/diff_new_pack.6kvNQC/_new 2014-07-08 16:59:08.000000000 +0200 @@ -154,7 +154,7 @@ %endif %endif -Version: 4.4.0_22 +Version: 4.4.0_24 Release: 0 PreReq: %insserv_prereq %fillup_prereq Summary: Xen Virtualization: Hypervisor (aka VMM aka Microkernel) @@ -262,7 +262,19 @@ Patch47: 
53859549-AMD-IOMMU-don-t-free-page-table-prematurely.patch Patch48: 5385956b-x86-don-t-use-VA-for-cache-flush-when-also-flushing-TLB.patch Patch49: 53859956-timers-set-the-deadline-more-accurately.patch -Patch50: 538dcada-x86-HVM-eliminate-vulnerabilities-from-hvm_inject_msi.patch +Patch50: 538c338f-x86-amd_ucode-flip-revision-numbers-in-printk.patch +Patch51: 538dcada-x86-HVM-eliminate-vulnerabilities-from-hvm_inject_msi.patch +Patch52: 538ee637-ACPI-Prevent-acpi_table_entries-from-falling-into-a-infinite-loop.patch +Patch53: 5390917a-VT-d-honor-APEI-firmware-first-mode-in-XSA-59-workaround-code.patch +Patch54: 53909259-x86-domctl-two-functional-fixes-to-XEN_DOMCTL_-gs-etvcpuextstate.patch +Patch55: 5390927f-x86-fix-reboot-shutdown-with-running-HVM-guests.patch +Patch56: 5396d818-avoid-crash-on-HVM-domain-destroy-with-PCI-passthrough.patch +Patch57: 5396e805-x86-HVM-refine-SMEP-test-in-HVM_CR4_GUEST_RESERVED_BITS.patch +Patch58: 539ebe62-x86-EFI-improve-boot-time-diagnostics.patch +Patch59: 539ec004-x86-mce-don-t-spam-the-console-with-CPUx-Temperature-z.patch +Patch60: 53a040c6-page-alloc-scrub-pages-used-by-hypervisor-upon-freeing.patch +Patch61: 53a1990a-IOMMU-prevent-VT-d-device-IOTLB-operations-on-wrong-IOMMU.patch +Patch62: 53a199d7-x86-EFI-allow-FPU-XMM-use-in-runtime-service-functions.patch # Upstream qemu Patch250: VNC-Support-for-ExtendedKeyEvent-client-message.patch Patch251: 0001-net-move-the-tap-buffer-into-TAPState.patch @@ -389,6 +401,8 @@ Patch468: libxl.introduce-an-option-to-disable-the-non-O_DIRECT-workaround.patch Patch469: libxl.add-option-to-disable-disk-cache-flushes-in-qdisk.patch Patch470: qemu-xen-upstream-qdisk-cache-unsafe.patch +Patch471: xen-pass-kernel-initrd-to-qemu.patch +Patch472: qemu-support-xen-hvm-direct-kernel-boot.patch # Hypervisor and PV driver Patches Patch501: x86-ioapic-ack-default.patch Patch502: x86-cpufreq-report.patch @@ -676,6 +690,18 @@ %patch48 -p1 %patch49 -p1 %patch50 -p1 +%patch51 -p1 +%patch52 -p1 +%patch53 
-p1 +%patch54 -p1 +%patch55 -p1 +%patch56 -p1 +%patch57 -p1 +%patch58 -p1 +%patch59 -p1 +%patch60 -p1 +%patch61 -p1 +%patch62 -p1 # Upstream qemu patches %patch250 -p1 %patch251 -p1 @@ -801,6 +827,8 @@ %patch468 -p1 %patch469 -p1 %patch470 -p1 +%patch471 -p1 +%patch472 -p1 # Hypervisor and PV driver Patches %patch501 -p1 %patch502 -p1 ++++++ 538c338f-x86-amd_ucode-flip-revision-numbers-in-printk.patch ++++++ # Commit 071a4c70a634f7d4f74cde4086ff3202968538c9 # Date 2014-06-02 10:19:27 +0200 # Author Aravind Gopalakrishnan <aravind.gopalakrishnan@amd.com> # Committer Jan Beulich <jbeulich@suse.com> x86, amd_ucode: flip revision numbers in printk A failure would result in log message like so- (XEN) microcode: CPU0 update from revision 0x6000637 to 0x6000626 failed ^^^^^^^^^^^^^^^^^^^^^^ The above message has the revision numbers inverted. Fix this. Signed-off-by: Aravind Gopalakrishnan <aravind.gopalakrishnan@amd.com> --- a/xen/arch/x86/microcode_amd.c +++ b/xen/arch/x86/microcode_amd.c @@ -164,7 +164,7 @@ static int apply_microcode(int cpu) if ( rev != hdr->patch_id ) { printk(KERN_ERR "microcode: CPU%d update from revision " - "%#x to %#x failed\n", cpu, hdr->patch_id, rev); + "%#x to %#x failed\n", cpu, rev, hdr->patch_id); return -EIO; } ++++++ 538ee637-ACPI-Prevent-acpi_table_entries-from-falling-into-a-infinite-loop.patch ++++++ # Commit 9c1e8cae657bc13e8b1ddeede17603d77f3ad341 # Date 2014-06-04 11:26:15 +0200 # Author Malcolm Crossley <malcolm.crossley@citrix.com> # Committer Jan Beulich <jbeulich@suse.com> ACPI: Prevent acpi_table_entries from falling into a infinite loop If a buggy BIOS programs an ACPI table with too small an entry length then acpi_table_entries gets stuck in an infinite loop. To aid debugging, report the error and exit the loop. 
Based on Linux kernel commit 369d913b242cae2205471b11b6e33ac368ed33ec Signed-off-by: Malcolm Crossley <malcolm.crossley@citrix.com> Use < instead of <= (which I wrongly suggested), return -ENODATA instead of -EINVAL, and make description match code. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/drivers/acpi/tables.c +++ b/xen/drivers/acpi/tables.c @@ -233,6 +233,12 @@ acpi_table_parse_entries(char *id, while (((unsigned long)entry) + sizeof(struct acpi_subtable_header) < table_end) { + if (entry->length < sizeof(*entry)) { + printk(KERN_ERR PREFIX "[%4.4s:%#x] Invalid length\n", + id, entry_id); + return -ENODATA; + } + if (entry->type == entry_id && (!max_entries || count++ < max_entries)) if (handler(entry, table_end)) ++++++ 5390917a-VT-d-honor-APEI-firmware-first-mode-in-XSA-59-workaround-code.patch ++++++ # Commit 1cc37ba8dbd89fb86dad3f6c78c3fba06019fe21 # Date 2014-06-05 17:49:14 +0200 # Author Jan Beulich <jbeulich@suse.com> # Committer Jan Beulich <jbeulich@suse.com> VT-d: honor APEI firmware-first mode in XSA-59 workaround code When firmware-first mode is being indicated by firmware, we shouldn't be modifying AER registers - these are considered to be owned by firmware in that case. Violating this is being reported to result in SMI storms. While circumventing the workaround means re-exposing affected hosts to the XSA-59 issues, this in any event seems better than not booting at all. Respective messages are being issued to the log, so the situation can be diagnosed. The basic building blocks were taken from Linux 3.15-rc. Note that this includes a block of code enclosed in #ifdef CONFIG_X86_MCE - we don't define that symbol, and that code also wouldn't build without suitable machine check side code added; that should happen eventually, but isn't subject of this change. 
Reported-by: Andrew Cooper <andrew.cooper3@citrix.com> Reported-by: Malcolm Crossley <malcolm.crossley@citrix.com> Signed-off-by: Jan Beulich <jbeulich@suse.com> Tested-by: Malcolm Crossley <malcolm.crossley@citrix.com> Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> Acked-by: Yang Zhang <yang.z.zhang@intel.com> --- a/xen/arch/x86/acpi/boot.c +++ b/xen/arch/x86/acpi/boot.c @@ -754,6 +754,8 @@ int __init acpi_boot_init(void) erst_init(); + acpi_hest_init(); + acpi_table_parse(ACPI_SIG_BGRT, acpi_invalidate_bgrt); return 0; --- a/xen/drivers/acpi/apei/Makefile +++ b/xen/drivers/acpi/apei/Makefile @@ -1,3 +1,4 @@ obj-y += erst.o +obj-y += hest.o obj-y += apei-base.o obj-y += apei-io.o --- /dev/null +++ b/xen/drivers/acpi/apei/hest.c @@ -0,0 +1,200 @@ +/* + * APEI Hardware Error Souce Table support + * + * HEST describes error sources in detail; communicates operational + * parameters (i.e. severity levels, masking bits, and threshold + * values) to Linux as necessary. It also allows the BIOS to report + * non-standard error sources to Linux (for example, chipset-specific + * error registers). + * + * For more information about HEST, please refer to ACPI Specification + * version 4.0, section 17.3.2. + * + * Copyright 2009 Intel Corp. + * Author: Huang Ying <ying.huang@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation; + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <xen/errno.h> +#include <xen/init.h> +#include <xen/kernel.h> +#include <xen/mm.h> +#include <xen/pfn.h> +#include <acpi/acpi.h> +#include <acpi/apei.h> + +#include "apei-internal.h" + +#define HEST_PFX "HEST: " + +static bool_t hest_disable; +boolean_param("hest_disable", hest_disable); + +/* HEST table parsing */ + +static struct acpi_table_hest *__read_mostly hest_tab; + +static const int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = { + [ACPI_HEST_TYPE_IA32_CHECK] = -1, /* need further calculation */ + [ACPI_HEST_TYPE_IA32_CORRECTED_CHECK] = -1, + [ACPI_HEST_TYPE_IA32_NMI] = sizeof(struct acpi_hest_ia_nmi), + [ACPI_HEST_TYPE_AER_ROOT_PORT] = sizeof(struct acpi_hest_aer_root), + [ACPI_HEST_TYPE_AER_ENDPOINT] = sizeof(struct acpi_hest_aer), + [ACPI_HEST_TYPE_AER_BRIDGE] = sizeof(struct acpi_hest_aer_bridge), + [ACPI_HEST_TYPE_GENERIC_ERROR] = sizeof(struct acpi_hest_generic), +}; + +static int hest_esrc_len(const struct acpi_hest_header *hest_hdr) +{ + u16 hest_type = hest_hdr->type; + int len; + + if (hest_type >= ACPI_HEST_TYPE_RESERVED) + return 0; + + len = hest_esrc_len_tab[hest_type]; + + if (hest_type == ACPI_HEST_TYPE_IA32_CORRECTED_CHECK) { + const struct acpi_hest_ia_corrected *cmc = + container_of(hest_hdr, + const struct acpi_hest_ia_corrected, + header); + + len = sizeof(*cmc) + cmc->num_hardware_banks * + sizeof(struct acpi_hest_ia_error_bank); + } else if (hest_type == ACPI_HEST_TYPE_IA32_CHECK) { + const struct acpi_hest_ia_machine_check *mc = + container_of(hest_hdr, + const struct acpi_hest_ia_machine_check, + header); + + len = sizeof(*mc) + mc->num_hardware_banks * + sizeof(struct acpi_hest_ia_error_bank); + } + BUG_ON(len == -1); + + return len; +}; + +int apei_hest_parse(apei_hest_func_t func, void *data) +{ + 
struct acpi_hest_header *hest_hdr; + int i, rc, len; + + if (hest_disable || !hest_tab) + return -EINVAL; + + hest_hdr = (struct acpi_hest_header *)(hest_tab + 1); + for (i = 0; i < hest_tab->error_source_count; i++) { + len = hest_esrc_len(hest_hdr); + if (!len) { + printk(XENLOG_WARNING HEST_PFX + "Unknown or unused hardware error source " + "type: %d for hardware error source: %d\n", + hest_hdr->type, hest_hdr->source_id); + return -EINVAL; + } + if ((void *)hest_hdr + len > + (void *)hest_tab + hest_tab->header.length) { + printk(XENLOG_WARNING HEST_PFX + "Table contents overflow for hardware error source: %d\n", + hest_hdr->source_id); + return -EINVAL; + } + + rc = func(hest_hdr, data); + if (rc) + return rc; + + hest_hdr = (void *)hest_hdr + len; + } + + return 0; +} + +/* + * Check if firmware advertises firmware first mode. We need FF bit to be set + * along with a set of MC banks which work in FF mode. + */ +static int __init hest_parse_cmc(const struct acpi_hest_header *hest_hdr, + void *data) +{ +#ifdef CONFIG_X86_MCE + unsigned int i; + const struct acpi_hest_ia_corrected *cmc; + const struct acpi_hest_ia_error_bank *mc_bank; + + if (hest_hdr->type != ACPI_HEST_TYPE_IA32_CORRECTED_CHECK) + return 0; + + cmc = container_of(hest_hdr, const struct acpi_hest_ia_corrected, header); + if (!cmc->enabled) + return 0; + + /* + * We expect HEST to provide a list of MC banks that report errors + * in firmware first mode. Otherwise, return non-zero value to + * indicate that we are done parsing HEST. 
+ */ + if (!(cmc->flags & ACPI_HEST_FIRMWARE_FIRST) || !cmc->num_hardware_banks) + return 1; + + printk(XENLOG_INFO HEST_PFX "Enabling Firmware First mode for corrected errors.\n"); + + mc_bank = (const struct acpi_hest_ia_error_bank *)(cmc + 1); + for (i = 0; i < cmc->num_hardware_banks; i++, mc_bank++) + mce_disable_bank(mc_bank->bank_number); +#else +# define acpi_disable_cmcff 1 +#endif + + return 1; +} + +void __init acpi_hest_init(void) +{ + acpi_status status; + acpi_physical_address hest_addr; + acpi_native_uint hest_len; + + if (acpi_disabled) + return; + + if (hest_disable) { + printk(XENLOG_INFO HEST_PFX "Table parsing disabled.\n"); + return; + } + + status = acpi_get_table_phys(ACPI_SIG_HEST, 0, &hest_addr, &hest_len); + if (status == AE_NOT_FOUND) + goto err; + if (ACPI_FAILURE(status)) { + printk(XENLOG_ERR HEST_PFX "Failed to get table, %s\n", + acpi_format_exception(status)); + goto err; + } + map_pages_to_xen((unsigned long)__va(hest_addr), PFN_DOWN(hest_addr), + PFN_UP(hest_addr + hest_len) - PFN_DOWN(hest_addr), + PAGE_HYPERVISOR); + hest_tab = __va(hest_addr); + + if (!acpi_disable_cmcff) + apei_hest_parse(hest_parse_cmc, NULL); + + printk(XENLOG_INFO HEST_PFX "Table parsing has been initialized\n"); + return; +err: + hest_disable = 1; +} --- a/xen/drivers/passthrough/pci.c +++ b/xen/drivers/passthrough/pci.c @@ -1066,6 +1066,106 @@ void __init setup_dom0_pci_devices( spin_unlock(&pcidevs_lock); } +#ifdef CONFIG_ACPI +#include <acpi/acpi.h> +#include <acpi/apei.h> + +static int hest_match_pci(const struct acpi_hest_aer_common *p, + const struct pci_dev *pdev) +{ + return ACPI_HEST_SEGMENT(p->bus) == pdev->seg && + ACPI_HEST_BUS(p->bus) == pdev->bus && + p->device == PCI_SLOT(pdev->devfn) && + p->function == PCI_FUNC(pdev->devfn); +} + +static bool_t hest_match_type(const struct acpi_hest_header *hest_hdr, + const struct pci_dev *pdev) +{ + unsigned int pos = pci_find_cap_offset(pdev->seg, pdev->bus, + PCI_SLOT(pdev->devfn), + 
PCI_FUNC(pdev->devfn), + PCI_CAP_ID_EXP); + u8 pcie = MASK_EXTR(pci_conf_read16(pdev->seg, pdev->bus, + PCI_SLOT(pdev->devfn), + PCI_FUNC(pdev->devfn), + pos + PCI_EXP_FLAGS), + PCI_EXP_FLAGS_TYPE); + + switch ( hest_hdr->type ) + { + case ACPI_HEST_TYPE_AER_ROOT_PORT: + return pcie == PCI_EXP_TYPE_ROOT_PORT; + case ACPI_HEST_TYPE_AER_ENDPOINT: + return pcie == PCI_EXP_TYPE_ENDPOINT; + case ACPI_HEST_TYPE_AER_BRIDGE: + return pci_conf_read16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn), + PCI_FUNC(pdev->devfn), PCI_CLASS_DEVICE) == + PCI_CLASS_BRIDGE_PCI; + } + + return 0; +} + +struct aer_hest_parse_info { + const struct pci_dev *pdev; + bool_t firmware_first; +}; + +static bool_t hest_source_is_pcie_aer(const struct acpi_hest_header *hest_hdr) +{ + if ( hest_hdr->type == ACPI_HEST_TYPE_AER_ROOT_PORT || + hest_hdr->type == ACPI_HEST_TYPE_AER_ENDPOINT || + hest_hdr->type == ACPI_HEST_TYPE_AER_BRIDGE ) + return 1; + return 0; +} + +static int aer_hest_parse(const struct acpi_hest_header *hest_hdr, void *data) +{ + struct aer_hest_parse_info *info = data; + const struct acpi_hest_aer_common *p; + bool_t ff; + + if ( !hest_source_is_pcie_aer(hest_hdr) ) + return 0; + + p = (const struct acpi_hest_aer_common *)(hest_hdr + 1); + ff = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST); + + /* + * If no specific device is supplied, determine whether + * FIRMWARE_FIRST is set for *any* PCIe device. + */ + if ( !info->pdev ) + { + info->firmware_first |= ff; + return 0; + } + + /* Otherwise, check the specific device */ + if ( p->flags & ACPI_HEST_GLOBAL ? 
+ hest_match_type(hest_hdr, info->pdev) : + hest_match_pci(p, info->pdev) ) + { + info->firmware_first = ff; + return 1; + } + + return 0; +} + +bool_t pcie_aer_get_firmware_first(const struct pci_dev *pdev) +{ + struct aer_hest_parse_info info = { .pdev = pdev }; + + return pci_find_cap_offset(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn), + PCI_FUNC(pdev->devfn), PCI_CAP_ID_EXP) && + apei_hest_parse(aer_hest_parse, &info) >= 0 && + info.firmware_first; +} +#endif + static int _dump_pci_devices(struct pci_seg *pseg, void *arg) { struct pci_dev *pdev; --- a/xen/drivers/passthrough/vtd/quirks.c +++ b/xen/drivers/passthrough/vtd/quirks.c @@ -386,9 +386,11 @@ void pci_vtd_quirk(const struct pci_dev int dev = PCI_SLOT(pdev->devfn); int func = PCI_FUNC(pdev->devfn); int pos; - u32 val; + bool_t ff; + u32 val, val2; u64 bar; paddr_t pa; + const char *action; if ( pci_conf_read16(seg, bus, dev, func, PCI_VENDOR_ID) != PCI_VENDOR_ID_INTEL ) @@ -438,7 +440,10 @@ void pci_vtd_quirk(const struct pci_dev pos = pci_find_next_ext_capability(seg, bus, pdev->devfn, pos, PCI_EXT_CAP_ID_VNDR); } + ff = 0; } + else + ff = pcie_aer_get_firmware_first(pdev); if ( !pos ) { printk(XENLOG_WARNING "%04x:%02x:%02x.%u without AER capability?\n", @@ -447,18 +452,26 @@ void pci_vtd_quirk(const struct pci_dev } val = pci_conf_read32(seg, bus, dev, func, pos + PCI_ERR_UNCOR_MASK); - pci_conf_write32(seg, bus, dev, func, pos + PCI_ERR_UNCOR_MASK, - val | PCI_ERR_UNC_UNSUP); - val = pci_conf_read32(seg, bus, dev, func, pos + PCI_ERR_COR_MASK); - pci_conf_write32(seg, bus, dev, func, pos + PCI_ERR_COR_MASK, - val | PCI_ERR_COR_ADV_NFAT); + val2 = pci_conf_read32(seg, bus, dev, func, pos + PCI_ERR_COR_MASK); + if ( (val & PCI_ERR_UNC_UNSUP) && (val2 & PCI_ERR_COR_ADV_NFAT) ) + action = "Found masked"; + else if ( !ff ) + { + pci_conf_write32(seg, bus, dev, func, pos + PCI_ERR_UNCOR_MASK, + val | PCI_ERR_UNC_UNSUP); + pci_conf_write32(seg, bus, dev, func, pos + PCI_ERR_COR_MASK, + val2 | 
PCI_ERR_COR_ADV_NFAT); + action = "Masked"; + } + else + action = "Must not mask"; /* XPUNCERRMSK Send Completion with Unsupported Request */ val = pci_conf_read32(seg, bus, dev, func, 0x20c); pci_conf_write32(seg, bus, dev, func, 0x20c, val | (1 << 4)); - printk(XENLOG_INFO "Masked UR signaling on %04x:%02x:%02x.%u\n", - seg, bus, dev, func); + printk(XENLOG_INFO "%s UR signaling on %04x:%02x:%02x.%u\n", + action, seg, bus, dev, func); break; case 0x100: case 0x104: case 0x108: /* Sandybridge */ --- a/xen/include/acpi/actbl1.h +++ b/xen/include/acpi/actbl1.h @@ -445,6 +445,14 @@ struct acpi_hest_aer_common { #define ACPI_HEST_FIRMWARE_FIRST (1) #define ACPI_HEST_GLOBAL (1<<1) +/* + * Macros to access the bus/segment numbers in Bus field above: + * Bus number is encoded in bits 7:0 + * Segment number is encoded in bits 23:8 + */ +#define ACPI_HEST_BUS(bus) ((bus) & 0xFF) +#define ACPI_HEST_SEGMENT(bus) (((bus) >> 8) & 0xFFFF) + /* Hardware Error Notification */ struct acpi_hest_notify { --- a/xen/include/acpi/apei.h +++ b/xen/include/acpi/apei.h @@ -12,6 +12,9 @@ #define FIX_APEI_RANGE_MAX 64 +typedef int (*apei_hest_func_t)(const struct acpi_hest_header *, void *); +int apei_hest_parse(apei_hest_func_t, void *); + int erst_write(const struct cper_record_header *record); size_t erst_get_record_count(void); int erst_get_next_record_id(u64 *record_id); --- a/xen/include/xen/acpi.h +++ b/xen/include/xen/acpi.h @@ -61,6 +61,7 @@ int acpi_boot_init (void); int acpi_boot_table_init (void); int acpi_numa_init (void); int erst_init(void); +void acpi_hest_init(void); int acpi_table_init (void); int acpi_table_parse(char *id, acpi_table_handler handler); --- a/xen/include/xen/pci.h +++ b/xen/include/xen/pci.h @@ -144,6 +144,8 @@ int pci_find_next_ext_capability(int seg const char *parse_pci(const char *, unsigned int *seg, unsigned int *bus, unsigned int *dev, unsigned int *func); +bool_t pcie_aer_get_firmware_first(const struct pci_dev *); + struct pirq; int 
msixtbl_pt_register(struct domain *, struct pirq *, uint64_t gtable); void msixtbl_pt_unregister(struct domain *, struct pirq *); ++++++ 53909259-x86-domctl-two-functional-fixes-to-XEN_DOMCTL_-gs-etvcpuextstate.patch ++++++ # Commit 090ca8c155b7321404ea7713a28aaedb7ac4fffd # Date 2014-06-05 17:52:57 +0200 # Author Andrew Cooper <andrew.cooper3@citrix.com> # Committer Jan Beulich <jbeulich@suse.com> x86/domctl: two functional fixes to XEN_DOMCTL_[gs]etvcpuextstate Interacting with the vcpu itself should be protected by vcpu_pause(). Buggy/naive toolstacks might encounter adverse interaction with a vcpu context switch, or increase of xcr0_accum. There are no such problems with current in-tree code. Explicitly permit a NULL guest handle as being a request for size. It is the prevailing Xen style, and without it, valgrind's ioctl handler is unable to determine whether evc->buffer actually got written to. Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> Reviewed-by: Jan Beulich <jbeulich@suse.com> # Commit 895661ae98f0249f50280b4acfb9dda70b76d7e9 # Date 2014-06-10 12:03:16 +0200 # Author Andrew Cooper <andrew.cooper3@citrix.com> # Committer Jan Beulich <jbeulich@suse.com> x86/domctl: further fix to XEN_DOMCTL_[gs]etvcpuextstate Do not clobber errors from certain codepaths. Clobbering of -EINVAL from failing "evc->size <= PV_XSAVE_SIZE(_xcr0_accum)" was a pre-existing bug. However, clobbering -EINVAL/-EFAULT from the get codepath was a bug unintentionally introduced by 090ca8c1 "x86/domctl: two functional fixes to XEN_DOMCTL_[gs]etvcpuextstate". 
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> Reviewed-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/domctl.c +++ b/xen/arch/x86/domctl.c @@ -1089,45 +1089,48 @@ long arch_do_domctl( ((v = d->vcpu[evc->vcpu]) == NULL) ) goto vcpuextstate_out; + ret = -EINVAL; + if ( v == current ) /* no vcpu_pause() */ + goto vcpuextstate_out; + if ( domctl->cmd == XEN_DOMCTL_getvcpuextstate ) { - unsigned int size = PV_XSAVE_SIZE(v->arch.xcr0_accum); + unsigned int size; + + ret = 0; + vcpu_pause(v); - if ( !evc->size && !evc->xfeature_mask ) + size = PV_XSAVE_SIZE(v->arch.xcr0_accum); + if ( (!evc->size && !evc->xfeature_mask) || + guest_handle_is_null(evc->buffer) ) { evc->xfeature_mask = xfeature_mask; evc->size = size; - ret = 0; + vcpu_unpause(v); goto vcpuextstate_out; } + if ( evc->size != size || evc->xfeature_mask != xfeature_mask ) - { ret = -EINVAL; - goto vcpuextstate_out; - } - if ( copy_to_guest_offset(domctl->u.vcpuextstate.buffer, - offset, (void *)&v->arch.xcr0, - sizeof(v->arch.xcr0)) ) - { + + if ( !ret && copy_to_guest_offset(evc->buffer, offset, + (void *)&v->arch.xcr0, + sizeof(v->arch.xcr0)) ) ret = -EFAULT; - goto vcpuextstate_out; - } + offset += sizeof(v->arch.xcr0); - if ( copy_to_guest_offset(domctl->u.vcpuextstate.buffer, - offset, (void *)&v->arch.xcr0_accum, - sizeof(v->arch.xcr0_accum)) ) - { + if ( !ret && copy_to_guest_offset(evc->buffer, offset, + (void *)&v->arch.xcr0_accum, + sizeof(v->arch.xcr0_accum)) ) ret = -EFAULT; - goto vcpuextstate_out; - } + offset += sizeof(v->arch.xcr0_accum); - if ( copy_to_guest_offset(domctl->u.vcpuextstate.buffer, - offset, (void *)v->arch.xsave_area, - size - 2 * sizeof(uint64_t)) ) - { + if ( !ret && copy_to_guest_offset(evc->buffer, offset, + (void *)v->arch.xsave_area, + size - 2 * sizeof(uint64_t)) ) ret = -EFAULT; - goto vcpuextstate_out; - } + + vcpu_unpause(v); } else { @@ -1176,12 +1179,14 @@ long arch_do_domctl( if ( evc->size <= PV_XSAVE_SIZE(_xcr0_accum) ) { + vcpu_pause(v); 
v->arch.xcr0 = _xcr0; v->arch.xcr0_accum = _xcr0_accum; if ( _xcr0_accum & XSTATE_NONLAZY ) v->arch.nonlazy_xstate_used = 1; memcpy(v->arch.xsave_area, _xsave_area, evc->size - 2 * sizeof(uint64_t)); + vcpu_unpause(v); } else ret = -EINVAL; @@ -1189,8 +1194,6 @@ long arch_do_domctl( xfree(receive_buf); } - ret = 0; - vcpuextstate_out: if ( domctl->cmd == XEN_DOMCTL_getvcpuextstate ) copyback = 1; ++++++ 5390927f-x86-fix-reboot-shutdown-with-running-HVM-guests.patch ++++++ # Commit 39ede234d1fd683430ffb1784d6d35b096f16457 # Date 2014-06-05 17:53:35 +0200 # Author Roger Pau Monné <roger.pau@citrix.com> # Committer Jan Beulich <jbeulich@suse.com> x86: fix reboot/shutdown with running HVM guests If there's a guest using VMX/SVM when the hypervisor shuts down, it can lead to the following crash due to VMX/SVM functions being called after hvm_cpu_down has been called. In order to prevent that, check in {svm/vmx}_ctxt_switch_from that the cpu virtualization extensions are still enabled. (XEN) Domain 0 shutdown: rebooting machine. (XEN) Assertion 'read_cr0() & X86_CR0_TS' failed at vmx.c:644 (XEN) ----[ Xen-4.5-unstable x86_64 debug=y Tainted: C ]---- (XEN) CPU: 0 (XEN) RIP: e008:[<ffff82d0801d90ce>] vmx_ctxt_switch_from+0x1e/0x14c ... 
(XEN) Xen call trace: (XEN) [<ffff82d0801d90ce>] vmx_ctxt_switch_from+0x1e/0x14c (XEN) [<ffff82d08015d129>] __context_switch+0x127/0x462 (XEN) [<ffff82d080160acf>] __sync_local_execstate+0x6a/0x8b (XEN) [<ffff82d080160af9>] sync_local_execstate+0x9/0xb (XEN) [<ffff82d080161728>] map_domain_page+0x88/0x4de (XEN) [<ffff82d08014e721>] map_vtd_domain_page+0xd/0xf (XEN) [<ffff82d08014cda2>] io_apic_read_remap_rte+0x158/0x29f (XEN) [<ffff82d0801448a8>] iommu_read_apic_from_ire+0x27/0x29 (XEN) [<ffff82d080165625>] io_apic_read+0x17/0x65 (XEN) [<ffff82d080166143>] __ioapic_read_entry+0x38/0x61 (XEN) [<ffff82d080166aa8>] clear_IO_APIC_pin+0x1a/0xf3 (XEN) [<ffff82d080166bae>] clear_IO_APIC+0x2d/0x60 (XEN) [<ffff82d080166f63>] disable_IO_APIC+0xd/0x81 (XEN) [<ffff82d08018228b>] smp_send_stop+0x58/0x68 (XEN) [<ffff82d080181aa7>] machine_restart+0x80/0x20a (XEN) [<ffff82d080181c3c>] __machine_restart+0xb/0xf (XEN) [<ffff82d080128fb9>] smp_call_function_interrupt+0x99/0xc0 (XEN) [<ffff82d080182330>] call_function_interrupt+0x33/0x43 (XEN) [<ffff82d08016bd89>] do_IRQ+0x9e/0x63a (XEN) [<ffff82d08016406f>] common_interrupt+0x5f/0x70 (XEN) [<ffff82d0801a8600>] mwait_idle+0x29c/0x2f7 (XEN) [<ffff82d08015cf67>] idle_loop+0x58/0x76 (XEN) (XEN) (XEN) **************************************** (XEN) Panic on CPU 0: (XEN) Assertion 'read_cr0() & X86_CR0_TS' failed at vmx.c:644 (XEN) **************************************** Suggested-by: Jan Beulich <jbeulich@suse.com> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> --- a/xen/arch/x86/hvm/svm/svm.c +++ b/xen/arch/x86/hvm/svm/svm.c @@ -861,6 +861,14 @@ static void svm_ctxt_switch_from(struct { int cpu = smp_processor_id(); + /* + * Return early if trying to do a context switch without SVM enabled, + * this can happen when the hypervisor shuts down with HVM guests + * still running. 
+ */ + if ( unlikely((read_efer() & EFER_SVME) == 0) ) + return; + svm_fpu_leave(v); svm_save_dr(v); --- a/xen/arch/x86/hvm/vmx/vmcs.c +++ b/xen/arch/x86/hvm/vmx/vmcs.c @@ -74,7 +74,7 @@ u64 vmx_ept_vpid_cap __read_mostly; static DEFINE_PER_CPU_READ_MOSTLY(struct vmcs_struct *, vmxon_region); static DEFINE_PER_CPU(struct vmcs_struct *, current_vmcs); static DEFINE_PER_CPU(struct list_head, active_vmcs_list); -static DEFINE_PER_CPU(bool_t, vmxon); +DEFINE_PER_CPU(bool_t, vmxon); static u32 vmcs_revision_id __read_mostly; u64 __read_mostly vmx_basic_msr; --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -617,6 +617,14 @@ static void vmx_fpu_leave(struct vcpu *v static void vmx_ctxt_switch_from(struct vcpu *v) { + /* + * Return early if trying to do a context switch without VMX enabled, + * this can happen when the hypervisor shuts down with HVM guests + * still running. + */ + if ( unlikely(!this_cpu(vmxon)) ) + return; + vmx_fpu_leave(v); vmx_save_guest_msrs(v); vmx_restore_host_msrs(); --- a/xen/include/asm-x86/hvm/vmx/vmcs.h +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h @@ -483,6 +483,8 @@ void virtual_vmcs_exit(void *vvmcs); u64 virtual_vmcs_vmread(void *vvmcs, u32 vmcs_encoding); void virtual_vmcs_vmwrite(void *vvmcs, u32 vmcs_encoding, u64 val); +DECLARE_PER_CPU(bool_t, vmxon); + #endif /* ASM_X86_HVM_VMX_VMCS_H__ */ /* ++++++ 5396d818-avoid-crash-on-HVM-domain-destroy-with-PCI-passthrough.patch ++++++ # Commit b9ae60907e6dbc686403e52a7e61a6f856401a1b # Date 2014-06-10 12:04:08 +0200 # Author Juergen Gross <jgross@suse.com> # Committer Jan Beulich <jbeulich@suse.com> avoid crash on HVM domain destroy with PCI passthrough c/s bac6334b5 "move domain to cpupool0 before destroying it" introduced a problem when destroying a HVM domain with PCI passthrough enabled. The moving of the domain to cpupool0 includes moving the pirqs to the cpupool0 cpus, but the event channel infrastructure already is unusable for the domain. 
So just avoid moving pirqs for dying domains. Signed-off-by: Juergen Gross <jgross@suse.com> --- a/xen/common/schedule.c +++ b/xen/common/schedule.c @@ -297,7 +297,8 @@ int sched_move_domain(struct domain *d, spin_unlock_irq(lock); v->sched_priv = vcpu_priv[v->vcpu_id]; - evtchn_move_pirqs(v); + if ( !d->is_dying ) + evtchn_move_pirqs(v); new_p = cpumask_cycle(new_p, c->cpu_valid); ++++++ 5396e805-x86-HVM-refine-SMEP-test-in-HVM_CR4_GUEST_RESERVED_BITS.patch ++++++ # Commit 584287380baf81e5acdd9dc7dfc7ffccd1e9a856 # Date 2014-06-10 13:12:05 +0200 # Author Jan Beulich <jbeulich@suse.com> # Committer Jan Beulich <jbeulich@suse.com> x86/HVM: refine SMEP test in HVM_CR4_GUEST_RESERVED_BITS() Andrew validly points out that the use of the macro on the restore path can't rely on the CPUID bits for the guest already being in place (as their setting by the tool stack in turn requires the other restore operations already having taken place). And even worse, using hvm_cpuid() is invalid here because that function assumes to be used in the context of the vCPU in question. Reverting to the behavior prior to the change from checking cpu_has_sm?p to hvm_vcpu_has_sm?p() would break the other (non-restore) use of the macro. So let's revert to the prior behavior only for the restore path, by adding a respective second parameter to the macro. Obviously the two cpu_has_* uses in the macro should really also be converted to hvm_cpuid() based checks at least for the non-restore path. 
Reported-by: Andrew Cooper <andrew.cooper3@citrix.com> Signed-off-by: Jan Beulich <jbeulich@suse.com> Tested-by: David Vrabel <david.vrabel@citrix.com> --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -858,7 +858,7 @@ static int hvm_load_cpu_ctxt(struct doma return -EINVAL; } - if ( ctxt.cr4 & HVM_CR4_GUEST_RESERVED_BITS(v) ) + if ( ctxt.cr4 & HVM_CR4_GUEST_RESERVED_BITS(v, 1) ) { printk(XENLOG_G_ERR "HVM%d restore: bad CR4 %#" PRIx64 "\n", d->domain_id, ctxt.cr4); @@ -1977,7 +1977,7 @@ int hvm_set_cr4(unsigned long value) struct vcpu *v = current; unsigned long old_cr; - if ( value & HVM_CR4_GUEST_RESERVED_BITS(v) ) + if ( value & HVM_CR4_GUEST_RESERVED_BITS(v, 0) ) { HVM_DBG_LOG(DBG_LEVEL_1, "Guest attempts to set reserved bit in CR4: %lx", --- a/xen/include/asm-x86/hvm/hvm.h +++ b/xen/include/asm-x86/hvm/hvm.h @@ -373,18 +373,24 @@ static inline bool_t hvm_vcpu_has_smep(v (X86_CR4_VMXE | X86_CR4_PAE | X86_CR4_MCE)) /* These bits in CR4 cannot be set by the guest. */ -#define HVM_CR4_GUEST_RESERVED_BITS(_v) \ +#define HVM_CR4_GUEST_RESERVED_BITS(v, restore) ({ \ + const struct vcpu *_v = (v); \ + bool_t _restore = !!(restore); \ + ASSERT((_restore) || _v == current); \ (~((unsigned long) \ (X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | \ X86_CR4_DE | X86_CR4_PSE | X86_CR4_PAE | \ X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE | \ X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT | \ - (hvm_vcpu_has_smep() ? X86_CR4_SMEP : 0) | \ + (((_restore) ? cpu_has_smep : \ + hvm_vcpu_has_smep()) ? \ + X86_CR4_SMEP : 0) | \ (cpu_has_fsgsbase ? X86_CR4_FSGSBASE : 0) | \ - ((nestedhvm_enabled((_v)->domain) && cpu_has_vmx)\ - ? X86_CR4_VMXE : 0) | \ - (cpu_has_pcid ? X86_CR4_PCIDE : 0) | \ - (cpu_has_xsave ? X86_CR4_OSXSAVE : 0)))) + ((nestedhvm_enabled(_v->domain) && cpu_has_vmx) \ + ? X86_CR4_VMXE : 0) | \ + (cpu_has_pcid ? X86_CR4_PCIDE : 0) | \ + (cpu_has_xsave ? X86_CR4_OSXSAVE : 0)))); \ +}) /* These exceptions must always be intercepted. 
*/ #define HVM_TRAP_MASK ((1U << TRAP_machine_check) | (1U << TRAP_invalid_op)) ++++++ 539ebe62-x86-EFI-improve-boot-time-diagnostics.patch ++++++ # Commit ebbb51dc8c1790e5187442a808003298b6796762 # Date 2014-06-16 11:52:34 +0200 # Author Jan Beulich <jbeulich@suse.com> # Committer Jan Beulich <jbeulich@suse.com> x86/EFI: improve boot time diagnostics To aid analysis of eventual errors, print EFI status codes with error messages where available. Also remove a case where the status gets stored into a local variable without being used or examined (which mis-guided me to add an error check there in try 1 of this patch). Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/efi/boot.c +++ b/xen/arch/x86/efi/boot.c @@ -344,11 +344,12 @@ static EFI_FILE_HANDLE __init get_parent ret = efi_bs->HandleProtocol(loaded_image->DeviceHandle, &fs_protocol, (void **)&fio); if ( EFI_ERROR(ret) ) - blexit(L"Couldn't obtain the File System Protocol Interface"); + PrintErrMesg(L"Couldn't obtain the File System Protocol Interface", + ret); ret = fio->OpenVolume(fio, &dir_handle); } while ( ret == EFI_MEDIA_CHANGED ); if ( ret != EFI_SUCCESS ) - blexit(L"OpenVolume failure"); + PrintErrMesg(L"OpenVolume failure", ret); #define buffer ((CHAR16 *)keyhandler_scratch) #define BUFFERSIZE sizeof(keyhandler_scratch) @@ -967,8 +968,8 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY if ( !EFI_ERROR(efi_bs->LocateProtocol(&shim_lock_guid, NULL, (void **)&shim_lock)) && - shim_lock->Verify(kernel.ptr, kernel.size) != EFI_SUCCESS ) - blexit(L"Dom0 kernel image could not be verified."); + (status = shim_lock->Verify(kernel.ptr, kernel.size)) != EFI_SUCCESS ) + PrintErrMesg(L"Dom0 kernel image could not be verified", status); name.s = get_value(&cfg, section.s, "ramdisk"); if ( name.s ) @@ -1379,8 +1380,8 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY } } - status = efi_bs->GetMemoryMap(&efi_memmap_size, NULL, &map_key, - &efi_mdesc_size, &mdesc_ver); + efi_bs->GetMemoryMap(&efi_memmap_size, NULL,
&map_key, + &efi_mdesc_size, &mdesc_ver); mbi.mem_upper -= efi_memmap_size; mbi.mem_upper &= -__alignof__(EFI_MEMORY_DESCRIPTOR); if ( mbi.mem_upper < xen_phys_start ) @@ -1389,7 +1390,7 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY status = efi_bs->GetMemoryMap(&efi_memmap_size, efi_memmap, &map_key, &efi_mdesc_size, &mdesc_ver); if ( EFI_ERROR(status) ) - blexit(L"Cannot obtain memory map"); + PrintErrMesg(L"Cannot obtain memory map", status); /* Populate E820 table and check trampoline area availability. */ e = e820map - 1; ++++++ 539ec004-x86-mce-don-t-spam-the-console-with-CPUx-Temperature-z.patch ++++++ # Commit 323338f86fb6cd6f6dba4f59a84eed71b3552d21 # Date 2014-06-16 11:59:32 +0200 # Author Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> # Committer Jan Beulich <jbeulich@suse.com> x86/mce: don't spam the console with "CPUx: Temperature z" If the machine has been quite busy it ends up with these messages printed on the hypervisor console: (XEN) CPU3: Temperature/speed normal (XEN) CPU1: Temperature/speed normal (XEN) CPU0: Temperature/speed normal (XEN) CPU1: Temperature/speed normal (XEN) CPU0: Temperature/speed normal (XEN) CPU2: Temperature/speed normal (XEN) CPU3: Temperature/speed normal (XEN) CPU0: Temperature/speed normal (XEN) CPU2: Temperature/speed normal (XEN) CPU3: Temperature/speed normal (XEN) CPU1: Temperature/speed normal (XEN) CPU0: Temperature above threshold (XEN) CPU0: Running in modulated clock mode (XEN) CPU1: Temperature/speed normal (XEN) CPU2: Temperature/speed normal (XEN) CPU3: Temperature/speed normal While the state changes are important, the non-altered state information is not needed. As such add a latch mechanism to only print the information if it has changed since the last update (and the hardware doesn't properly suppress redundant notifications). 
This was observed on Intel DQ67SW, BIOS SWQ6710H.86A.0066.2012.1105.1504 11/05/2012 Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> Acked-by: Christoph Egger <chegger@amazon.de> --- a/xen/arch/x86/cpu/mcheck/mce_intel.c +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c @@ -49,11 +49,15 @@ static int __read_mostly nr_intel_ext_ms #define INTEL_SRAR_INSTR_FETCH 0x150 #ifdef CONFIG_X86_MCE_THERMAL +#define MCE_RING 0x1 +static DEFINE_PER_CPU(int, last_state); + static void intel_thermal_interrupt(struct cpu_user_regs *regs) { uint64_t msr_content; unsigned int cpu = smp_processor_id(); static DEFINE_PER_CPU(s_time_t, next); + int *this_last_state; ack_APIC_irq(); @@ -62,13 +66,17 @@ static void intel_thermal_interrupt(stru per_cpu(next, cpu) = NOW() + MILLISECS(5000); rdmsrl(MSR_IA32_THERM_STATUS, msr_content); - if (msr_content & 0x1) { - printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu); - printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n", - cpu); + this_last_state = &per_cpu(last_state, cpu); + if ( *this_last_state == (msr_content & MCE_RING) ) + return; + *this_last_state = msr_content & MCE_RING; + if ( msr_content & MCE_RING ) + { + printk(KERN_EMERG "CPU%u: Temperature above threshold\n", cpu); + printk(KERN_EMERG "CPU%u: Running in modulated clock mode\n", cpu); add_taint(TAINT_MACHINE_CHECK); } else { - printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu); + printk(KERN_INFO "CPU%u: Temperature/speed normal\n", cpu); } } @@ -802,6 +810,7 @@ static int cpu_mcabank_alloc(unsigned in per_cpu(no_cmci_banks, cpu) = cmci; per_cpu(mce_banks_owned, cpu) = owned; + per_cpu(last_state, cpu) = -1; return 0; out: ++++++ 53a040c6-page-alloc-scrub-pages-used-by-hypervisor-upon-freeing.patch ++++++ References: bnc#880751 CVE-2014-4021 XSA-100 # Commit 4bd78937ec324bcef4e29ef951e0ff9815770de1 # Date 2014-06-17 15:21:10 +0200 # Author Jan Beulich <jbeulich@suse.com> # Committer Jan 
Beulich <jbeulich@suse.com> page-alloc: scrub pages used by hypervisor upon freeing ... unless they're part of a fully separate pool (and hence can't ever be used for guest allocations). This is CVE-2014-4021 / XSA-100. Signed-off-by: Jan Beulich <jbeulich@suse.com> Reviewed-by: Ian Campbell <ian.campbell@citrix.com> Acked-by: Keir Fraser <keir@xen.org> --- a/xen/common/page_alloc.c +++ b/xen/common/page_alloc.c @@ -1409,7 +1409,10 @@ void free_xenheap_pages(void *v, unsigne pg = virt_to_page(v); for ( i = 0; i < (1u << order); i++ ) + { + scrub_one_page(&pg[i]); pg[i].count_info &= ~PGC_xen_heap; + } free_heap_pages(pg, order); } @@ -1579,6 +1582,8 @@ void free_domheap_pages(struct page_info else { /* Freeing anonymous domain-heap pages. */ + for ( i = 0; i < (1 << order); i++ ) + scrub_one_page(&pg[i]); free_heap_pages(pg, order); drop_dom_ref = 0; } ++++++ 53a1990a-IOMMU-prevent-VT-d-device-IOTLB-operations-on-wrong-IOMMU.patch ++++++ # Commit 84c340ba4c3eb99278b6ba885616bb183b88ad67 # Date 2014-06-18 15:50:02 +0200 # Author Malcolm Crossley <malcolm.crossley@citrix.com> # Committer Jan Beulich <jbeulich@suse.com> IOMMU: prevent VT-d device IOTLB operations on wrong IOMMU PCIe ATS allows for devices to contain IOTLBs, the VT-d code was iterating around all ATS capable devices and issuing IOTLB operations for all IOMMUs, even though each ATS device is only accessible via one particular IOMMU. Issuing an IOMMU operation to a device not accessible via that IOMMU results in an IOMMU timeout because the device does not reply. VT-d IOMMU timeouts result in a Xen panic. Therefore this bug prevents any Intel system with 2 or more ATS enabled IOMMUs, each with an ATS device connected to them, from booting Xen. The patch adds a IOMMU pointer to the ATS device struct so the VT-d code can ensure it does not issue IOMMU ATS operations on the wrong IOMMU. 
A void pointer has to be used because AMD and Intel IOMMU implementations do not have a common IOMMU structure or indexing mechanism. Signed-off-by: Malcolm Crossley <malcolm.crossley@citrix.com> Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> Acked-by: Kevin Tian <kevin.tian@intel.com> Reviewed-by: Jan Beulich <jbeulich@suse.com> --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c @@ -163,7 +163,7 @@ static void amd_iommu_setup_domain_devic !pci_ats_enabled(iommu->seg, bus, pdev->devfn) ) { if ( devfn == pdev->devfn ) - enable_ats_device(iommu->seg, bus, devfn); + enable_ats_device(iommu->seg, bus, devfn, iommu); amd_iommu_flush_iotlb(devfn, pdev, INV_IOMMU_ALL_PAGES_ADDRESS, 0); } --- a/xen/drivers/passthrough/ats.h +++ b/xen/drivers/passthrough/ats.h @@ -24,6 +24,7 @@ struct pci_ats_dev { u8 bus; u8 devfn; u16 ats_queue_depth; /* ATS device invalidation queue depth */ + const void *iommu; /* No common IOMMU struct so use void pointer */ }; #define ATS_REG_CAP 4 @@ -34,7 +35,7 @@ struct pci_ats_dev { extern struct list_head ats_devices; extern bool_t ats_enabled; -int enable_ats_device(int seg, int bus, int devfn); +int enable_ats_device(int seg, int bus, int devfn, const void *iommu); void disable_ats_device(int seg, int bus, int devfn); struct pci_ats_dev *get_ats_device(int seg, int bus, int devfn); --- a/xen/drivers/passthrough/vtd/iommu.c +++ b/xen/drivers/passthrough/vtd/iommu.c @@ -1442,7 +1442,7 @@ static int domain_context_mapping( ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn, pdev); if ( !ret && devfn == pdev->devfn && ats_device(pdev, drhd) > 0 ) - enable_ats_device(seg, bus, devfn); + enable_ats_device(seg, bus, devfn, drhd->iommu); break; @@ -1930,7 +1930,7 @@ static int intel_iommu_enable_device(str if ( ret <= 0 ) return ret; - ret = enable_ats_device(pdev->seg, pdev->bus, pdev->devfn); + ret = enable_ats_device(pdev->seg, pdev->bus, pdev->devfn, drhd->iommu); return ret >= 
0 ? 0 : ret; } --- a/xen/drivers/passthrough/vtd/x86/ats.c +++ b/xen/drivers/passthrough/vtd/x86/ats.c @@ -120,6 +120,10 @@ int dev_invalidate_iotlb(struct iommu *i { sid = (pdev->bus << 8) | pdev->devfn; + /* Only invalidate devices that belong to this IOMMU */ + if ( pdev->iommu != iommu ) + continue; + switch ( type ) { case DMA_TLB_DSI_FLUSH: if ( !device_in_domain(iommu, pdev, did) ) --- a/xen/drivers/passthrough/x86/ats.c +++ b/xen/drivers/passthrough/x86/ats.c @@ -23,7 +23,7 @@ LIST_HEAD(ats_devices); bool_t __read_mostly ats_enabled = 1; boolean_param("ats", ats_enabled); -int enable_ats_device(int seg, int bus, int devfn) +int enable_ats_device(int seg, int bus, int devfn, const void *iommu) { struct pci_ats_dev *pdev = NULL; u32 value; @@ -66,6 +66,7 @@ int enable_ats_device(int seg, int bus, pdev->seg = seg; pdev->bus = bus; pdev->devfn = devfn; + pdev->iommu = iommu; value = pci_conf_read16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), pos + ATS_REG_CAP); pdev->ats_queue_depth = value & ATS_QUEUE_DEPTH_MASK ?: ++++++ 53a199d7-x86-EFI-allow-FPU-XMM-use-in-runtime-service-functions.patch ++++++ References: bnc#882127 # Commit e0fe297dabc96d8161d568f19a99722c4739b9f9 # Date 2014-06-18 15:53:27 +0200 # Author Jan Beulich <jbeulich@suse.com> # Committer Jan Beulich <jbeulich@suse.com> x86/EFI: allow FPU/XMM use in runtime service functions UEFI spec update 2.4B developed a requirement to enter runtime service functions with CR0.TS (and CR0.EM) clear, thus making feasible the already previously stated permission for these functions to use some of the XMM registers. Enforce this requirement (along with the connected ones on FPU control word and MXCSR) by going through a full FPU save cycle (if the FPU was dirty) in efi_rs_enter() (along with loading the specified values into the other two registers). 
Note that the UEFI spec mandates that extension registers other than XMM ones (for our purposes all that get restored eagerly) are preserved across runtime function calls, hence there's nothing we need to restore in efi_rs_leave() (they do get saved, but just for simplicity's sake). Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/efi/runtime.c +++ b/xen/arch/x86/efi/runtime.c @@ -10,6 +10,8 @@ DEFINE_XEN_GUEST_HANDLE(CHAR16); #ifndef COMPAT +# include <asm/i387.h> +# include <asm/xstate.h> # include <public/platform.h> const bool_t efi_enabled = 1; @@ -45,8 +47,14 @@ const struct efi_pci_rom *__read_mostly unsigned long efi_rs_enter(void) { + static const u16 fcw = FCW_DEFAULT; + static const u32 mxcsr = MXCSR_DEFAULT; unsigned long cr3 = read_cr3(); + save_fpu_enable(); + asm volatile ( "fldcw %0" :: "m" (fcw) ); + asm volatile ( "ldmxcsr %0" :: "m" (mxcsr) ); + spin_lock(&efi_rs_lock); /* prevent fixup_page_fault() from doing anything */ @@ -82,6 +90,7 @@ void efi_rs_leave(unsigned long cr3) } irq_exit(); spin_unlock(&efi_rs_lock); + stts(); } unsigned long efi_get_time(void) --- a/xen/arch/x86/i387.c +++ b/xen/arch/x86/i387.c @@ -266,10 +266,10 @@ void vcpu_restore_fpu_lazy(struct vcpu * * On each context switch, save the necessary FPU info of VCPU being switch * out. It dispatches saving operation based on CPU's capability. 
*/ -void vcpu_save_fpu(struct vcpu *v) +static bool_t _vcpu_save_fpu(struct vcpu *v) { if ( !v->fpu_dirtied && !v->arch.nonlazy_xstate_used ) - return; + return 0; ASSERT(!is_idle_vcpu(v)); @@ -284,9 +284,22 @@ void vcpu_save_fpu(struct vcpu *v) fpu_fsave(v); v->fpu_dirtied = 0; + + return 1; +} + +void vcpu_save_fpu(struct vcpu *v) +{ + _vcpu_save_fpu(v); stts(); } +void save_fpu_enable(void) +{ + if ( !_vcpu_save_fpu(current) ) + clts(); +} + /* Initialize FPU's context save area */ int vcpu_init_fpu(struct vcpu *v) { --- a/xen/include/asm-x86/i387.h +++ b/xen/include/asm-x86/i387.h @@ -38,6 +38,7 @@ struct ix87_state { void vcpu_restore_fpu_eager(struct vcpu *v); void vcpu_restore_fpu_lazy(struct vcpu *v); void vcpu_save_fpu(struct vcpu *v); +void save_fpu_enable(void); int vcpu_init_fpu(struct vcpu *v); void vcpu_destroy_fpu(struct vcpu *v); ++++++ README.SuSE ++++++ --- /var/tmp/diff_new_pack.6kvNQC/_old 2014-07-08 16:59:09.000000000 +0200 +++ /var/tmp/diff_new_pack.6kvNQC/_new 2014-07-08 16:59:09.000000000 +0200 @@ -194,9 +194,9 @@ Once you have the VM configured, click "OK". The wizard will now create a configuration file for the VM, and create a disk image. The disk image will -exist in /var/lib/xen/images, and a corresponding config file will exist in -/etc/xen/vm. The operating system's installation program will then run within -the VM. +exist in /var/lib/xen/images, and a corresponding configuration file will exist +in /etc/xen/vm. The operating system's installation program will then run +within the VM. When the VM shuts down (because the installation -- or at least the first stage of it -- is done), the wizard finalizes the VM's configuration and @@ -265,7 +265,7 @@ intended to remain backwards compatible with existing xm domain configuration files. Most 'xm' commands can simply be replaced with 'xl'. One significant difference is that xl does not support the concept of Managed Domains. The xl -command can only modifiy running VMs. 
Once the VM is shutdown, there is no +command can only modify running VMs. Once the VM is shutdown, there is no preserved state information other than what is saved in the configuration file used to start the VM. In order to provide Managed Domains, users are encouraged to use libvirt and its tools to create and modify VMs. These @@ -386,6 +386,25 @@ on wireless; you can't bridge Xen "ethernet" packets into 802.11 packets. +Network Troubleshooting +----------------------- +First ensure the VM server is configured correctly and can access the network. + +Do not use ifplugd or NetworkManager, neither are bridge aware. + +Specify a static virtual MAC in the VM's configuration file. Random MACs can +be problematic, since with each boot of the VM it appears that some hardware +has been removed (the previous random MAC) and new hardware is present (the +new random MAC). This can cause network configuration files (which were +intended for the old MAC) to not be matched up with the new virtual hardware. + +In the VM's filesystem, ensure the ifcfg-eth* files are named appropriately. +For example, if you do decide to use a randomly-selected MAC for the VM, the +ifcfg-eth* file must not include the MAC in its name; name it generically +("ifcfg-eth0") instead. If you use a static virtual MAC for the VM, be sure +that is reflected in the file's name. + + Thread-Local Storage -------------------- For some time now, the glibc thread library (NPTL) has used a shortcut to @@ -446,23 +465,57 @@ into /etc/modprobe.conf.local in domain 0. -Network Troubleshooting ------------------------ -First ensure the VM server is configured correctly and can access the network.
+Upgrading the Host Operating System +----------------------------------- +When upgrading the host operating system from one major release to another +(for example, SLES 11 to SLES 12 or openSUSE 12.3 to openSUSE 13.1) or when +applying a service pack like SLES 11 SP3 to SLES 11 SP2, all running VMs must +be shut down before the upgrade process is begun. -Do not use ifplugd or NetworkManager, neither are bridge aware. +On versions of SLES 11 and openSUSE 12 you are using the xm/xend toolstack. +After upgrading to SLES 12 and newer openSUSE versions this toolstack will be +replaced with the xl toolstack. The xl toolstack does not support Managed +Domains. If you wish to continue using Managed Domains you must switch to +using libvirt and its command line interface 'virsh'. You may also use +virt-manager as a GUI interface to libvirt. After upgrading the host but +before you can begin using libvirt on VMs that were previously managed by +xm/xend, you must run a conversion tool called /usr/sbin/xen2libvirt for all +VMs. -Specify a static virtual MAC in the VM's configuration file. Random MACs can -be problematic, since with each boot of the VM it appears that some hardware -has been removed (the previous random MAC) and new hardware is present (the -new random MAC). This can cause network configuration files (which were -intended for the old MAC) to not be matched up with the new virtual hardware. +For example, to convert all managed xend domains: + xen2libvirt -r /var/lib/xend/domains/ + +Now typing 'virsh list --all' will show your previously xend managed domains +being managed by libvirt. Run 'xen2libvirt -h' to see additional options for +using this tool. + + +Memory Ballooning in VMs +------------------------ +Setting a VM's maximum memory value greater than the initial memory value +requires support for memory ballooning in the VM's operating system. Modern SLES +and openSUSE guests have this capability built-in.
Windows installation media +does not support memory ballooning so you must first install the VM without +memory ballooning (maxmem equal to initial memory). After the installation, the +Virtual Machine Driver Pack (vmdp) must be installed. After this, the VM's +maxmem value may be increased. A reboot of the VM is required for this action +to take effect. -In the VM's filesystem, ensure the ifcfg-eth* files are named appropriately. -For example, if you do decide to use a randomly-selected MAC for the VM, the -ifcfg-eth* file must not include the MAC in its name; name it generically -("ifcfg-eth0") instead. If you use a static virtual MAC for the VM, be sure -that is reflected in the file's name. + +Dom0 Memory Ballooning +---------------------- +It is recommended that you dedicate a fixed amount of RAM to dom0 rather than +relying on dom0 ballooning. The amount of RAM dedicated to dom0 should never +be less than the recommended minimum amount for running your SUSE distribution +in native mode. The following example shows the xen.gz syntax for doing this. +This would be added to your grub1 or grub2 configuration: + +dom0_mem=1024M,max:1024M + +After modifying your grub configuration, you will need to edit /etc/xen/xl.conf +and set autoballoon=0. This will prevent xl from automatically adjusting the +amount of memory assigned to dom0. Reboot the host for these changes to take +effect. Troubleshooting @@ -509,36 +562,6 @@ http://forge.novell.com/modules/xfmod/project/?xenpreview -Upgrading the Host Operating System ------------------------------------ -When upgrading the host operating system from one major release to another -(for example, SLES 11 to SLES 12 or openSUSE 12.3 to openSUSE 13.1) or when -applying a service pack like SLES 11 SP3 to SLES 11 SP2 all running VMs must -be shut down before the upgrade process is begun. - -On versions of SLES 11 and openSUSE 12 you are using the xm/xend toolstack.
-After upgrading to SLES 12 and newer openSUSE versions this toolstack will be -replaced with the xl toolstack. The xl toolstack does not support Managed -Domains. If you wish to continue using Managed Domains you must switch to -using libvirt and its command line interface 'virsh'. You may also use -virt-manager as a GUI interface to libvirt. After upgrading the host but -before you can begin using libvirt on VMs that were previously managed by -xm/xend, you must run a conversion tool called /usr/sbin/xen2libvirt for all -VMs. Run 'xen2libvirt -h' to see the syntax for using this tool. - - -Memory Ballooning in VMs ------------------------- -Setting a VMs maximum memory value greater than the initial memory value -requires support for memory ballooning in the VMs operating system. Modern SLES -and openSUSE guests have this capabilitity built-in. Windows installation media -does not support memory ballooning so you must first install the VM without -memory ballooning (maxmem equal to initial memory). After the installation, the -Virtual Machine Driver Pack (vmdp) must be installed. After this, the VMs -maxmem value may be increased. A reboot of the VM is required for this action -to take effect. - - Resources --------- https://www.suse.com/documentation/sles11/singlehtml/book_xen/book_xen.html ++++++ qemu-support-xen-hvm-direct-kernel-boot.patch ++++++
From dd708897cc5b21bc374cd44b6d58c1e74b04bd6e Mon Sep 17 00:00:00 2001 From: Chunyan Liu <cyliu@suse.com> Date: Wed, 28 May 2014 14:31:35 +0800 Subject: [PATCH 2/2] qemu: support xen hvm direct kernel boot
qemu side patch to support xen HVM direct kernel boot: if -kernel exists, calls xen_load_linux(), which will read kernel/initrd and add a linuxboot.bin or multiboot.bin option rom. The linuxboot.bin/multiboot.bin will load kernel/initrd and jump to execute kernel directly. It's working when xen uses seabios. Signed-off-by: Chunyan Liu <cyliu@suse.com> Index: xen-4.4.0-testing/tools/qemu-xen-dir-remote/hw/i386/pc.c =================================================================== --- xen-4.4.0-testing.orig/tools/qemu-xen-dir-remote/hw/i386/pc.c +++ xen-4.4.0-testing/tools/qemu-xen-dir-remote/hw/i386/pc.c @@ -1105,6 +1105,35 @@ void pc_acpi_init(const char *default_ds } } +FWCfgState *xen_load_linux(const char *kernel_filename, + const char *kernel_cmdline, + const char *initrd_filename, + ram_addr_t below_4g_mem_size, + PcGuestInfo *guest_info) +{ + int i; + FWCfgState *fw_cfg; + + assert(kernel_filename != NULL); + + fw_cfg = fw_cfg_init(BIOS_CFG_IOPORT, BIOS_CFG_IOPORT + 1, 0, 0); + rom_set_fw(fw_cfg); + + load_linux(fw_cfg, kernel_filename, initrd_filename, kernel_cmdline, below_4g_mem_size); + for (i = 0; i < nb_option_roms; i++) { + /* For xen, we only want to add the linuxboot.bin/multiboot.bin option rom. + * But in option_rom, there is still kvmvapic.bin. We don't want to add it. 
+ */ + if (strcmp(option_rom[i].name, "linuxboot.bin") && + strcmp(option_rom[i].name, "multiboot.bin")) { + continue; + } + rom_add_option(option_rom[i].name, option_rom[i].bootindex); + } + guest_info->fw_cfg = fw_cfg; + return fw_cfg; +} + FWCfgState *pc_memory_init(MemoryRegion *system_memory, const char *kernel_filename, const char *kernel_cmdline, Index: xen-4.4.0-testing/tools/qemu-xen-dir-remote/hw/i386/pc_piix.c =================================================================== --- xen-4.4.0-testing.orig/tools/qemu-xen-dir-remote/hw/i386/pc_piix.c +++ xen-4.4.0-testing/tools/qemu-xen-dir-remote/hw/i386/pc_piix.c @@ -135,6 +135,13 @@ static void pc_init1(MemoryRegion *syste kernel_filename, kernel_cmdline, initrd_filename, below_4g_mem_size, above_4g_mem_size, rom_memory, &ram_memory, guest_info); + } else if (kernel_filename != NULL) { + /* For xen HVM direct kernel boot, load linux here */ + fw_cfg = xen_load_linux(kernel_filename, + kernel_cmdline, + initrd_filename, + below_4g_mem_size, + guest_info); } gsi_state = g_malloc0(sizeof(*gsi_state)); Index: xen-4.4.0-testing/tools/qemu-xen-dir-remote/include/hw/i386/pc.h =================================================================== --- xen-4.4.0-testing.orig/tools/qemu-xen-dir-remote/include/hw/i386/pc.h +++ xen-4.4.0-testing/tools/qemu-xen-dir-remote/include/hw/i386/pc.h @@ -120,6 +120,11 @@ static inline uint64_t pci_host_get_hole void pc_init_pci64_hole(PcPciInfo *pci_info, uint64_t pci_hole64_start, uint64_t pci_hole64_size); +FWCfgState *xen_load_linux(const char *kernel_filename, + const char *kernel_cmdline, + const char *initrd_filename, + ram_addr_t below_4g_mem_size, + PcGuestInfo *guest_info); FWCfgState *pc_memory_init(MemoryRegion *system_memory, const char *kernel_filename, const char *kernel_cmdline, ++++++ xen-pass-kernel-initrd-to-qemu.patch ++++++
From 865406533fe7a163acd5ed299628f1dc8d475803 Mon Sep 17 00:00:00 2001 From: Chunyan Liu <cyliu@suse.com> Date: Wed, 28 May 2014 14:36:54 +0800 Subject: [PATCH 1/2] xen: pass kernel initrd to qemu
xen side patch to support xen HVM direct kernel boot: support 'kernel', 'ramdisk', 'root', 'extra' in HVM config file, parse config file, pass -kernel, -initrd, -append parameters to qemu. It's working with seabios and non-stubdom. Rombios and stubdom cases are currently not supported. [config example] kernel="/mnt/vmlinuz-3.0.13-0.27-default" ramdisk="/mnt/initrd-3.0.13-0.27-default" root="/dev/hda2" extra="console=tty0 console=ttyS0" disk=[ 'file:/mnt/images/bjz_04_sles11_sp2/disk0.raw,hda,w', ] Signed-off-by: Chunyan Liu <cyliu@suse.com> --- docs/man/xl.cfg.pod.5 | 50 ++++++++++++++++++++++++---------------- tools/libxl/libxl_dm.c | 15 ++++++++++++ tools/libxl/libxl_types.idl | 3 +++ tools/libxl/xl_cmdimpl.c | 56 +++++++++++++++++++++++++++------------------ 4 files changed, 82 insertions(+), 42 deletions(-) Index: xen-4.4.0-testing/docs/man/xl.cfg.pod.5 =================================================================== --- xen-4.4.0-testing.orig/docs/man/xl.cfg.pod.5 +++ xen-4.4.0-testing/docs/man/xl.cfg.pod.5 @@ -296,6 +296,34 @@ Action to take if the domain crashes. D =back +=head3 Direct Kernel Boot + +Currently, direct kernel boot can be supported by PV guests, and HVM guests +in some configuration. For HVM guests, in case of stubdom-dm and old rombios, +direct kernel boot is not supported. + +=over 4 + +=item B<kernel="PATHNAME"> + +Load the specified file as the kernel image. + +=item B<ramdisk="PATHNAME"> + +Load the specified file as the ramdisk. + +=item B<root="STRING"> + +Append B<root="STRING"> to the kernel command line (Note: it is guest +specific what meaning this has). + +=item B<extra="STRING"> + +Append B<STRING> to the kernel command line. (Note: it is guest +specific what meaning this has). + +=back + =head3 Other Options =over 4 @@ -655,20 +683,12 @@ The following options apply only to Para =over 4 -=item B<kernel="PATHNAME"> - -Load the specified file as the kernel image. Either B<kernel> or -B<bootloader> must be specified for PV guests. 
- -=item B<ramdisk="PATHNAME"> - -Load the specified file as the ramdisk. - =item B<bootloader="PROGRAM"> Run C<PROGRAM> to find the kernel image and ramdisk to use. Normally C<PROGRAM> would be C<pygrub>, which is an emulation of -grub/grub2/syslinux. +grub/grub2/syslinux. Either B<kernel> or B<bootloader> must be specified +for PV guests. =item B<bootloader_args=[ "ARG", "ARG", ...]> @@ -676,16 +696,6 @@ Append B<ARG>s to the arguments to the B program. Alternatively if the argument is a simple string then it will be split into words at whitespace (this second option is deprecated). -=item B<root="STRING"> - -Append B<root="STRING"> to the kernel command line (Note: it is guest -specific what meaning this has). - -=item B<extra="STRING"> - -Append B<STRING> to the kernel command line. Note: it is guest -specific what meaning this has). - =item B<e820_host=BOOLEAN> Selects whether to expose the host e820 (memory map) to the guest via Index: xen-4.4.0-testing/tools/libxl/libxl_dm.c =================================================================== --- xen-4.4.0-testing.orig/tools/libxl/libxl_dm.c +++ xen-4.4.0-testing/tools/libxl/libxl_dm.c @@ -196,6 +196,12 @@ static char ** libxl__build_device_model int nr_set_cpus = 0; char *s; + if (b_info->u.hvm.kernel) { + LOG(ERROR, "direct kernel boot is not supported by %s", + dm); + return NULL; + } + if (b_info->u.hvm.serial) { flexarray_vappend(dm_args, "-serial", b_info->u.hvm.serial, NULL); } @@ -487,6 +493,15 @@ static char ** libxl__build_device_model if (b_info->type == LIBXL_DOMAIN_TYPE_HVM) { int ioemu_nics = 0; + if (b_info->u.hvm.kernel) + flexarray_vappend(dm_args, "-kernel", b_info->u.hvm.kernel, NULL); + + if (b_info->u.hvm.ramdisk) + flexarray_vappend(dm_args, "-initrd", b_info->u.hvm.ramdisk, NULL); + + if (b_info->u.hvm.cmdline) + flexarray_vappend(dm_args, "-append", b_info->u.hvm.cmdline, NULL); + if (b_info->u.hvm.serial) { flexarray_vappend(dm_args, "-serial", b_info->u.hvm.serial, NULL); } Index: 
xen-4.4.0-testing/tools/libxl/libxl_types.idl =================================================================== --- xen-4.4.0-testing.orig/tools/libxl/libxl_types.idl +++ xen-4.4.0-testing/tools/libxl/libxl_types.idl @@ -335,6 +335,9 @@ libxl_domain_build_info = Struct("domain ("event_channels", uint32), ("u", KeyedUnion(None, libxl_domain_type, "type", [("hvm", Struct(None, [("firmware", string), + ("kernel", string), + ("cmdline", string), + ("ramdisk", string), ("bios", libxl_bios_type), ("pae", libxl_defbool), ("apic", libxl_defbool), Index: xen-4.4.0-testing/tools/libxl/xl_cmdimpl.c =================================================================== --- xen-4.4.0-testing.orig/tools/libxl/xl_cmdimpl.c +++ xen-4.4.0-testing/tools/libxl/xl_cmdimpl.c @@ -848,6 +848,29 @@ static void parse_top_level_sdl_options( xlu_cfg_replace_string (config, "xauthority", &sdl->xauthority, 0); } +static char *parse_cmdline(XLU_Config *config) +{ + char *cmdline = NULL; + const char *root = NULL, *extra = ""; + + xlu_cfg_get_string (config, "root", &root, 0); + xlu_cfg_get_string (config, "extra", &extra, 0); + + if (root) { + if (asprintf(&cmdline, "root=%s %s", root, extra) == -1) + cmdline = NULL; + } else { + cmdline = strdup(extra); + } + + if ((root || extra) && !cmdline) { + fprintf(stderr, "Failed to allocate memory for cmdline\n"); + exit(1); + } + + return cmdline; +} + static void parse_config_data(const char *config_source, const char *config_data, int config_len, @@ -1129,9 +1152,16 @@ static void parse_config_data(const char switch(b_info->type) { case LIBXL_DOMAIN_TYPE_HVM: - if (!xlu_cfg_get_string (config, "kernel", &buf, 0)) - fprintf(stderr, "WARNING: ignoring \"kernel\" directive for HVM guest. " - "Use \"firmware_override\" instead if you really want a non-default firmware\n"); + if (!xlu_cfg_get_string (config, "kernel", &buf, 0)) { + if (strstr(buf, "hvmloader")) + fprintf(stderr, "WARNING: ignoring \"kernel\" directive for HVM guest. 
" + "Use \"firmware_override\" instead if you really want a non-default firmware\n"); + else + b_info->u.hvm.kernel = strdup(buf); + } + + b_info->u.hvm.cmdline = parse_cmdline(config); + xlu_cfg_replace_string (config, "ramdisk", &b_info->u.hvm.ramdisk, 0); xlu_cfg_replace_string (config, "firmware_override", &b_info->u.hvm.firmware, 0); @@ -1183,26 +1213,8 @@ static void parse_config_data(const char break; case LIBXL_DOMAIN_TYPE_PV: { - char *cmdline = NULL; - const char *root = NULL, *extra = ""; - xlu_cfg_replace_string (config, "kernel", &b_info->u.pv.kernel, 0); - xlu_cfg_get_string (config, "root", &root, 0); - xlu_cfg_get_string (config, "extra", &extra, 0); - - if (root) { - if (asprintf(&cmdline, "root=%s %s", root, extra) == -1) - cmdline = NULL; - } else { - cmdline = strdup(extra); - } - - if ((root || extra) && !cmdline) { - fprintf(stderr, "Failed to allocate memory for cmdline\n"); - exit(1); - } - xlu_cfg_replace_string (config, "bootloader", &b_info->u.pv.bootloader, 0); switch (xlu_cfg_get_list_as_string_list(config, "bootloader_args", &b_info->u.pv.bootloader_args, 1)) @@ -1230,7 +1242,7 @@ static void parse_config_data(const char exit(1); } - b_info->u.pv.cmdline = cmdline; + b_info->u.pv.cmdline = parse_cmdline(config); xlu_cfg_replace_string (config, "ramdisk", &b_info->u.pv.ramdisk, 0); break; } ++++++ xen2libvirt.py ++++++ --- /var/tmp/diff_new_pack.6kvNQC/_old 2014-07-08 16:59:09.000000000 +0200 +++ /var/tmp/diff_new_pack.6kvNQC/_new 2014-07-08 16:59:09.000000000 +0200 @@ -26,6 +26,7 @@ import os import argparse import re +from xml.etree import ElementTree try: import libvirt @@ -77,6 +78,13 @@ f.close() + # domUloader is no longer available in SLES12, replace with pygrub + tree = ElementTree.fromstring(xml) + bl = tree.find('.//bootloader') + if bl is not None and 'domUloader' in bl.text: + bl.text = 'pygrub' + xml = ElementTree.tostring(tree) + print_verbose('Successfully converted Xen domain configuration to ' 'libvirt domXML:\n %s' % 
xml) if convert_only: -- To unsubscribe, e-mail: opensuse-commit+unsubscribe@opensuse.org For additional commands, e-mail: opensuse-commit+help@opensuse.org
participants (1)
-
root@hilbert.suse.de