Hello community,

here is the log from the commit of package xen for openSUSE:Factory checked in at 2015-08-31 22:57:42
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/xen (Old)
 and      /work/SRC/openSUSE:Factory/.xen.new (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "xen"

Changes:
--------
--- /work/SRC/openSUSE:Factory/xen/xen.changes 2015-08-21 08:24:25.000000000 +0200
+++ /work/SRC/openSUSE:Factory/.xen.new/xen.changes 2015-08-31 22:57:44.000000000 +0200
@@ -1,0 +2,47 @@
+Wed Aug 26 16:18:58 MDT 2015 - carnold@suse.com
+
+- bnc#935634 - VUL-0: CVE-2015-3259: xen: XSA-137: xl command line
+  config handling stack overflow
+  55a62eb0-xl-correct-handling-of-extra_config-in-main_cpupoolcreate.patch
+
+-------------------------------------------------------------------
+Tue Aug 18 10:18:20 MDT 2015 - carnold@suse.com
+
+- bsc#907514 - Bus fatal error & sles12 sudden reboot has been
+  observed
+- bsc#910258 - SLES12 Xen host crashes with FATAL NMI after
+  shutdown of guest with VT-d NIC
+- bsc#918984 - Bus fatal error & sles11-SP4 sudden reboot has been
+  observed
+- bsc#923967 - Partner-L3: Bus fatal error & sles11-SP3 sudden
+  reboot has been observed
+  552d293b-x86-vMSI-X-honor-all-mask-requests.patch
+  552d2966-x86-vMSI-X-add-valid-bits-for-read-acceleration.patch
+  5576f143-x86-adjust-PV-I-O-emulation-functions-types.patch
+  55795a52-x86-vMSI-X-support-qword-MMIO-access.patch
+  5583d9c5-x86-MSI-X-cleanup.patch
+  5583da09-x86-MSI-track-host-and-guest-masking-separately.patch
+  55b0a218-x86-PCI-CFG-write-intercept.patch
+  55b0a255-x86-MSI-X-maskall.patch
+  55b0a283-x86-MSI-X-teardown.patch
+  55b0a2ab-x86-MSI-X-enable.patch
+  55b0a2db-x86-MSI-track-guest-masking.patch
+- Upstream patches from Jan
+  552d0f49-x86-traps-identify-the-vcpu-in-context-when-dumping-regs.patch
+  559bc633-x86-cpupool-clear-proper-cpu_valid-bit-on-CPU-teardown.patch
+  559bc64e-credit1-properly-deal-with-CPUs-not-in-any-pool.patch
+  559bc87f-x86-hvmloader-avoid-data-corruption-with-xenstore-rw.patch
+  55a66a1e-make-rangeset_report_ranges-report-all-ranges.patch
+  55a77e4f-dmar-device-scope-mem-leak-fix.patch
+  55c1d83d-x86-gdt-Drop-write-only-xalloc-d-array.patch
+  55c3232b-x86-mm-Make-hap-shadow-teardown-preemptible.patch
+- Dropped for upstream version
+  x86-MSI-mask.patch
+  x86-MSI-pv-unmask.patch
+  x86-MSI-X-enable.patch
+  x86-MSI-X-maskall.patch
+  x86-MSI-X-teardown.patch
+  x86-pci_cfg_okay.patch
+  x86-PCI-CFG-write-intercept.patch
+
+-------------------------------------------------------------------
@@ -94,4 +140,0 @@
-- Dropped
-  qemu-MSI-X-enable-maskall.patch
-  qemu-MSI-X-latch-writes.patch
-  x86-MSI-X-guest-mask.patch
@@ -130,0 +174,3 @@
+  qemu-MSI-X-enable-maskall.patch
+  qemu-MSI-X-latch-writes.patch
+  x86-MSI-X-guest-mask.patch

Old:
----
  x86-MSI-X-enable.patch
  x86-MSI-X-maskall.patch
  x86-MSI-X-teardown.patch
  x86-MSI-mask.patch
  x86-MSI-pv-unmask.patch
  x86-PCI-CFG-write-intercept.patch
  x86-pci_cfg_okay.patch

New:
----
  552d0f49-x86-traps-identify-the-vcpu-in-context-when-dumping-regs.patch
  5576f143-x86-adjust-PV-I-O-emulation-functions-types.patch
  559bc633-x86-cpupool-clear-proper-cpu_valid-bit-on-CPU-teardown.patch
  559bc64e-credit1-properly-deal-with-CPUs-not-in-any-pool.patch
  559bc87f-x86-hvmloader-avoid-data-corruption-with-xenstore-rw.patch
  55a62eb0-xl-correct-handling-of-extra_config-in-main_cpupoolcreate.patch
  55a66a1e-make-rangeset_report_ranges-report-all-ranges.patch
  55a77e4f-dmar-device-scope-mem-leak-fix.patch
  55b0a218-x86-PCI-CFG-write-intercept.patch
  55b0a255-x86-MSI-X-maskall.patch
  55b0a283-x86-MSI-X-teardown.patch
  55b0a2ab-x86-MSI-X-enable.patch
  55b0a2db-x86-MSI-track-guest-masking.patch
  55c1d83d-x86-gdt-Drop-write-only-xalloc-d-array.patch
  55c3232b-x86-mm-Make-hap-shadow-teardown-preemptible.patch

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ xen.spec ++++++
--- /var/tmp/diff_new_pack.wPkcl5/_old  2015-08-31 22:57:48.000000000 +0200
+++ /var/tmp/diff_new_pack.wPkcl5/_new  2015-08-31 22:57:48.000000000 +0200
@@ -1,7 +1,7 @@
 #
 # spec file for package xen
 #
-# Copyright (c) 2015 SUSE LINUX Products GmbH, Nuernberg, Germany.
+# Copyright (c) 2015 SUSE LINUX GmbH, Nuernberg, Germany.
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -15,6 +15,7 @@
 # Please submit bugfixes or comments via http://bugs.opensuse.org/
 #
 
+
 # needssslcertforbuild
 
 Name:           xen
@@ -158,7 +159,7 @@
 %endif
 %endif
 
-Version:        4.5.1_02
+Version:        4.5.1_07
 Release:        0
 Summary:        Xen Virtualization: Hypervisor (aka VMM aka Microkernel)
 License:        GPL-2.0
@@ -203,16 +204,25 @@
 # Upstream patches
 Patch1: 55103616-vm-assist-prepare-for-discontiguous-used-bit-numbers.patch
 Patch2: 551ac326-xentop-add-support-for-qdisk.patch
-Patch3: 5548e903-domctl-don-t-truncate-XEN_DOMCTL_max_mem-requests.patch
-Patch4: 5548e95d-x86-allow-to-suppress-M2P-user-mode-exposure.patch
-Patch5: 554cc211-libxl-add-qxl.patch
-Patch6: 556d973f-unmodified-drivers-tolerate-IRQF_DISABLED-being-undefined.patch
-Patch7: 5576f178-kexec-add-more-pages-to-v1-environment.patch
-Patch8: 55780be1-x86-EFI-adjust-EFI_MEMORY_WP-handling-for-spec-version-2.5.patch
-Patch9: 558bfaa0-x86-traps-avoid-using-current-too-early.patch
-Patch10: 5592a116-nested-EPT-fix-the-handling-of-nested-EPT.patch
-Patch11: 559b9dd6-x86-p2m-ept-don-t-unmap-in-use-EPT-pagetable.patch
-Patch12: 559bdde5-pull-in-latest-linux-earlycpio.patch
+Patch3: 552d0f49-x86-traps-identify-the-vcpu-in-context-when-dumping-regs.patch
+Patch4: 5548e903-domctl-don-t-truncate-XEN_DOMCTL_max_mem-requests.patch
+Patch5: 5548e95d-x86-allow-to-suppress-M2P-user-mode-exposure.patch
+Patch6: 554cc211-libxl-add-qxl.patch
+Patch7: 556d973f-unmodified-drivers-tolerate-IRQF_DISABLED-being-undefined.patch
+Patch8: 5576f178-kexec-add-more-pages-to-v1-environment.patch
+Patch9: 55780be1-x86-EFI-adjust-EFI_MEMORY_WP-handling-for-spec-version-2.5.patch
+Patch10: 558bfaa0-x86-traps-avoid-using-current-too-early.patch
+Patch11: 5592a116-nested-EPT-fix-the-handling-of-nested-EPT.patch
+Patch12: 559b9dd6-x86-p2m-ept-don-t-unmap-in-use-EPT-pagetable.patch
+Patch13: 559bc633-x86-cpupool-clear-proper-cpu_valid-bit-on-CPU-teardown.patch
+Patch14: 559bc64e-credit1-properly-deal-with-CPUs-not-in-any-pool.patch
+Patch15: 559bc87f-x86-hvmloader-avoid-data-corruption-with-xenstore-rw.patch
+Patch16: 559bdde5-pull-in-latest-linux-earlycpio.patch
+Patch17: 55a62eb0-xl-correct-handling-of-extra_config-in-main_cpupoolcreate.patch
+Patch18: 55a66a1e-make-rangeset_report_ranges-report-all-ranges.patch
+Patch19: 55a77e4f-dmar-device-scope-mem-leak-fix.patch
+Patch20: 55c1d83d-x86-gdt-Drop-write-only-xalloc-d-array.patch
+Patch21: 55c3232b-x86-mm-Make-hap-shadow-teardown-preemptible.patch
 Patch131: CVE-2015-4106-xsa131-9.patch
 Patch137: CVE-2015-3259-xsa137.patch
 Patch139: xsa139-qemuu.patch
@@ -329,40 +339,42 @@
 Patch606: xen.build-compare.seabios.patch
 Patch607: xen.build-compare.man.patch
 Patch608: ipxe-no-error-logical-not-parentheses.patch
-# Extra patches pending review
-Patch801: 552d0fd2-x86-hvm-don-t-include-asm-spinlock-h.patch
-Patch802: 552d0fe8-x86-mtrr-include-asm-atomic.h.patch
-Patch803: 552d293b-x86-vMSI-X-honor-all-mask-requests.patch
-Patch804: 552d2966-x86-vMSI-X-add-valid-bits-for-read-acceleration.patch
-Patch805: 554c7aee-x86-provide-arch_fetch_and_add.patch
-Patch806: 554c7b00-arm-provide-arch_fetch_and_add.patch
-Patch807: 55534b0a-x86-provide-add_sized.patch
-Patch808: 55534b25-arm-provide-add_sized.patch
-Patch809: 5555a4f8-use-ticket-locks-for-spin-locks.patch
-Patch810: 5555a5b9-x86-arm-remove-asm-spinlock-h.patch
-Patch811: 5555a8ec-introduce-non-contiguous-allocation.patch
-Patch812: 55795a52-x86-vMSI-X-support-qword-MMIO-access.patch
-Patch813: 557eb55f-gnttab-per-active-entry-locking.patch
-Patch814: 557eb5b6-gnttab-introduce-maptrack-lock.patch
-Patch815: 557eb620-gnttab-make-the-grant-table-lock-a-read-write-lock.patch
-Patch816: 557ffab8-evtchn-factor-out-freeing-an-event-channel.patch
-Patch817: 5582bf43-evtchn-simplify-port_is_valid.patch
-Patch818: 5582bf81-evtchn-remove-the-locking-when-unmasking-an-event-channel.patch
-Patch819: 5583d9c5-x86-MSI-X-cleanup.patch
-Patch820: 5583da09-x86-MSI-track-host-and-guest-masking-separately.patch
-Patch821: 5583da64-gnttab-use-per-VCPU-maptrack-free-lists.patch
-Patch822: 5583da8c-gnttab-steal-maptrack-entries-from-other-VCPUs.patch
-Patch823: 5587d711-evtchn-clear-xen_consumer-when-clearing-state.patch
-Patch824: 5587d779-evtchn-defer-freeing-struct-evtchn-s-until-evtchn_destroy_final.patch
-Patch825: 5587d7b7-evtchn-use-a-per-event-channel-lock-for-sending-events.patch
-Patch826: 5587d7e2-evtchn-pad-struct-evtchn-to-64-bytes.patch
-Patch850: x86-MSI-pv-unmask.patch
-Patch851: x86-pci_cfg_okay.patch
-Patch852: x86-PCI-CFG-write-intercept.patch
-Patch853: x86-MSI-X-maskall.patch
-Patch854: x86-MSI-X-teardown.patch
-Patch855: x86-MSI-X-enable.patch
-Patch856: x86-MSI-mask.patch
+# MSI issues (bsc#907514 bsc#910258 bsc#918984 bsc#923967)
+Patch700: 552d293b-x86-vMSI-X-honor-all-mask-requests.patch
+Patch701: 552d2966-x86-vMSI-X-add-valid-bits-for-read-acceleration.patch
+Patch702: 5576f143-x86-adjust-PV-I-O-emulation-functions-types.patch
+Patch703: 55795a52-x86-vMSI-X-support-qword-MMIO-access.patch
+Patch704: 5583d9c5-x86-MSI-X-cleanup.patch
+Patch705: 5583da09-x86-MSI-track-host-and-guest-masking-separately.patch
+Patch706: 55b0a218-x86-PCI-CFG-write-intercept.patch
+Patch707: 55b0a255-x86-MSI-X-maskall.patch
+Patch708: 55b0a283-x86-MSI-X-teardown.patch
+Patch709: 55b0a2ab-x86-MSI-X-enable.patch
+Patch710: 55b0a2db-x86-MSI-track-guest-masking.patch
+# ticket locks
+Patch720: 552d0fd2-x86-hvm-don-t-include-asm-spinlock-h.patch
+Patch721: 552d0fe8-x86-mtrr-include-asm-atomic.h.patch
+Patch722: 554c7aee-x86-provide-arch_fetch_and_add.patch
+Patch723: 554c7b00-arm-provide-arch_fetch_and_add.patch
+Patch724: 55534b0a-x86-provide-add_sized.patch
+Patch725: 55534b25-arm-provide-add_sized.patch
+Patch726: 5555a4f8-use-ticket-locks-for-spin-locks.patch
+Patch727: 5555a5b9-x86-arm-remove-asm-spinlock-h.patch
+# grant table scalability
+Patch730: 5555a8ec-introduce-non-contiguous-allocation.patch
+Patch731: 557eb55f-gnttab-per-active-entry-locking.patch
+Patch732: 557eb5b6-gnttab-introduce-maptrack-lock.patch
+Patch733: 557eb620-gnttab-make-the-grant-table-lock-a-read-write-lock.patch
+Patch734: 5583da64-gnttab-use-per-VCPU-maptrack-free-lists.patch
+Patch735: 5583da8c-gnttab-steal-maptrack-entries-from-other-VCPUs.patch
+# event channel scalability
+Patch740: 557ffab8-evtchn-factor-out-freeing-an-event-channel.patch
+Patch741: 5582bf43-evtchn-simplify-port_is_valid.patch
+Patch742: 5582bf81-evtchn-remove-the-locking-when-unmasking-an-event-channel.patch
+Patch743: 5587d711-evtchn-clear-xen_consumer-when-clearing-state.patch
+Patch744: 5587d779-evtchn-defer-freeing-struct-evtchn-s-until-evtchn_destroy_final.patch
+Patch745: 5587d7b7-evtchn-use-a-per-event-channel-lock-for-sending-events.patch
+Patch746: 5587d7e2-evtchn-pad-struct-evtchn-to-64-bytes.patch
 # Build patches
 Patch99996: xen.stubdom.newlib.patch
 Patch99998: tmp_build.patch
@@ -581,6 +593,15 @@
 %patch10 -p1
 %patch11 -p1
 %patch12 -p1
+%patch13 -p1
+%patch14 -p1
+%patch15 -p1
+%patch16 -p1
+%patch17 -p1
+%patch18 -p1
+%patch19 -p1
+%patch20 -p1
+%patch21 -p1
 %patch131 -p1
 %patch137 -p1
 %patch139 -p1
@@ -696,40 +717,42 @@
 %patch606 -p1
 %patch607 -p1
 %patch608 -p1
-# Extra patches pending review
-%patch801 -p1
-%patch802 -p1
-%patch803 -p1
-%patch804 -p1
-%patch805 -p1
-%patch806 -p1
-%patch807 -p1
-%patch808 -p1
-%patch809 -p1
-%patch810 -p1
-%patch811 -p1
-%patch812 -p1
-%patch813 -p1
-%patch814 -p1
-%patch815 -p1
-%patch816 -p1
-%patch817 -p1
-%patch818 -p1
-%patch819 -p1
-%patch820 -p1
-%patch821 -p1
-%patch822 -p1
-%patch823 -p1
-%patch824 -p1
-%patch825 -p1
-%patch826 -p1
-%patch850 -p1
-%patch851 -p1
-%patch852 -p1
-%patch853 -p1
-%patch854 -p1
-%patch855 -p1
-%patch856 -p1
+# MSI issues (bsc#907514 bsc#910258 bsc#918984 bsc#923967)
+%patch700 -p1
+%patch701 -p1
+%patch702 -p1
+%patch703 -p1
+%patch704 -p1
+%patch705 -p1
+%patch706 -p1
+%patch707 -p1
+%patch708 -p1
+%patch709 -p1
+%patch710 -p1
+# ticket locks
+%patch720 -p1
+%patch721 -p1
+%patch722 -p1
+%patch723 -p1
+%patch724 -p1
+%patch725 -p1
+%patch726 -p1
+%patch727 -p1
+# grant table scalability
+%patch730 -p1
+%patch731 -p1
+%patch732 -p1
+%patch733 -p1
+%patch734 -p1
+%patch735 -p1
+# event channel scalability
+%patch740 -p1
+%patch741 -p1
+%patch742 -p1
+%patch743 -p1
+%patch744 -p1
+%patch745 -p1
+%patch746 -p1
 # Build patches
 %patch99996 -p1
 %patch99998 -p1

++++++ 552d0f49-x86-traps-identify-the-vcpu-in-context-when-dumping-regs.patch ++++++
# Commit e59abf8c8c9c1d99a531292c6a548d6dfd0ceacc
# Date 2015-04-14 14:59:53 +0200
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/traps: identify the vcpu in context when dumping registers

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

--- a/xen/arch/x86/x86_64/traps.c
+++ b/xen/arch/x86/x86_64/traps.c
@@ -53,9 +53,11 @@ static void _show_registers(
     printk("\nRFLAGS: %016lx ", regs->rflags);
     if ( (context == CTXT_pv_guest) && v && v->vcpu_info )
         printk("EM: %d ", !!vcpu_info(v, evtchn_upcall_mask));
-    printk("CONTEXT: %s\n", context_names[context]);
+    printk("CONTEXT: %s", context_names[context]);
+    if ( v && !is_idle_vcpu(v) )
+        printk(" (%pv)", v);
 
-    printk("rax: %016lx   rbx: %016lx   rcx: %016lx\n",
+    printk("\nrax: %016lx   rbx: %016lx   rcx: %016lx\n",
            regs->rax, regs->rbx, regs->rcx);
     printk("rdx: %016lx   rsi: %016lx   rdi: %016lx\n",
            regs->rdx, regs->rsi, regs->rdi);
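An aside on the naming scheme of these backports (an observation about the convention, not something stated in the log): the 8-hex-digit prefix of each upstream patch is the commit date encoded as a Unix timestamp, which is what keeps the Patch list sorted chronologically. A minimal C check, using only the standard library:

    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
        /* Prefix of 552d0f49-x86-traps-identify-the-vcpu-...patch above. */
        time_t stamp = 0x552d0f49;              /* = 1429016393 */
        char buf[32];

        /* Prints 2015-04-14 12:59:53 (UTC), i.e. 14:59:53 +0200 --
         * exactly the "# Date" header of the commit shown above. */
        strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", gmtime(&stamp));
        puts(buf);
        return 0;
    }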
++++++ 552d0fd2-x86-hvm-don-t-include-asm-spinlock-h.patch ++++++
--- /var/tmp/diff_new_pack.wPkcl5/_old  2015-08-31 22:57:48.000000000 +0200
+++ /var/tmp/diff_new_pack.wPkcl5/_new  2015-08-31 22:57:48.000000000 +0200
@@ -9,8 +9,8 @@
 Signed-off-by: David Vrabel <david.vrabel@citrix.com>
 Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
 
---- sle12sp1.orig/xen/arch/x86/hvm/hvm.c	2015-07-08 14:13:16.000000000 +0200
-+++ sle12sp1/xen/arch/x86/hvm/hvm.c	2015-07-08 14:13:38.000000000 +0200
+--- a/xen/arch/x86/hvm/hvm.c
++++ b/xen/arch/x86/hvm/hvm.c
 @@ -52,7 +52,6 @@
  #include <asm/xstate.h>
  #include <asm/traps.h>
@@ -19,8 +19,8 @@
  #include <asm/mce.h>
  #include <asm/hvm/hvm.h>
  #include <asm/hvm/vpt.h>
---- sle12sp1.orig/xen/arch/x86/hvm/svm/svm.c	2015-01-14 18:44:18.000000000 +0100
-+++ sle12sp1/xen/arch/x86/hvm/svm/svm.c	2015-07-08 14:13:38.000000000 +0200
+--- a/xen/arch/x86/hvm/svm/svm.c
++++ b/xen/arch/x86/hvm/svm/svm.c
 @@ -41,7 +41,6 @@
  #include <asm/msr.h>
  #include <asm/i387.h>
@@ -29,8 +29,8 @@
  #include <asm/hvm/emulate.h>
  #include <asm/hvm/hvm.h>
  #include <asm/hvm/support.h>
---- sle12sp1.orig/xen/arch/x86/hvm/vmx/vmx.c	2015-05-19 23:16:48.000000000 +0200
-+++ sle12sp1/xen/arch/x86/hvm/vmx/vmx.c	2015-07-08 14:13:38.000000000 +0200
+--- a/xen/arch/x86/hvm/vmx/vmx.c
++++ b/xen/arch/x86/hvm/vmx/vmx.c
 @@ -35,7 +35,6 @@
  #include <asm/types.h>
  #include <asm/debugreg.h>

++++++ 552d0fe8-x86-mtrr-include-asm-atomic.h.patch ++++++
--- /var/tmp/diff_new_pack.wPkcl5/_old  2015-08-31 22:57:48.000000000 +0200
+++ /var/tmp/diff_new_pack.wPkcl5/_new  2015-08-31 22:57:48.000000000 +0200
@@ -10,8 +10,8 @@
 Signed-off-by: David Vrabel <david.vrabel@citrix.com>
 Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
 
---- sle12sp1.orig/xen/arch/x86/cpu/mtrr/main.c	2015-01-14 18:44:18.000000000 +0100
-+++ sle12sp1/xen/arch/x86/cpu/mtrr/main.c	2015-07-08 14:13:42.000000000 +0200
+--- a/xen/arch/x86/cpu/mtrr/main.c
++++ b/xen/arch/x86/cpu/mtrr/main.c
 @@ -36,6 +36,7 @@
  #include <xen/lib.h>
  #include <xen/smp.h>

++++++ 552d293b-x86-vMSI-X-honor-all-mask-requests.patch ++++++
--- /var/tmp/diff_new_pack.wPkcl5/_old  2015-08-31 22:57:48.000000000 +0200
+++ /var/tmp/diff_new_pack.wPkcl5/_new  2015-08-31 22:57:48.000000000 +0200
@@ -1,3 +1,5 @@
+References: bsc#907514 bsc#910258 bsc#918984 bsc#923967
+
 # Commit 70a3cbb8c9cb17a61fa25c48ba3d7b44fd059c90
 # Date 2015-04-14 16:50:35 +0200
 # Author Jan Beulich <jbeulich@suse.com>
@@ -24,8 +26,8 @@
 Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
 Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
 
---- sle12sp1.orig/xen/arch/x86/hvm/vmsi.c	2015-07-08 11:22:13.000000000 +0200
-+++ sle12sp1/xen/arch/x86/hvm/vmsi.c	2015-04-20 09:30:29.000000000 +0200
+--- a/xen/arch/x86/hvm/vmsi.c
++++ b/xen/arch/x86/hvm/vmsi.c
 @@ -286,11 +286,11 @@ static int msixtbl_write(struct vcpu *v,
          goto out;
      }

++++++ 552d2966-x86-vMSI-X-add-valid-bits-for-read-acceleration.patch ++++++
--- /var/tmp/diff_new_pack.wPkcl5/_old  2015-08-31 22:57:48.000000000 +0200
+++ /var/tmp/diff_new_pack.wPkcl5/_new  2015-08-31 22:57:48.000000000 +0200
@@ -1,3 +1,5 @@
+References: bsc#907514 bsc#910258 bsc#918984 bsc#923967
+
 # Commit df9f5676b3711c95127d44e871ad7ca38d6ed28a
 # Date 2015-04-14 16:51:18 +0200
 # Author Jan Beulich <jbeulich@suse.com>
@@ -15,8 +17,8 @@
 Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
 Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
 
---- sle12sp1.orig/xen/arch/x86/hvm/vmsi.c	2015-04-20 09:30:29.000000000 +0200
-+++ sle12sp1/xen/arch/x86/hvm/vmsi.c	2015-04-20 09:32:57.000000000 +0200
+--- a/xen/arch/x86/hvm/vmsi.c
++++ b/xen/arch/x86/hvm/vmsi.c
 @@ -154,11 +154,14 @@ struct msixtbl_entry
      struct pci_dev *pdev;
      unsigned long gtable;       /* gpa of msix table */
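For orientation while reading the vMSI-X patches above and below: they all manipulate 16-byte MSI-X table entries. The layout below is from the PCI specification (the struct itself is illustrative, not code from the patches); the PCI_MSIX_ENTRY_*_OFFSET constants used in the diffs name these same fields:

    #include <stdint.h>

    /* One MSI-X table entry, 16 bytes, as defined by the PCI spec. */
    struct msix_entry {
        uint32_t addr_lo;   /* +0x0: message address, low 32 bits  */
        uint32_t addr_hi;   /* +0x4: message address, high 32 bits */
        uint32_t data;      /* +0x8: message data                  */
        uint32_t ctrl;      /* +0xc: vector control; bit 0 = mask  */
    };

    /* "Honoring a mask request" boils down to this bit: */
    static inline int msix_entry_masked(const volatile struct msix_entry *e)
    {
        return e->ctrl & 1;
    }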
++++++ 554c7aee-x86-provide-arch_fetch_and_add.patch ++++++
--- /var/tmp/diff_new_pack.wPkcl5/_old  2015-08-31 22:57:48.000000000 +0200
+++ /var/tmp/diff_new_pack.wPkcl5/_new  2015-08-31 22:57:48.000000000 +0200
@@ -11,8 +11,8 @@
 
 Signed-off-by: David Vrabel <david.vrabel@citrix.com>
 
---- sle12sp1.orig/xen/include/asm-x86/system.h	2015-01-14 18:44:18.000000000 +0100
-+++ sle12sp1/xen/include/asm-x86/system.h	2015-07-08 12:35:11.000000000 +0200
+--- a/xen/include/asm-x86/system.h
++++ b/xen/include/asm-x86/system.h
 @@ -118,6 +118,52 @@ static always_inline unsigned long __cmp
  })
 

++++++ 554c7b00-arm-provide-arch_fetch_and_add.patch ++++++
--- /var/tmp/diff_new_pack.wPkcl5/_old  2015-08-31 22:57:48.000000000 +0200
+++ /var/tmp/diff_new_pack.wPkcl5/_new  2015-08-31 22:57:48.000000000 +0200
@@ -16,8 +16,8 @@
 Signed-off-by: David Vrabel <david.vrabel@citrix.com>
 Acked-by: Ian Campbell <ian.campbell@citrix.com>
 
---- sle12sp1.orig/xen/include/asm-arm/system.h	2015-01-14 18:44:18.000000000 +0100
-+++ sle12sp1/xen/include/asm-arm/system.h	2015-07-08 12:35:16.000000000 +0200
+--- a/xen/include/asm-arm/system.h
++++ b/xen/include/asm-arm/system.h
 @@ -51,6 +51,8 @@
  # error "unknown ARM variant"
  #endif

++++++ 55534b0a-x86-provide-add_sized.patch ++++++
--- /var/tmp/diff_new_pack.wPkcl5/_old  2015-08-31 22:57:48.000000000 +0200
+++ /var/tmp/diff_new_pack.wPkcl5/_new  2015-08-31 22:57:48.000000000 +0200
@@ -12,8 +12,8 @@
 
 Signed-off-by: David Vrabel <david.vrabel@citrix.com>
 
---- sle12sp1.orig/xen/include/asm-x86/atomic.h	2015-01-14 18:44:18.000000000 +0100
-+++ sle12sp1/xen/include/asm-x86/atomic.h	2015-07-08 12:35:20.000000000 +0200
+--- a/xen/include/asm-x86/atomic.h
++++ b/xen/include/asm-x86/atomic.h
 @@ -14,6 +14,14 @@ static inline void name(volatile type *a
  {
      asm volatile("mov" size " %1,%0": "=m" (*(volatile type *)addr) \
                   :reg (val) barrier); }

++++++ 55534b25-arm-provide-add_sized.patch ++++++
--- /var/tmp/diff_new_pack.wPkcl5/_old  2015-08-31 22:57:48.000000000 +0200
+++ /var/tmp/diff_new_pack.wPkcl5/_new  2015-08-31 22:57:48.000000000 +0200
@@ -13,8 +13,8 @@
 Signed-off-by: David Vrabel <david.vrabel@citrix.com>
 Acked-by: Ian Campbell <ian.campbell@citrix.com>
 
---- sle12sp1.orig/xen/include/asm-arm/atomic.h	2015-01-14 18:44:18.000000000 +0100
-+++ sle12sp1/xen/include/asm-arm/atomic.h	2015-07-08 12:35:55.000000000 +0200
+--- a/xen/include/asm-arm/atomic.h
++++ b/xen/include/asm-arm/atomic.h
 @@ -23,6 +23,17 @@ static inline void name(volatile type *a
                 : reg (val));                                           \
  }
 
@@ -58,7 +58,7 @@
 +        default: __bad_atomic_size(); break;                           \
 +    }                                                                  \
 +})
-+
++
  /*
   * NB. I've pushed the volatile qualifier into the operations.  This allows
   * fast accessors such as _atomic_read() and _atomic_set() which don't give
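A rough sketch of the two primitives these four patches introduce, using the GCC builtin the ARM variants map to (the x86 variants are hand-written inline assembly in the real patches; the helper names below are ours): arch_fetch_and_add() atomically adds and returns the old value, add_sized() does a width-matched atomic add with no return value.

    #include <stdint.h>

    /* As in the asm-arm/system.h hunk shown above: */
    #define arch_fetch_and_add(x, v) __sync_fetch_and_add(x, v)

    /* Illustrative 16-bit instance of add_sized (the real macro
     * dispatches on sizeof(*p) and falls back to __bad_atomic_size). */
    static inline void add_sized_u16(volatile uint16_t *p, uint16_t v)
    {
        __sync_fetch_and_add(p, v);   /* old value deliberately unused */
    }

    /* Example use: taking a ticket, as the ticket-lock patch below needs. */
    static inline uint16_t take_ticket(volatile uint16_t *tail)
    {
        return arch_fetch_and_add(tail, 1);
    }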
++++++ 5555a4f8-use-ticket-locks-for-spin-locks.patch ++++++
--- /var/tmp/diff_new_pack.wPkcl5/_old  2015-08-31 22:57:48.000000000 +0200
+++ /var/tmp/diff_new_pack.wPkcl5/_new  2015-08-31 22:57:48.000000000 +0200
@@ -25,8 +25,8 @@
 Reviewed-by: Tim Deegan <tim@xen.org>
 Reviewed-by: Jan Beulich <jbeulich@suse.com>
 
---- sle12sp1.orig/xen/common/spinlock.c	2015-01-14 18:44:18.000000000 +0100
-+++ sle12sp1/xen/common/spinlock.c	2015-07-08 12:37:59.000000000 +0200
+--- a/xen/common/spinlock.c
++++ b/xen/common/spinlock.c
 @@ -115,125 +115,134 @@ void spin_debug_disable(void)
  #endif
 
@@ -229,8 +229,8 @@
  }
 
  void _spin_unlock_recursive(spinlock_t *lock)
---- sle12sp1.orig/xen/include/asm-arm/system.h	2015-07-08 12:35:16.000000000 +0200
-+++ sle12sp1/xen/include/asm-arm/system.h	2015-07-08 12:37:59.000000000 +0200
+--- a/xen/include/asm-arm/system.h
++++ b/xen/include/asm-arm/system.h
 @@ -53,6 +53,9 @@
 
  #define arch_fetch_and_add(x, v) __sync_fetch_and_add(x, v)
 
@@ -241,8 +241,8 @@
  extern struct vcpu *__context_switch(struct vcpu *prev, struct vcpu *next);
 
  #endif
---- sle12sp1.orig/xen/include/asm-x86/system.h	2015-07-08 12:35:11.000000000 +0200
-+++ sle12sp1/xen/include/asm-x86/system.h	2015-07-08 12:37:59.000000000 +0200
+--- a/xen/include/asm-x86/system.h
++++ b/xen/include/asm-x86/system.h
 @@ -185,6 +185,17 @@ static always_inline unsigned long __xad
  #define set_mb(var, value) do { xchg(&var, value); } while (0)
  #define set_wmb(var, value) do { var = value; wmb(); } while (0)
 
@@ -261,8 +261,8 @@
  #define local_irq_disable()     asm volatile ( "cli" : : : "memory" )
  #define local_irq_enable()      asm volatile ( "sti" : : : "memory" )
 
---- sle12sp1.orig/xen/include/xen/spinlock.h	2015-01-14 18:44:18.000000000 +0100
-+++ sle12sp1/xen/include/xen/spinlock.h	2015-07-08 12:37:59.000000000 +0200
+--- a/xen/include/xen/spinlock.h
++++ b/xen/include/xen/spinlock.h
 @@ -80,8 +80,7 @@ struct lock_profile_qhead {
      static struct lock_profile *__lock_profile_##name                        \
      __used_section(".lockprofile.data") =                                    \
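The refresh above only re-diffs file headers, so the idea behind 5555a4f8 is not visible here. As a rough illustration only (simplified, no memory barriers, field names invented rather than Xen's): a ticket lock keeps two counters; a locker atomically takes a ticket from the tail and spins until the head reaches it, which makes lock acquisition FIFO-fair instead of leaving it to cache-line luck:

    #include <stdint.h>

    typedef struct {
        volatile uint16_t head;   /* ticket currently being served */
        volatile uint16_t tail;   /* next free ticket              */
    } ticket_lock_t;

    static void ticket_lock(ticket_lock_t *l)
    {
        uint16_t me = __sync_fetch_and_add(&l->tail, 1);  /* grab a ticket */
        while ( l->head != me )
            ;   /* spin; real code adds cpu_relax() and barriers */
    }

    static void ticket_unlock(ticket_lock_t *l)
    {
        l->head++;   /* serve the next waiter, in arrival order */
    }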
++++++ 5555a5b9-x86-arm-remove-asm-spinlock-h.patch ++++++
--- /var/tmp/diff_new_pack.wPkcl5/_old  2015-08-31 22:57:48.000000000 +0200
+++ /var/tmp/diff_new_pack.wPkcl5/_new  2015-08-31 22:57:48.000000000 +0200
@@ -12,8 +12,8 @@
 Acked-by: Jan Beulich <jbeulich@suse.com>
 Acked-by: Ian Campbell <ian.campbell@citrix.com>
 
---- sle12sp1.orig/xen/arch/arm/README.LinuxPrimitives	2015-01-14 18:44:18.000000000 +0100
-+++ sle12sp1/xen/arch/arm/README.LinuxPrimitives	2015-07-08 12:41:16.000000000 +0200
+--- a/xen/arch/arm/README.LinuxPrimitives
++++ b/xen/arch/arm/README.LinuxPrimitives
 @@ -25,16 +25,6 @@ linux/arch/arm64/include/asm/atomic.h
  ---------------------------------------------------------------------
 
@@ -56,8 +56,8 @@
  mem*: last sync @ v3.16-rc6 (last commit: d98b90ea22b0)
 
  linux/arch/arm/lib/copy_template.S          xen/arch/arm/arm32/lib/copy_template.S
---- sle12sp1.orig/xen/include/asm-arm/arm32/spinlock.h	2015-01-14 18:44:18.000000000 +0100
-+++ /dev/null	1970-01-01 00:00:00.000000000 +0000
+--- a/xen/include/asm-arm/arm32/spinlock.h
++++ /dev/null
 @@ -1,66 +0,0 @@
 -#ifndef __ASM_ARM32_SPINLOCK_H
 -#define __ASM_ARM32_SPINLOCK_H
@@ -125,8 +125,8 @@
 - * indent-tabs-mode: nil
 - * End:
 - */
---- sle12sp1.orig/xen/include/asm-arm/arm64/spinlock.h	2015-01-14 18:44:18.000000000 +0100
-+++ /dev/null	1970-01-01 00:00:00.000000000 +0000
+--- a/xen/include/asm-arm/arm64/spinlock.h
++++ /dev/null
 @@ -1,63 +0,0 @@
 -/*
 - * Derived from Linux arch64 spinlock.h which is:
@@ -191,8 +191,8 @@
 - * indent-tabs-mode: nil
 - * End:
 - */
---- sle12sp1.orig/xen/include/asm-arm/spinlock.h	2013-07-09 20:57:12.000000000 +0200
-+++ /dev/null	1970-01-01 00:00:00.000000000 +0000
+--- a/xen/include/asm-arm/spinlock.h
++++ /dev/null
 @@ -1,23 +0,0 @@
 -#ifndef __ASM_SPINLOCK_H
 -#define __ASM_SPINLOCK_H
@@ -217,8 +217,8 @@
 - * indent-tabs-mode: nil
 - * End:
 - */
---- sle12sp1.orig/xen/include/asm-x86/spinlock.h	2015-01-14 18:44:18.000000000 +0100
-+++ /dev/null	1970-01-01 00:00:00.000000000 +0000
+--- a/xen/include/asm-x86/spinlock.h
++++ /dev/null
 @@ -1,34 +0,0 @@
 -#ifndef __ASM_SPINLOCK_H
 -#define __ASM_SPINLOCK_H
@@ -254,8 +254,8 @@
 -}
 -
 -#endif /* __ASM_SPINLOCK_H */
---- sle12sp1.orig/xen/include/xen/spinlock.h	2015-07-08 12:37:59.000000000 +0200
-+++ sle12sp1/xen/include/xen/spinlock.h	2015-07-08 12:41:16.000000000 +0200
+--- a/xen/include/xen/spinlock.h
++++ b/xen/include/xen/spinlock.h
 @@ -2,7 +2,6 @@
  #define __SPINLOCK_H__
 

++++++ 5555a8ec-introduce-non-contiguous-allocation.patch ++++++
--- /var/tmp/diff_new_pack.wPkcl5/_old  2015-08-31 22:57:48.000000000 +0200
+++ /var/tmp/diff_new_pack.wPkcl5/_new  2015-08-31 22:57:48.000000000 +0200
@@ -27,8 +27,8 @@
 Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
 Acked-by: Tim Deegan <tim@xen.org>
 
---- sle12sp1.orig/xen/common/vmap.c	2013-10-31 22:33:32.000000000 +0100
-+++ sle12sp1/xen/common/vmap.c	2015-07-08 14:18:50.000000000 +0200
+--- a/xen/common/vmap.c
++++ b/xen/common/vmap.c
 @@ -215,4 +215,75 @@ void vunmap(const void *va)
  #endif
      vm_free(va);
@@ -105,8 +105,8 @@
 +    free_domheap_page(pg);
 +}
  #endif
---- sle12sp1.orig/xen/include/asm-arm/mm.h	2015-01-14 18:44:18.000000000 +0100
-+++ sle12sp1/xen/include/asm-arm/mm.h	2015-07-08 14:18:50.000000000 +0200
+--- a/xen/include/asm-arm/mm.h
++++ b/xen/include/asm-arm/mm.h
 @@ -208,6 +208,8 @@ static inline void __iomem *ioremap_wc(p
  #define pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT)
  #define paddr_to_pfn(pa)  ((unsigned long)((pa) >> PAGE_SHIFT))
@@ -116,8 +116,8 @@
 
  /* Page-align address and convert to frame number format */
  #define paddr_to_pfn_aligned(paddr)    paddr_to_pfn(PAGE_ALIGN(paddr))
---- sle12sp1.orig/xen/include/asm-x86/page.h	2015-06-03 16:55:05.000000000 +0200
-+++ sle12sp1/xen/include/asm-x86/page.h	2015-07-08 14:18:50.000000000 +0200
+--- a/xen/include/asm-x86/page.h
++++ b/xen/include/asm-x86/page.h
 @@ -262,6 +262,8 @@ void copy_page_sse2(void *, const void *
  #define pfn_to_paddr(pfn)   __pfn_to_paddr(pfn)
  #define paddr_to_pfn(pa)    __paddr_to_pfn(pa)
@@ -127,8 +127,8 @@
 
  #endif /* !defined(__ASSEMBLY__) */
 
---- sle12sp1.orig/xen/include/xen/vmap.h	2013-07-09 20:57:12.000000000 +0200
-+++ sle12sp1/xen/include/xen/vmap.h	2015-07-08 14:18:50.000000000 +0200
+--- a/xen/include/xen/vmap.h
++++ b/xen/include/xen/vmap.h
 @@ -11,6 +11,9 @@ void *__vmap(const unsigned long *mfn, u
               unsigned int nr, unsigned int align, unsigned int flags);
  void *vmap(const unsigned long *mfn, unsigned int nr);

++++++ 5576f143-x86-adjust-PV-I-O-emulation-functions-types.patch ++++++
References: bsc#907514 bsc#910258 bsc#918984 bsc#923967

# Commit 85baced14dec2fafa9fe560969dba2ae28e8bebb
# Date 2015-06-09 15:59:31 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: adjust PV I/O emulation functions' types

admin_io_okay(), guest_io_read(), and guest_io_write() all don't need
their current "regs" parameter at all, and they don't use the vCPU
passed to them for other than obtaining its domain.
Drop the former and replace the latter by a struct domain pointer.

pci_cfg_okay() returns a boolean type, and its "write" parameter is of
boolean kind too.

All of them get called for the current vCPU (and hence current domain)
only, so name the domain parameters accordingly except in the
admin_io_okay() case, which a subsequent patch will use for simplifying
setup_io_bitmap().

Latch current->domain into a local variable in emulate_privileged_op().

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

# Commit 2d67a7a4d37a4759bcd7f2ee2d740497ad669c7d
# Date 2015-06-18 15:07:10 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: synchronize PCI config space access decoding

Both PV and HVM logic have similar but not similar enough code here.
Synchronize the two so that
- in the HVM case we don't unconditionally try to access extended
  config space
- in the PV case we pass a correct range to the XSM hook
- in the PV case we don't needlessly deny access when the operation
  isn't really on PCI config space
All this along with sharing the macros HVM already had here.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

Backport stripped down to just the pci_cfg_ok() adjustments.

--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1708,14 +1708,18 @@ static int admin_io_okay(
     return ioports_access_permitted(v->domain, port, port + bytes - 1);
 }
 
-static int pci_cfg_ok(struct domain *d, int write, int size)
+static bool_t pci_cfg_ok(struct domain *currd, bool_t write,
+                         unsigned int start, unsigned int size)
 {
     uint32_t machine_bdf;
-    uint16_t start, end;
-    if (!is_hardware_domain(d))
+
+    if ( !is_hardware_domain(currd) )
         return 0;
 
-    machine_bdf = (d->arch.pci_cf8 >> 8) & 0xFFFF;
+    if ( !CF8_ENABLED(currd->arch.pci_cf8) )
+        return 1;
+
+    machine_bdf = CF8_BDF(currd->arch.pci_cf8);
     if ( write )
     {
         const unsigned long *ro_map = pci_get_ro_map(0);
@@ -1723,9 +1727,9 @@ static int pci_cfg_ok(struct domain *d,
         if ( ro_map && test_bit(machine_bdf, ro_map) )
             return 0;
     }
-    start = d->arch.pci_cf8 & 0xFF;
+    start |= CF8_ADDR_LO(currd->arch.pci_cf8);
     /* AMD extended configuration space access? */
-    if ( (d->arch.pci_cf8 & 0x0F000000) &&
+    if ( CF8_ADDR_HI(currd->arch.pci_cf8) &&
          boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
          boot_cpu_data.x86 >= 0x10 && boot_cpu_data.x86 <= 0x17 )
     {
@@ -1734,12 +1738,11 @@ static int pci_cfg_ok(struct domain *d,
         if ( rdmsr_safe(MSR_AMD64_NB_CFG, msr_val) )
             return 0;
         if ( msr_val & (1ULL << AMD64_NB_CFG_CF8_EXT_ENABLE_BIT) )
-            start |= (d->arch.pci_cf8 >> 16) & 0xF00;
+            start |= CF8_ADDR_HI(currd->arch.pci_cf8);
     }
-    end = start + size - 1;
-    if (xsm_pci_config_permission(XSM_HOOK, d, machine_bdf, start, end, write))
-        return 0;
-    return 1;
+
+    return !xsm_pci_config_permission(XSM_HOOK, currd, machine_bdf,
+                                      start, start + size - 1, write);
 }
 
 uint32_t guest_io_read(
@@ -1793,7 +1796,7 @@ uint32_t guest_io_read(
             size = min(bytes, 4 - (port & 3));
             if ( size == 3 )
                 size = 2;
-            if ( pci_cfg_ok(v->domain, 0, size) )
+            if ( pci_cfg_ok(v->domain, 0, port & 3, size) )
                 sub_data = pci_conf_read(v->domain->arch.pci_cf8, port & 3, size);
         }
 
@@ -1866,7 +1869,7 @@ void guest_io_write(
             size = min(bytes, 4 - (port & 3));
             if ( size == 3 )
                 size = 2;
-            if ( pci_cfg_ok(v->domain, 1, size) )
+            if ( pci_cfg_ok(v->domain, 1, port & 3, size) )
                 pci_conf_write(v->domain->arch.pci_cf8, port & 3, size, data);
         }
 
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2357,11 +2357,6 @@ void hvm_vcpu_down(struct vcpu *v)
 static struct hvm_ioreq_server *hvm_select_ioreq_server(struct domain *d,
                                                         ioreq_t *p)
 {
-#define CF8_BDF(cf8)     (((cf8) & 0x00ffff00) >> 8)
-#define CF8_ADDR_LO(cf8) ((cf8) & 0x000000fc)
-#define CF8_ADDR_HI(cf8) (((cf8) & 0x0f000000) >> 16)
-#define CF8_ENABLED(cf8) (!!((cf8) & 0x80000000))
-
     struct hvm_ioreq_server *s;
     uint32_t cf8;
     uint8_t type;
@@ -2446,11 +2441,6 @@ static struct hvm_ioreq_server *hvm_sele
     }
 
     return d->arch.hvm_domain.default_ioreq_server;
-
-#undef CF8_ADDR_ENABLED
-#undef CF8_ADDR_HI
-#undef CF8_ADDR_LO
-#undef CF8_BDF
 }
 
 int hvm_buffered_io_send(ioreq_t *p)
--- a/xen/include/asm-x86/pci.h
+++ b/xen/include/asm-x86/pci.h
@@ -1,6 +1,11 @@
 #ifndef __X86_PCI_H__
 #define __X86_PCI_H__
 
+#define CF8_BDF(cf8)     (  ((cf8) & 0x00ffff00) >> 8)
+#define CF8_ADDR_LO(cf8) (   (cf8) & 0x000000fc)
+#define CF8_ADDR_HI(cf8) (  ((cf8) & 0x0f000000) >> 16)
+#define CF8_ENABLED(cf8) (!!((cf8) & 0x80000000))
+
 #define IS_SNB_GFX(id) (id == 0x01068086 || id == 0x01168086 \
                         || id == 0x01268086 || id == 0x01028086 \
                         || id == 0x01128086 || id == 0x01228086 \
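To make the CF8_* macros the patch moves into asm-x86/pci.h concrete, here is a worked decode of a port-0xCF8 value. The macros are copied from the hunk above; the example value itself is ours, chosen for illustration:

    #include <assert.h>
    #include <stdint.h>

    #define CF8_BDF(cf8)     (  ((cf8) & 0x00ffff00) >> 8)
    #define CF8_ADDR_LO(cf8) (   (cf8) & 0x000000fc)
    #define CF8_ADDR_HI(cf8) (  ((cf8) & 0x0f000000) >> 16)
    #define CF8_ENABLED(cf8) (!!((cf8) & 0x80000000))

    int main(void)
    {
        /* enable | bus 0x3f | device 0x1d | function 2 | register 0x4c */
        uint32_t cf8 = 0x80000000u | (0x3fu << 16) | (0x1du << 11)
                     | (2u << 8) | 0x4cu;        /* = 0x803fea4c */

        assert(CF8_ENABLED(cf8) == 1);
        assert(CF8_BDF(cf8)     == 0x3fea);  /* bus 0x3f, devfn 0xea   */
        assert(CF8_ADDR_LO(cf8) == 0x4c);    /* config-space offset    */
        assert(CF8_ADDR_HI(cf8) == 0);       /* AMD extended-cfg bits  */
        return 0;
    }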
++++++ 55795a52-x86-vMSI-X-support-qword-MMIO-access.patch ++++++
--- /var/tmp/diff_new_pack.wPkcl5/_old  2015-08-31 22:57:48.000000000 +0200
+++ /var/tmp/diff_new_pack.wPkcl5/_new  2015-08-31 22:57:48.000000000 +0200
@@ -1,3 +1,5 @@
+References: bsc#907514 bsc#910258 bsc#918984 bsc#923967
+
 # Commit 284ffb4f9b0d5c3a33c4c5bd87645d0cc342ca96
 # Date 2015-06-11 11:52:18 +0200
 # Author Jan Beulich <jbeulich@suse.com>
@@ -49,7 +51,7 @@
      }
 
      r = X86EMUL_OKAY;
-@@ -268,7 +280,7 @@ static int msixtbl_write(struct vcpu *v, unsigned long address,
+@@ -268,7 +280,7 @@ static int msixtbl_write(struct vcpu *v,
      unsigned long flags, orig;
      struct irq_desc *desc;
 
-        return r;
 
      rcu_read_lock(&msixtbl_rcu_lock);
-@@ -279,16 +291,23 @@ static int msixtbl_write(struct vcpu *v, unsigned long address,
+@@ -279,16 +291,23 @@ static int msixtbl_write(struct vcpu *v,
      nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
      offset = address & (PCI_MSIX_ENTRY_SIZE - 1);
@@ -85,7 +87,7 @@
      }
 
      /* Exit to device model when unmasking and address/data got modified. */
-@@ -352,7 +371,8 @@ static int msixtbl_write(struct vcpu *v, unsigned long address,
+@@ -352,7 +371,8 @@ static int msixtbl_write(struct vcpu *v,
 
  unlock:
      spin_unlock_irqrestore(&desc->lock, flags);

++++++ 557eb55f-gnttab-per-active-entry-locking.patch ++++++
--- /var/tmp/diff_new_pack.wPkcl5/_old  2015-08-31 22:57:48.000000000 +0200
+++ /var/tmp/diff_new_pack.wPkcl5/_new  2015-08-31 22:57:48.000000000 +0200
@@ -17,8 +17,8 @@
 Signed-off-by: David Vrabel <david.vrabel@citrix.com>
 Reviewed-by: Jan Beulich <jbeulich@suse.com>
 
---- sle12sp1.orig/docs/misc/grant-tables.txt	2008-10-14 19:44:06.000000000 +0200
-+++ sle12sp1/docs/misc/grant-tables.txt	2015-07-08 13:49:42.000000000 +0200
+--- a/docs/misc/grant-tables.txt
++++ b/docs/misc/grant-tables.txt
 @@ -63,6 +63,7 @@ is complete.
    act->domid : remote domain being granted rights
    act->frame : machine frame being granted
@@ -75,8 +75,8 @@
 
 ********************************************************************************
 
---- sle12sp1.orig/xen/common/grant_table.c	2015-06-26 15:38:17.000000000 +0200
-+++ sle12sp1/xen/common/grant_table.c	2015-07-08 13:49:42.000000000 +0200
+--- a/xen/common/grant_table.c
++++ b/xen/common/grant_table.c
 @@ -157,10 +157,13 @@ struct active_grant_entry {
                                 in the page.                           */
      unsigned      length:16; /* For sub-page grants, the length of the
*/ ---- sle12sp1.orig/xen/include/xen/grant_table.h 2015-01-14 18:44:18.000000000 +0100 -+++ sle12sp1/xen/include/xen/grant_table.h 2015-07-08 13:49:46.000000000 +0200 +--- a/xen/include/xen/grant_table.h ++++ b/xen/include/xen/grant_table.h @@ -82,6 +82,8 @@ struct grant_table { struct grant_mapping **maptrack; unsigned int maptrack_head; ++++++ 557eb620-gnttab-make-the-grant-table-lock-a-read-write-lock.patch ++++++ --- /var/tmp/diff_new_pack.wPkcl5/_old 2015-08-31 22:57:48.000000000 +0200 +++ /var/tmp/diff_new_pack.wPkcl5/_new 2015-08-31 22:57:48.000000000 +0200 @@ -23,8 +23,8 @@ Signed-off-by: David Vrabel <david.vrabel@citrix.com> Reviewed-by: Jan Beulich <jbeulich@suse.com> ---- sle12sp1.orig/docs/misc/grant-tables.txt 2015-07-08 13:49:46.000000000 +0200 -+++ sle12sp1/docs/misc/grant-tables.txt 2015-07-08 13:49:47.000000000 +0200 +--- a/docs/misc/grant-tables.txt ++++ b/docs/misc/grant-tables.txt @@ -83,7 +83,7 @@ is complete. ~~~~~~~ Xen uses several locks to serialize access to the internal grant table state. @@ -91,8 +91,8 @@ ******************************************************************************** ---- sle12sp1.orig/xen/arch/arm/mm.c 2015-01-14 18:44:18.000000000 +0100 -+++ sle12sp1/xen/arch/arm/mm.c 2015-07-08 13:49:47.000000000 +0200 +--- a/xen/arch/arm/mm.c ++++ b/xen/arch/arm/mm.c @@ -1037,7 +1037,7 @@ int xenmem_add_to_physmap_one( switch ( space ) { @@ -111,9 +111,9 @@ break; case XENMAPSPACE_shared_info: if ( idx != 0 ) ---- sle12sp1.orig/xen/arch/x86/mm.c 2015-07-08 00:00:00.000000000 +0200 -+++ sle12sp1/xen/arch/x86/mm.c 2015-07-08 13:49:47.000000000 +0200 -@@ -4594,7 +4594,7 @@ int xenmem_add_to_physmap_one( +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -4587,7 +4587,7 @@ int xenmem_add_to_physmap_one( mfn = virt_to_mfn(d->shared_info); break; case XENMAPSPACE_grant_table: @@ -122,7 +122,7 @@ if ( d->grant_table->gt_version == 0 ) d->grant_table->gt_version = 1; -@@ -4616,7 +4616,7 @@ int xenmem_add_to_physmap_one( +@@ -4609,7 +4609,7 @@ int xenmem_add_to_physmap_one( mfn = virt_to_mfn(d->grant_table->shared_raw[idx]); } @@ -131,8 +131,8 @@ break; case XENMAPSPACE_gmfn_range: case XENMAPSPACE_gmfn: ---- sle12sp1.orig/xen/common/grant_table.c 2015-07-08 13:49:46.000000000 +0200 -+++ sle12sp1/xen/common/grant_table.c 2015-07-08 13:49:47.000000000 +0200 +--- a/xen/common/grant_table.c ++++ b/xen/common/grant_table.c @@ -196,7 +196,7 @@ active_entry_acquire(struct grant_table { struct active_grant_entry *act; @@ -699,8 +699,8 @@ if ( first ) printk("grant-table for remote domain:%5d ... " ---- sle12sp1.orig/xen/include/xen/grant_table.h 2015-07-08 13:49:46.000000000 +0200 -+++ sle12sp1/xen/include/xen/grant_table.h 2015-07-08 13:49:47.000000000 +0200 +--- a/xen/include/xen/grant_table.h ++++ b/xen/include/xen/grant_table.h @@ -64,6 +64,11 @@ struct grant_mapping { /* Per-domain grant information. 
---- sle12sp1.orig/xen/include/xen/grant_table.h	2015-01-14 18:44:18.000000000 +0100
-+++ sle12sp1/xen/include/xen/grant_table.h	2015-07-08 13:49:46.000000000 +0200
+--- a/xen/include/xen/grant_table.h
++++ b/xen/include/xen/grant_table.h
 @@ -82,6 +82,8 @@ struct grant_table {
      struct grant_mapping **maptrack;
      unsigned int          maptrack_head;

++++++ 557eb620-gnttab-make-the-grant-table-lock-a-read-write-lock.patch ++++++
--- /var/tmp/diff_new_pack.wPkcl5/_old  2015-08-31 22:57:48.000000000 +0200
+++ /var/tmp/diff_new_pack.wPkcl5/_new  2015-08-31 22:57:48.000000000 +0200
@@ -23,8 +23,8 @@
 Signed-off-by: David Vrabel <david.vrabel@citrix.com>
 Reviewed-by: Jan Beulich <jbeulich@suse.com>
 
---- sle12sp1.orig/docs/misc/grant-tables.txt	2015-07-08 13:49:46.000000000 +0200
-+++ sle12sp1/docs/misc/grant-tables.txt	2015-07-08 13:49:47.000000000 +0200
+--- a/docs/misc/grant-tables.txt
++++ b/docs/misc/grant-tables.txt
 @@ -83,7 +83,7 @@ is complete.
  ~~~~~~~
  Xen uses several locks to serialize access to the internal grant table state.
@@ -91,8 +91,8 @@
 
 ********************************************************************************
 
---- sle12sp1.orig/xen/arch/arm/mm.c	2015-01-14 18:44:18.000000000 +0100
-+++ sle12sp1/xen/arch/arm/mm.c	2015-07-08 13:49:47.000000000 +0200
+--- a/xen/arch/arm/mm.c
++++ b/xen/arch/arm/mm.c
 @@ -1037,7 +1037,7 @@ int xenmem_add_to_physmap_one(
      switch ( space )
      {
      case XENMAPSPACE_grant_table:
@@ -111,9 +111,9 @@
          break;
      case XENMAPSPACE_shared_info:
          if ( idx != 0 )
---- sle12sp1.orig/xen/arch/x86/mm.c	2015-07-08 00:00:00.000000000 +0200
-+++ sle12sp1/xen/arch/x86/mm.c	2015-07-08 13:49:47.000000000 +0200
-@@ -4594,7 +4594,7 @@ int xenmem_add_to_physmap_one(
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -4587,7 +4587,7 @@ int xenmem_add_to_physmap_one(
          mfn = virt_to_mfn(d->shared_info);
          break;
      case XENMAPSPACE_grant_table:
@@ -122,7 +122,7 @@
          if ( d->grant_table->gt_version == 0 )
              d->grant_table->gt_version = 1;
 
-@@ -4616,7 +4616,7 @@ int xenmem_add_to_physmap_one(
+@@ -4609,7 +4609,7 @@ int xenmem_add_to_physmap_one(
              mfn = virt_to_mfn(d->grant_table->shared_raw[idx]);
      }
 
@@ -131,8 +131,8 @@
          break;
      case XENMAPSPACE_gmfn_range:
      case XENMAPSPACE_gmfn:
---- sle12sp1.orig/xen/common/grant_table.c	2015-07-08 13:49:46.000000000 +0200
-+++ sle12sp1/xen/common/grant_table.c	2015-07-08 13:49:47.000000000 +0200
+--- a/xen/common/grant_table.c
++++ b/xen/common/grant_table.c
 @@ -196,7 +196,7 @@ active_entry_acquire(struct grant_table
  {
      struct active_grant_entry *act;
 
@@ -699,8 +699,8 @@
      if ( first )
          printk("grant-table for remote domain:%5d ... "
---- sle12sp1.orig/xen/include/xen/grant_table.h	2015-07-08 13:49:46.000000000 +0200
-+++ sle12sp1/xen/include/xen/grant_table.h	2015-07-08 13:49:47.000000000 +0200
+--- a/xen/include/xen/grant_table.h
++++ b/xen/include/xen/grant_table.h
 @@ -64,6 +64,11 @@ struct grant_mapping {
 
  /* Per-domain grant information. */
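The grant-tables.txt fragments quoted in these three patches describe the resulting discipline: take the (now read/write) grant-table lock first, then the per-active-entry spinlock. A minimal model of that shape, using pthread primitives purely for illustration (the struct layout and initialization are invented here; Xen's real types differ):

    #include <pthread.h>

    struct active_entry {
        pthread_spinlock_t lock;    /* per-entry lock from 557eb55f */
        unsigned int frame;
    };

    struct grant_table {
        pthread_rwlock_t lock;      /* table lock, rwlock per 557eb620 */
        struct active_entry *active;/* array indexed by grant reference */
    };

    /* Lock order is always table lock -> entry lock, never the reverse;
     * the caller is expected to hold gt->lock (read side suffices). */
    static struct active_entry *active_entry_acquire(struct grant_table *gt,
                                                     unsigned int ref)
    {
        struct active_entry *act = &gt->active[ref];
        pthread_spin_lock(&act->lock);
        return act;
    }

    static void active_entry_release(struct active_entry *act)
    {
        pthread_spin_unlock(&act->lock);
    }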
++++++ 557ffab8-evtchn-factor-out-freeing-an-event-channel.patch ++++++
--- /var/tmp/diff_new_pack.wPkcl5/_old  2015-08-31 22:57:48.000000000 +0200
+++ /var/tmp/diff_new_pack.wPkcl5/_new  2015-08-31 22:57:48.000000000 +0200
@@ -9,8 +9,8 @@
 
 Signed-off-by: David Vrabel <david.vrabel@citrix.com>
 
---- sle12sp1.orig/xen/common/event_channel.c	2015-07-08 12:33:47.000000000 +0200
-+++ sle12sp1/xen/common/event_channel.c	2015-07-08 13:53:49.000000000 +0200
+--- a/xen/common/event_channel.c
++++ b/xen/common/event_channel.c
 @@ -194,6 +194,17 @@ static int get_free_port(struct domain *
      return port;
  }
 
@@ -29,7 +29,7 @@
 
  static long evtchn_alloc_unbound(evtchn_alloc_unbound_t *alloc)
  {
-@@ -571,14 +582,7 @@ static long __evtchn_close(struct domain
+@@ -568,14 +579,7 @@ static long __evtchn_close(struct domain
          BUG();
      }
 

++++++ 5582bf43-evtchn-simplify-port_is_valid.patch ++++++
--- /var/tmp/diff_new_pack.wPkcl5/_old  2015-08-31 22:57:48.000000000 +0200
+++ /var/tmp/diff_new_pack.wPkcl5/_new  2015-08-31 22:57:48.000000000 +0200
@@ -13,10 +13,8 @@
 
 Signed-off-by: David Vrabel <david.vrabel@citrix.com>
 
-Index: xen-4.5.1-testing/xen/common/event_channel.c
-===================================================================
---- xen-4.5.1-testing.orig/xen/common/event_channel.c
-+++ xen-4.5.1-testing/xen/common/event_channel.c
+--- a/xen/common/event_channel.c
++++ b/xen/common/event_channel.c
 @@ -191,6 +191,8 @@ static int get_free_port(struct domain *
          return -ENOMEM;
      bucket_from_port(d, port) = chn;
@@ -26,7 +24,7 @@
 
      return port;
  }
 
-@@ -1267,6 +1269,7 @@ int evtchn_init(struct domain *d)
+@@ -1264,6 +1266,7 @@ int evtchn_init(struct domain *d)
      d->evtchn = alloc_evtchn_bucket(d, 0);
      if ( !d->evtchn )
          return -ENOMEM;
@@ -34,10 +32,8 @@
 
      spin_lock_init(&d->event_lock);
      if ( get_free_port(d) != 0 )
-Index: xen-4.5.1-testing/xen/include/xen/event.h
-===================================================================
---- xen-4.5.1-testing.orig/xen/include/xen/event.h
-+++ xen-4.5.1-testing/xen/include/xen/event.h
+--- a/xen/include/xen/event.h
++++ b/xen/include/xen/event.h
 @@ -90,11 +90,7 @@ static inline bool_t port_is_valid(struc
  {
      if ( p >= d->max_evtchns )
          return 0;
@@ -51,11 +47,9 @@
  }
 
  static inline struct evtchn *evtchn_from_port(struct domain *d, unsigned int p)
-Index: xen-4.5.1-testing/xen/include/xen/sched.h
-===================================================================
---- xen-4.5.1-testing.orig/xen/include/xen/sched.h
-+++ xen-4.5.1-testing/xen/include/xen/sched.h
+--- a/xen/include/xen/sched.h
++++ b/xen/include/xen/sched.h
-@@ -335,8 +335,9 @@ struct domain
+@@ -339,8 +339,9 @@ struct domain
      /* Event channel information. */
      struct evtchn   *evtchn;                         /* first bucket only */
      struct evtchn  **evtchn_group[NR_EVTCHN_GROUPS]; /* all other buckets */
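The sched.h context above ("first bucket only" / "all other buckets") hints at the two-level port lookup that port_is_valid() and evtchn_from_port() operate on. A toy model of that indexing (sizes and structure are illustrative assumptions, not Xen's actual constants):

    /* Two-level port -> evtchn lookup, modeled after the fields above. */
    #define EVTCHNS_PER_BUCKET 128
    #define BUCKETS_PER_GROUP  256

    struct evtchn { unsigned int port; /* ... */ };

    struct domain_evtchns {
        struct evtchn  *first_bucket;   /* ports 0..127, always present */
        struct evtchn **group[8];       /* lazily allocated buckets     */
    };

    static struct evtchn *evtchn_from_port(struct domain_evtchns *d,
                                           unsigned int port)
    {
        unsigned int bucket = port / EVTCHNS_PER_BUCKET;

        if ( bucket == 0 )
            return &d->first_bucket[port];
        return &d->group[bucket / BUCKETS_PER_GROUP]
                        [bucket % BUCKETS_PER_GROUP]
                        [port % EVTCHNS_PER_BUCKET];
    }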
-701,13 +705,14 @@ static u64 read_pci_mem_bar(u16 seg, u8 * requested MSI-X entries with allocated irqs or non-zero for otherwise. **/ static int msix_capability_init(struct pci_dev *dev, @@ -118,7 +120,7 @@ u16 control; u64 table_paddr; u32 table_offset; -@@ -719,7 +724,6 @@ static int msix_capability_init(struct pci_dev *dev, +@@ -719,7 +724,6 @@ static int msix_capability_init(struct p ASSERT(spin_is_locked(&pcidevs_lock)); @@ -126,7 +128,7 @@ control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos)); msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */ -@@ -884,10 +888,9 @@ static int __pci_enable_msi(struct msi_info *msi, struct msi_desc **desc) +@@ -884,10 +888,9 @@ static int __pci_enable_msi(struct msi_i old_desc = find_msi_entry(pdev, msi->irq, PCI_CAP_ID_MSI); if ( old_desc ) { @@ -140,7 +142,7 @@ *desc = old_desc; return 0; } -@@ -895,10 +898,10 @@ static int __pci_enable_msi(struct msi_info *msi, struct msi_desc **desc) +@@ -895,10 +898,10 @@ static int __pci_enable_msi(struct msi_i old_desc = find_msi_entry(pdev, -1, PCI_CAP_ID_MSIX); if ( old_desc ) { @@ -155,7 +157,7 @@ } return msi_capability_init(pdev, msi->irq, desc, msi->entry_nr); -@@ -912,7 +915,6 @@ static void __pci_disable_msi(struct msi_desc *entry) +@@ -912,7 +915,6 @@ static void __pci_disable_msi(struct msi msi_set_enable(dev, 0); BUG_ON(list_empty(&dev->msi_list)); @@ -163,7 +165,7 @@ } /** -@@ -932,7 +934,7 @@ static void __pci_disable_msi(struct msi_desc *entry) +@@ -932,7 +934,7 @@ static void __pci_disable_msi(struct msi **/ static int __pci_enable_msix(struct msi_info *msi, struct msi_desc **desc) { @@ -172,7 +174,7 @@ struct pci_dev *pdev; u16 control; u8 slot = PCI_SLOT(msi->devfn); -@@ -941,23 +943,22 @@ static int __pci_enable_msix(struct msi_info *msi, struct msi_desc **desc) +@@ -941,23 +943,22 @@ static int __pci_enable_msix(struct msi_ ASSERT(spin_is_locked(&pcidevs_lock)); pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn); @@ -202,7 +204,7 @@ *desc = old_desc; return 0; } -@@ -965,15 +966,13 @@ static int __pci_enable_msix(struct msi_info *msi, struct msi_desc **desc) +@@ -965,15 +966,13 @@ static int __pci_enable_msix(struct msi_ old_desc = find_msi_entry(pdev, -1, PCI_CAP_ID_MSI); if ( old_desc ) { @@ -223,7 +225,7 @@ } static void _pci_cleanup_msix(struct arch_msix *msix) -@@ -991,19 +990,16 @@ static void _pci_cleanup_msix(struct arch_msix *msix) +@@ -991,19 +990,16 @@ static void _pci_cleanup_msix(struct arc static void __pci_disable_msix(struct msi_desc *entry) { @@ -252,7 +254,7 @@ msix_set_enable(dev, 0); BUG_ON(list_empty(&dev->msi_list)); -@@ -1045,7 +1041,7 @@ int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool_t off) +@@ -1045,7 +1041,7 @@ int pci_prepare_msix(u16 seg, u8 bus, u8 u16 control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos)); @@ -261,7 +263,7 @@ multi_msix_capable(control)); } spin_unlock(&pcidevs_lock); -@@ -1064,8 +1060,8 @@ int pci_enable_msi(struct msi_info *msi, struct msi_desc **desc) +@@ -1064,8 +1060,8 @@ int pci_enable_msi(struct msi_info *msi, if ( !use_msi ) return -EPERM; @@ -272,7 +274,7 @@ } /* -@@ -1115,7 +1111,9 @@ int pci_restore_msi_state(struct pci_dev *pdev) +@@ -1115,7 +1111,9 @@ int pci_restore_msi_state(struct pci_dev if ( !pdev ) return -EINVAL; ++++++ 5583da09-x86-MSI-track-host-and-guest-masking-separately.patch ++++++ --- /var/tmp/diff_new_pack.wPkcl5/_old 2015-08-31 22:57:48.000000000 +0200 +++ /var/tmp/diff_new_pack.wPkcl5/_new 2015-08-31 22:57:48.000000000 +0200 @@ -1,3 +1,5 @@ +References: 
bsc#907514 bsc#910258 bsc#918984 bsc#923967 + # Commit ad28e42bd1d28d746988ed71654e8aa670629753 # Date 2015-06-19 10:59:53 +0200 # Author Jan Beulich <jbeulich@suse.com> @@ -11,8 +13,28 @@ Signed-off-by: Jan Beulich <jbeulich@suse.com> Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> ---- sle12sp1.orig/xen/arch/x86/hpet.c 2015-01-14 18:44:18.000000000 +0100 -+++ sle12sp1/xen/arch/x86/hpet.c 2015-07-08 00:00:00.000000000 +0200 +# Commit 84d6add5593d865736831d150da7c38588f669f6 +# Date 2015-07-10 12:36:24 +0200 +# Author Jan Beulich <jbeulich@suse.com> +# Committer Jan Beulich <jbeulich@suse.com> +x86/MSI: fix guest unmasking when handling IRQ via event channel + +Rather than assuming only PV guests need special treatment (and +dealing with that directly when an IRQ gets set up), keep all guest MSI +IRQs masked until either the (HVM) guest unmasks them via vMSI or the +(PV, PVHVM, or PVH) guest sets up an event channel for it. + +To not further clutter the common evtchn_bind_pirq() with x86-specific +code, introduce an arch_evtchn_bind_pirq() hook instead. + +Reported-by: Sander Eikelenboom <linux@eikelenboom.it> +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Tested-by: Sander Eikelenboom <linux@eikelenboom.it> +Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> +Acked-by: Ian Campbell <ian.campbell@citrix.com> + +--- a/xen/arch/x86/hpet.c ++++ b/xen/arch/x86/hpet.c @@ -240,7 +240,7 @@ static void hpet_msi_unmask(struct irq_d cfg = hpet_read32(HPET_Tn_CFG(ch->idx)); cfg |= HPET_TN_ENABLE; @@ -31,8 +53,8 @@ } static int hpet_msi_write(struct hpet_event_channel *ch, struct msi_msg *msg) ---- sle12sp1.orig/xen/arch/x86/hvm/vmsi.c 2015-07-08 00:00:00.000000000 +0200 -+++ sle12sp1/xen/arch/x86/hvm/vmsi.c 2015-07-08 00:00:00.000000000 +0200 +--- a/xen/arch/x86/hvm/vmsi.c ++++ b/xen/arch/x86/hvm/vmsi.c @@ -219,7 +219,6 @@ static int msixtbl_read( { unsigned long offset; @@ -113,8 +135,36 @@ unlock: spin_unlock_irqrestore(&desc->lock, flags); ---- sle12sp1.orig/xen/arch/x86/msi.c 2015-07-08 00:00:00.000000000 +0200 -+++ sle12sp1/xen/arch/x86/msi.c 2015-07-08 00:00:00.000000000 +0200 +--- a/xen/arch/x86/irq.c ++++ b/xen/arch/x86/irq.c +@@ -2502,6 +2502,25 @@ int unmap_domain_pirq_emuirq(struct doma + return ret; + } + ++void arch_evtchn_bind_pirq(struct domain *d, int pirq) ++{ ++ int irq = domain_pirq_to_irq(d, pirq); ++ struct irq_desc *desc; ++ unsigned long flags; ++ ++ if ( irq <= 0 ) ++ return; ++ ++ if ( is_hvm_domain(d) ) ++ map_domain_emuirq_pirq(d, pirq, IRQ_PT); ++ ++ desc = irq_to_desc(irq); ++ spin_lock_irqsave(&desc->lock, flags); ++ if ( desc->msi_desc ) ++ guest_mask_msi_irq(desc, 0); ++ spin_unlock_irqrestore(&desc->lock, flags); ++} ++ + bool_t hvm_domain_use_pirq(const struct domain *d, const struct pirq *pirq) + { + return is_hvm_domain(d) && pirq && +--- a/xen/arch/x86/msi.c ++++ b/xen/arch/x86/msi.c @@ -349,9 +349,10 @@ int msi_maskable_irq(const struct msi_de || entry->msi_attrib.maskbit; } @@ -137,7 +187,7 @@ } static int msi_get_mask_bit(const struct msi_desc *entry) -@@ -405,20 +407,33 @@ static int msi_get_mask_bit(const struct +@@ -405,20 +407,30 @@ static int msi_get_mask_bit(const struct void mask_msi_irq(struct irq_desc *desc) { @@ -159,10 +209,7 @@ static unsigned int startup_msi_irq(struct irq_desc *desc) { - unmask_msi_irq(desc); -+ bool_t guest_masked = (desc->status & IRQ_GUEST) && -+ is_hvm_domain(desc->msi_desc->dev->domain); -+ -+ msi_set_mask_bit(desc, 0, guest_masked); ++ msi_set_mask_bit(desc, 0, !!(desc->status & IRQ_GUEST)); return 0; } @@ 
-174,7 +221,7 @@ void ack_nonmaskable_msi_irq(struct irq_desc *desc) { irq_complete_move(desc); -@@ -443,7 +458,7 @@ void end_nonmaskable_msi_irq(struct irq_ +@@ -443,7 +455,7 @@ void end_nonmaskable_msi_irq(struct irq_ static hw_irq_controller pci_msi_maskable = { .typename = "PCI-MSI/-X", .startup = startup_msi_irq, @@ -183,7 +230,7 @@ .enable = unmask_msi_irq, .disable = mask_msi_irq, .ack = ack_maskable_msi_irq, -@@ -591,7 +606,8 @@ static int msi_capability_init(struct pc +@@ -591,7 +603,8 @@ static int msi_capability_init(struct pc entry[i].msi_attrib.is_64 = is_64bit_address(control); entry[i].msi_attrib.entry_nr = i; entry[i].msi_attrib.maskbit = is_mask_bit_support(control); @@ -193,7 +240,7 @@ entry[i].msi_attrib.pos = pos; if ( entry[i].msi_attrib.maskbit ) entry[i].msi.mpos = mpos; -@@ -817,7 +833,8 @@ static int msix_capability_init(struct p +@@ -817,7 +830,8 @@ static int msix_capability_init(struct p entry->msi_attrib.is_64 = 1; entry->msi_attrib.entry_nr = msi->entry_nr; entry->msi_attrib.maskbit = 1; @@ -203,7 +250,7 @@ entry->msi_attrib.pos = pos; entry->irq = msi->irq; entry->dev = dev; -@@ -1152,7 +1169,8 @@ int pci_restore_msi_state(struct pci_dev +@@ -1152,7 +1166,8 @@ int pci_restore_msi_state(struct pci_dev for ( i = 0; ; ) { @@ -213,7 +260,7 @@ if ( !--nr ) break; -@@ -1304,7 +1322,7 @@ static void dump_msi(unsigned char key) +@@ -1304,7 +1319,7 @@ static void dump_msi(unsigned char key) else mask = '?'; printk(" %-6s%4u vec=%02x%7s%6s%3sassert%5s%7s" @@ -222,7 +269,7 @@ type, irq, (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT, data & MSI_DATA_DELIVERY_LOWPRI ? "lowest" : "fixed", -@@ -1312,7 +1330,10 @@ static void dump_msi(unsigned char key) +@@ -1312,7 +1327,10 @@ static void dump_msi(unsigned char key) data & MSI_DATA_LEVEL_ASSERT ? "" : "de", addr & MSI_ADDR_DESTMODE_LOGIC ? "log" : "phys", addr & MSI_ADDR_REDIRECTION_LOWPRI ? 
"lowest" : "cpu", @@ -234,8 +281,22 @@ } } ---- sle12sp1.orig/xen/drivers/passthrough/amd/iommu_init.c 2015-01-14 18:44:18.000000000 +0100 -+++ sle12sp1/xen/drivers/passthrough/amd/iommu_init.c 2015-07-08 00:00:00.000000000 +0200 +--- a/xen/common/event_channel.c ++++ b/xen/common/event_channel.c +@@ -445,10 +445,7 @@ static long evtchn_bind_pirq(evtchn_bind + + bind->port = port; + +-#ifdef CONFIG_X86 +- if ( is_hvm_domain(d) && domain_pirq_to_irq(d, pirq) > 0 ) +- map_domain_emuirq_pirq(d, pirq, IRQ_PT); +-#endif ++ arch_evtchn_bind_pirq(d, pirq); + + out: + spin_unlock(&d->event_lock); +--- a/xen/drivers/passthrough/amd/iommu_init.c ++++ b/xen/drivers/passthrough/amd/iommu_init.c @@ -451,7 +451,7 @@ static void iommu_msi_unmask(struct irq_ spin_lock_irqsave(&iommu->lock, flags); amd_iommu_msi_enable(iommu, IOMMU_CONTROL_ENABLED); @@ -254,8 +315,8 @@ } static unsigned int iommu_msi_startup(struct irq_desc *desc) ---- sle12sp1.orig/xen/drivers/passthrough/vtd/iommu.c 2015-05-19 23:16:48.000000000 +0200 -+++ sle12sp1/xen/drivers/passthrough/vtd/iommu.c 2015-07-08 00:00:00.000000000 +0200 +--- a/xen/drivers/passthrough/vtd/iommu.c ++++ b/xen/drivers/passthrough/vtd/iommu.c @@ -996,7 +996,7 @@ static void dma_msi_unmask(struct irq_de spin_lock_irqsave(&iommu->register_lock, flags); dmar_writel(iommu->reg, DMAR_FECTL_REG, 0); @@ -274,8 +335,19 @@ } static unsigned int dma_msi_startup(struct irq_desc *desc) ---- sle12sp1.orig/xen/include/asm-x86/msi.h 2015-01-14 18:44:18.000000000 +0100 -+++ sle12sp1/xen/include/asm-x86/msi.h 2015-07-08 00:00:00.000000000 +0200 +--- a/xen/include/asm-arm/irq.h ++++ b/xen/include/asm-arm/irq.h +@@ -44,6 +44,8 @@ int route_irq_to_guest(struct domain *d, + const char *devname); + void arch_move_irqs(struct vcpu *v); + ++#define arch_evtchn_bind_pirq(d, pirq) ((void)((d) + (pirq))) ++ + /* Set IRQ type for an SPI */ + int irq_set_spi_type(unsigned int spi, unsigned int type); + +--- a/xen/include/asm-x86/msi.h ++++ b/xen/include/asm-x86/msi.h @@ -90,12 +90,13 @@ extern unsigned int pci_msix_get_table_l struct msi_desc { @@ -303,3 +375,14 @@ void ack_nonmaskable_msi_irq(struct irq_desc *); void end_nonmaskable_msi_irq(struct irq_desc *, u8 vector); void set_msi_affinity(struct irq_desc *, const cpumask_t *); +--- a/xen/include/xen/irq.h ++++ b/xen/include/xen/irq.h +@@ -172,4 +172,8 @@ unsigned int set_desc_affinity(struct ir + unsigned int arch_hwdom_irqs(domid_t); + #endif + ++#ifndef arch_evtchn_bind_pirq ++void arch_evtchn_bind_pirq(struct domain *, int pirq); ++#endif ++ + #endif /* __XEN_IRQ_H__ */ ++++++ 5583da64-gnttab-use-per-VCPU-maptrack-free-lists.patch ++++++ --- /var/tmp/diff_new_pack.wPkcl5/_old 2015-08-31 22:57:48.000000000 +0200 +++ /var/tmp/diff_new_pack.wPkcl5/_new 2015-08-31 22:57:48.000000000 +0200 @@ -25,8 +25,8 @@ Signed-off-by: David Vrabel <david.vrabel@citrix.com> Reviewed-by: Jan Beulich <jbeulich@suse.com> ---- sle12sp1.orig/xen/common/domain.c 2015-07-08 00:00:00.000000000 +0200 -+++ sle12sp1/xen/common/domain.c 2015-07-08 13:52:23.000000000 +0200 +--- a/xen/common/domain.c ++++ b/xen/common/domain.c @@ -126,6 +126,8 @@ struct vcpu *alloc_vcpu( tasklet_init(&v->continue_hypercall_tasklet, NULL, 0); @@ -36,8 +36,8 @@ if ( !zalloc_cpumask_var(&v->cpu_hard_affinity) || !zalloc_cpumask_var(&v->cpu_hard_affinity_tmp) || !zalloc_cpumask_var(&v->cpu_hard_affinity_saved) || ---- sle12sp1.orig/xen/common/grant_table.c 2015-07-08 13:49:47.000000000 +0200 -+++ sle12sp1/xen/common/grant_table.c 2015-07-08 13:52:23.000000000 +0200 +--- 
a/xen/common/grant_table.c ++++ b/xen/common/grant_table.c @@ -37,6 +37,7 @@ #include <xen/iommu.h> #include <xen/paging.h> @@ -239,8 +239,8 @@ static void gnttab_usage_print(struct domain *rd) { int first = 1; ---- sle12sp1.orig/xen/include/xen/grant_table.h 2015-07-08 13:49:47.000000000 +0200 -+++ sle12sp1/xen/include/xen/grant_table.h 2015-07-08 13:52:23.000000000 +0200 +--- a/xen/include/xen/grant_table.h ++++ b/xen/include/xen/grant_table.h @@ -60,6 +60,8 @@ struct grant_mapping { u32 ref; /* grant ref */ u16 flags; /* 0-4: GNTMAP_* ; 5-15: unused */ @@ -269,8 +269,8 @@ /* Domain death release of granted mappings of other domains' memory. */ void ---- sle12sp1.orig/xen/include/xen/sched.h 2015-01-14 18:44:18.000000000 +0100 -+++ sle12sp1/xen/include/xen/sched.h 2015-07-08 13:52:23.000000000 +0200 +--- a/xen/include/xen/sched.h ++++ b/xen/include/xen/sched.h @@ -219,6 +219,10 @@ struct vcpu /* VCPU paused by system controller. */ int controller_pause_count; ++++++ 5583da8c-gnttab-steal-maptrack-entries-from-other-VCPUs.patch ++++++ --- /var/tmp/diff_new_pack.wPkcl5/_old 2015-08-31 22:57:48.000000000 +0200 +++ /var/tmp/diff_new_pack.wPkcl5/_new 2015-08-31 22:57:48.000000000 +0200 @@ -22,8 +22,8 @@ Signed-off-by: David Vrabel <david.vrabel@citrix.com> Reviewed-by: Jan Beulich <jbeulich@suse.com> ---- sle12sp1.orig/xen/common/grant_table.c 2015-07-08 13:52:23.000000000 +0200 -+++ sle12sp1/xen/common/grant_table.c 2015-07-08 13:52:31.000000000 +0200 +--- a/xen/common/grant_table.c ++++ b/xen/common/grant_table.c @@ -283,26 +283,70 @@ __get_maptrack_handle( struct grant_table *t, struct vcpu *v) ++++++ 5587d711-evtchn-clear-xen_consumer-when-clearing-state.patch ++++++ --- /var/tmp/diff_new_pack.wPkcl5/_old 2015-08-31 22:57:48.000000000 +0200 +++ /var/tmp/diff_new_pack.wPkcl5/_new 2015-08-31 22:57:48.000000000 +0200 @@ -19,8 +19,8 @@ Signed-off-by: Jan Beulich <jbeulich@suse.com> ---- sle12sp1.orig/xen/common/event_channel.c 2015-07-08 13:54:42.000000000 +0200 -+++ sle12sp1/xen/common/event_channel.c 2015-07-08 13:57:44.000000000 +0200 +--- a/xen/common/event_channel.c ++++ b/xen/common/event_channel.c @@ -204,6 +204,7 @@ static void free_evtchn(struct domain *d /* Reset binding to vcpu0 when the channel is freed. */ chn->state = ECS_FREE; @@ -29,7 +29,7 @@ xsm_evtchn_close_post(chn); } -@@ -470,7 +471,7 @@ static long evtchn_bind_pirq(evtchn_bind +@@ -467,7 +468,7 @@ static long evtchn_bind_pirq(evtchn_bind } @@ -38,7 +38,7 @@ { struct domain *d2 = NULL; struct vcpu *v; -@@ -490,7 +491,7 @@ static long __evtchn_close(struct domain +@@ -487,7 +488,7 @@ static long __evtchn_close(struct domain chn1 = evtchn_from_port(d1, port1); /* Guest cannot close a Xen-attached event channel. 
*/ @@ -47,7 +47,7 @@ { rc = -EINVAL; goto out; -@@ -599,12 +600,6 @@ static long __evtchn_close(struct domain +@@ -596,12 +597,6 @@ static long __evtchn_close(struct domain return rc; } @@ -60,7 +60,7 @@ int evtchn_send(struct domain *d, unsigned int lport) { struct evtchn *lchn, *rchn; -@@ -959,7 +954,7 @@ static long evtchn_reset(evtchn_reset_t +@@ -956,7 +951,7 @@ static long evtchn_reset(evtchn_reset_t goto out; for ( i = 0; port_is_valid(d, i); i++ ) @@ -69,7 +69,7 @@ spin_lock(&d->event_lock); -@@ -1066,7 +1061,7 @@ long do_event_channel_op(int cmd, XEN_GU +@@ -1063,7 +1058,7 @@ long do_event_channel_op(int cmd, XEN_GU struct evtchn_close close; if ( copy_from_guest(&close, arg, 1) != 0 ) return -EFAULT; @@ -78,7 +78,7 @@ break; } -@@ -1196,11 +1191,10 @@ void free_xen_event_channel( +@@ -1193,11 +1188,10 @@ void free_xen_event_channel( BUG_ON(!port_is_valid(d, port)); chn = evtchn_from_port(d, port); BUG_ON(!consumer_is_xen(chn)); @@ -91,7 +91,7 @@ } -@@ -1299,10 +1293,7 @@ void evtchn_destroy(struct domain *d) +@@ -1296,10 +1290,7 @@ void evtchn_destroy(struct domain *d) /* Close all existing event channels. */ for ( i = 0; port_is_valid(d, i); i++ ) ++++++ 5587d779-evtchn-defer-freeing-struct-evtchn-s-until-evtchn_destroy_final.patch ++++++ --- /var/tmp/diff_new_pack.wPkcl5/_old 2015-08-31 22:57:48.000000000 +0200 +++ /var/tmp/diff_new_pack.wPkcl5/_new 2015-08-31 22:57:48.000000000 +0200 @@ -14,9 +14,9 @@ Signed-off-by: David Vrabel <david.vrabel@citrix.com> ---- sle12sp1.orig/xen/common/event_channel.c 2015-07-08 13:57:44.000000000 +0200 -+++ sle12sp1/xen/common/event_channel.c 2015-07-08 14:00:53.000000000 +0200 -@@ -1177,22 +1177,8 @@ int alloc_unbound_xen_event_channel( +--- a/xen/common/event_channel.c ++++ b/xen/common/event_channel.c +@@ -1174,22 +1174,8 @@ int alloc_unbound_xen_event_channel( void free_xen_event_channel( struct vcpu *local_vcpu, int port) { @@ -39,7 +39,7 @@ evtchn_close(d, port, 0); } -@@ -1206,18 +1192,12 @@ void notify_via_xen_event_channel(struct +@@ -1203,18 +1189,12 @@ void notify_via_xen_event_channel(struct spin_lock(&ld->event_lock); @@ -59,7 +59,7 @@ rd = lchn->u.interdomain.remote_dom; rport = lchn->u.interdomain.remote_port; rchn = evtchn_from_port(rd, rport); -@@ -1285,7 +1265,7 @@ int evtchn_init(struct domain *d) +@@ -1282,7 +1262,7 @@ int evtchn_init(struct domain *d) void evtchn_destroy(struct domain *d) { @@ -68,7 +68,7 @@ /* After this barrier no new event-channel allocations can occur. 
*/ BUG_ON(!d->is_dying); -@@ -1295,8 +1275,17 @@ void evtchn_destroy(struct domain *d) +@@ -1292,8 +1272,17 @@ void evtchn_destroy(struct domain *d) for ( i = 0; port_is_valid(d, i); i++ ) evtchn_close(d, i, 0); @@ -87,7 +87,7 @@ for ( i = 0; i < NR_EVTCHN_GROUPS; i++ ) { if ( !d->evtchn_group[i] ) -@@ -1304,20 +1293,9 @@ void evtchn_destroy(struct domain *d) +@@ -1301,20 +1290,9 @@ void evtchn_destroy(struct domain *d) for ( j = 0; j < BUCKETS_PER_GROUP; j++ ) free_evtchn_bucket(d, d->evtchn_group[i][j]); xfree(d->evtchn_group[i]); ++++++ 5587d7b7-evtchn-use-a-per-event-channel-lock-for-sending-events.patch ++++++ --- /var/tmp/diff_new_pack.wPkcl5/_old 2015-08-31 22:57:48.000000000 +0200 +++ /var/tmp/diff_new_pack.wPkcl5/_new 2015-08-31 22:57:48.000000000 +0200 @@ -22,8 +22,8 @@ Signed-off-by: David Vrabel <david.vrabel@citrix.com> Reviewed-by: Jan Beulich <jbeulich@suse.com> ---- sle12sp1.orig/xen/common/event_channel.c 2015-07-08 14:00:53.000000000 +0200 -+++ sle12sp1/xen/common/event_channel.c 2015-07-08 14:04:08.000000000 +0200 +--- a/xen/common/event_channel.c ++++ b/xen/common/event_channel.c @@ -141,6 +141,7 @@ static struct evtchn *alloc_evtchn_bucke return NULL; } @@ -143,8 +143,8 @@ + bind->port = port; - #ifdef CONFIG_X86 -@@ -577,15 +622,24 @@ static long evtchn_close(struct domain * + arch_evtchn_bind_pirq(d, pirq); +@@ -574,15 +619,24 @@ static long evtchn_close(struct domain * BUG_ON(chn2->state != ECS_INTERDOMAIN); BUG_ON(chn2->u.interdomain.remote_dom != d1); @@ -170,7 +170,7 @@ out: if ( d2 != NULL ) -@@ -607,21 +661,18 @@ int evtchn_send(struct domain *d, unsign +@@ -604,21 +658,18 @@ int evtchn_send(struct domain *d, unsign struct vcpu *rvcpu; int rport, ret = 0; @@ -197,7 +197,7 @@ } ret = xsm_evtchn_send(XSM_HOOK, ld, lchn); -@@ -651,7 +702,7 @@ int evtchn_send(struct domain *d, unsign +@@ -648,7 +699,7 @@ int evtchn_send(struct domain *d, unsign } out: @@ -206,7 +206,7 @@ return ret; } -@@ -1162,11 +1213,15 @@ int alloc_unbound_xen_event_channel( +@@ -1159,11 +1210,15 @@ int alloc_unbound_xen_event_channel( if ( rc ) goto out; @@ -222,7 +222,7 @@ out: spin_unlock(&d->event_lock); -@@ -1190,11 +1245,11 @@ void notify_via_xen_event_channel(struct +@@ -1187,11 +1242,11 @@ void notify_via_xen_event_channel(struct struct domain *rd; int rport; @@ -236,7 +236,7 @@ if ( likely(lchn->state == ECS_INTERDOMAIN) ) { ASSERT(consumer_is_xen(lchn)); -@@ -1204,7 +1259,7 @@ void notify_via_xen_event_channel(struct +@@ -1201,7 +1256,7 @@ void notify_via_xen_event_channel(struct evtchn_set_pending(rd->vcpu[rchn->notify_vcpu_id], rport); } @@ -245,8 +245,8 @@ } void evtchn_check_pollers(struct domain *d, unsigned int port) ---- sle12sp1.orig/xen/include/xen/sched.h 2015-07-08 13:53:50.000000000 +0200 -+++ sle12sp1/xen/include/xen/sched.h 2015-07-08 14:04:08.000000000 +0200 +--- a/xen/include/xen/sched.h ++++ b/xen/include/xen/sched.h @@ -79,6 +79,7 @@ extern domid_t hardware_domid; struct evtchn ++++++ 5587d7e2-evtchn-pad-struct-evtchn-to-64-bytes.patch ++++++ --- /var/tmp/diff_new_pack.wPkcl5/_old 2015-08-31 22:57:48.000000000 +0200 +++ /var/tmp/diff_new_pack.wPkcl5/_new 2015-08-31 22:57:48.000000000 +0200 @@ -14,8 +14,8 @@ Signed-off-by: David Vrabel <david.vrabel@citrix.com> Acked-by: Jan Beulich <jbeulich@suse.com> ---- sle12sp1.orig/xen/include/xen/sched.h 2015-07-08 14:04:08.000000000 +0200 -+++ sle12sp1/xen/include/xen/sched.h 2015-07-08 14:04:21.000000000 +0200 +--- a/xen/include/xen/sched.h ++++ b/xen/include/xen/sched.h @@ -129,7 +129,7 @@ struct evtchn #endif } ssid; ++++++ 
558bfaa0-x86-traps-avoid-using-current-too-early.patch ++++++ --- /var/tmp/diff_new_pack.wPkcl5/_old 2015-08-31 22:57:48.000000000 +0200 +++ /var/tmp/diff_new_pack.wPkcl5/_new 2015-08-31 22:57:48.000000000 +0200 @@ -12,7 +12,7 @@ --- a/xen/arch/x86/x86_64/traps.c +++ b/xen/arch/x86/x86_64/traps.c -@@ -84,7 +84,7 @@ void show_registers(const struct cpu_use +@@ -86,7 +86,7 @@ void show_registers(const struct cpu_use struct cpu_user_regs fault_regs = *regs; unsigned long fault_crs[8]; enum context context; ++++++ 559bc633-x86-cpupool-clear-proper-cpu_valid-bit-on-CPU-teardown.patch ++++++ # Commit 8022b05284dea80e24813d03180788ec7277a0bd # Date 2015-07-07 14:29:39 +0200 # Author Dario Faggioli <dario.faggioli@citrix.com> # Committer Jan Beulich <jbeulich@suse.com> x86 / cpupool: clear the proper cpu_valid bit on pCPU teardown In fact, when a pCPU goes down, we want to clear its bit in the correct cpupool's valid mask, rather than always in cpupool0's one. Before this commit, all the pCPUs in the non-default pool(s) will be considered immediately valid, during system resume, even the ones that have not been brought up yet. As a result, the (Credit1) scheduler will attempt to run its load balancing logic on them, causing the following Oops: # xl cpupool-cpu-remove Pool-0 8-15 # xl cpupool-create name=\"Pool-1\" # xl cpupool-cpu-add Pool-1 8-15 --> suspend --> resume (XEN) ----[ Xen-4.6-unstable x86_64 debug=y Tainted: C ]---- (XEN) CPU: 8 (XEN) RIP: e008:[<ffff82d080123078>] csched_schedule+0x4be/0xb97 (XEN) RFLAGS: 0000000000010087 CONTEXT: hypervisor (XEN) rax: 80007d2f7fccb780 rbx: 0000000000000009 rcx: 0000000000000000 (XEN) rdx: ffff82d08031ed40 rsi: ffff82d080334980 rdi: 0000000000000000 (XEN) rbp: ffff83010000fe20 rsp: ffff83010000fd40 r8: 0000000000000004 (XEN) r9: 0000ffff0000ffff r10: 00ff00ff00ff00ff r11: 0f0f0f0f0f0f0f0f (XEN) r12: ffff8303191ea870 r13: ffff8303226aadf0 r14: 0000000000000009 (XEN) r15: 0000000000000008 cr0: 000000008005003b cr4: 00000000000026f0 (XEN) cr3: 00000000dba9d000 cr2: 0000000000000000 (XEN) ds: 0000 es: 0000 fs: 0000 gs: 0000 ss: 0000 cs: e008 (XEN) ... ... ... (XEN) Xen call trace: (XEN) [<ffff82d080123078>] csched_schedule+0x4be/0xb97 (XEN) [<ffff82d08012c732>] schedule+0x12a/0x63c (XEN) [<ffff82d08012f8c8>] __do_softirq+0x82/0x8d (XEN) [<ffff82d08012f920>] do_softirq+0x13/0x15 (XEN) [<ffff82d080164791>] idle_loop+0x5b/0x6b (XEN) (XEN) **************************************** (XEN) Panic on CPU 8: (XEN) GENERAL PROTECTION FAULT (XEN) [error_code=0000] (XEN) **************************************** The reason why the error is a #GP fault is that, without this commit, we try to access the per-cpu area of a not yet allocated and initialized pCPU.
In fact, %rax, which is what is used as pointer, is 80007d2f7fccb780, and we also have this: #define INVALID_PERCPU_AREA (0x8000000000000000L - (long)__per_cpu_start) Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com> Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> Acked-by: Juergen Gross <jgross@suse.com> --- a/xen/arch/x86/smpboot.c +++ b/xen/arch/x86/smpboot.c @@ -816,7 +816,6 @@ void __cpu_disable(void) remove_siblinginfo(cpu); /* It's now safe to remove this processor from the online map */ - cpumask_clear_cpu(cpu, cpupool0->cpu_valid); cpumask_clear_cpu(cpu, &cpu_online_map); fixup_irqs(); --- a/xen/common/cpupool.c +++ b/xen/common/cpupool.c @@ -529,6 +529,7 @@ static int cpupool_cpu_remove(unsigned i if ( cpumask_test_cpu(cpu, (*c)->cpu_valid ) ) { cpumask_set_cpu(cpu, (*c)->cpu_suspended); + cpumask_clear_cpu(cpu, (*c)->cpu_valid); break; } } @@ -551,6 +552,7 @@ static int cpupool_cpu_remove(unsigned i * If we are not suspending, we are hot-unplugging cpu, and that is * allowed only for CPUs in pool0. */ + cpumask_clear_cpu(cpu, cpupool0->cpu_valid); ret = 0; } ++++++ 559bc64e-credit1-properly-deal-with-CPUs-not-in-any-pool.patch ++++++ # Commit 02ea5031825d984d52eb9a982b8457e3434137f0 # Date 2015-07-07 14:30:06 +0200 # Author Dario Faggioli <dario.faggioli@citrix.com> # Committer Jan Beulich <jbeulich@suse.com> credit1: properly deal with pCPUs not in any cpupool Ideally, the pCPUs that are 'free', i.e., not assigned to any cpupool, should not be considered by the scheduler for load balancing or anything. In Credit1, we fail at this, because of how we use cpupool_scheduler_cpumask(). In fact, for a free pCPU, cpupool_scheduler_cpumask() returns a pointer to cpupool_free_cpus, and hence, near the top of csched_load_balance(): if ( unlikely(!cpumask_test_cpu(cpu, online)) ) goto out; is false (the pCPU _is_ free!), and we therefore do not jump to the end right away, as we should. This causes the following splat when resuming from ACPI S3 with pCPUs not assigned to any pool: (XEN) ----[ Xen-4.6-unstable x86_64 debug=y Tainted: C ]---- (XEN) ... ... ... (XEN) Xen call trace: (XEN) [<ffff82d080122eaa>] csched_load_balance+0x213/0x794 (XEN) [<ffff82d08012374c>] csched_schedule+0x321/0x452 (XEN) [<ffff82d08012c85e>] schedule+0x12a/0x63c (XEN) [<ffff82d08012fa09>] __do_softirq+0x82/0x8d (XEN) [<ffff82d08012fa61>] do_softirq+0x13/0x15 (XEN) [<ffff82d080164780>] idle_loop+0x5b/0x6b (XEN) (XEN) (XEN) **************************************** (XEN) Panic on CPU 8: (XEN) GENERAL PROTECTION FAULT (XEN) [error_code=0000] (XEN) **************************************** The cure is: * use cpupool_online_cpumask(), as a better guard to the case when the cpu is being offlined; * explicitly check whether the cpu is free. SEDF is in a similar situation, so fix it too. Still in Credit1, we must make sure that free (or offline) CPUs are not considered "ticklable". Not doing so would impair the load balancing algorithm, making the scheduler think that it is possible to 'ask' the pCPU to pick up some work, while in reality, that will never happen!
Evidence of such behavior is shown in this trace: Name CPU list Pool-0 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 0.112998198 | ||.|| -|x||-|- d0v0 runstate_change d0v4 offline->runnable ] 0.112998198 | ||.|| -|x||-|- d0v0 22006(2:2:6) 1 [ f ] ] 0.112999612 | ||.|| -|x||-|- d0v0 28004(2:8:4) 2 [ 0 4 ] 0.113003387 | ||.|| -||||-|x d32767v15 runstate_continue d32767v15 running->running where "22006(2:2:6) 1 [ f ]" means that pCPU 15, which is free from any pool, is tickled. The cure, in this case, is to filter out the free pCPUs, within __runq_tickle(). Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com> Acked-by: Juergen Gross <jgross@suse.com> Reviewed-by: George Dunlap <george.dunlap@eu.citrix.com> --- a/xen/common/sched_credit.c +++ b/xen/common/sched_credit.c @@ -350,12 +350,17 @@ __runq_tickle(unsigned int cpu, struct c { struct csched_vcpu * const cur = CSCHED_VCPU(curr_on_cpu(cpu)); struct csched_private *prv = CSCHED_PRIV(per_cpu(scheduler, cpu)); - cpumask_t mask, idle_mask; + cpumask_t mask, idle_mask, *online; int balance_step, idlers_empty; ASSERT(cur); cpumask_clear(&mask); - idlers_empty = cpumask_empty(prv->idlers); + + /* cpu is vc->processor, so it must be in a cpupool. */ + ASSERT(per_cpu(cpupool, cpu) != NULL); + online = cpupool_online_cpumask(per_cpu(cpupool, cpu)); + cpumask_and(&idle_mask, prv->idlers, online); + idlers_empty = cpumask_empty(&idle_mask); /* @@ -392,8 +397,8 @@ __runq_tickle(unsigned int cpu, struct c /* Are there idlers suitable for new (for this balance step)? */ csched_balance_cpumask(new->vcpu, balance_step, csched_balance_mask); - cpumask_and(&idle_mask, prv->idlers, csched_balance_mask); - new_idlers_empty = cpumask_empty(&idle_mask); + cpumask_and(csched_balance_mask, csched_balance_mask, &idle_mask); + new_idlers_empty = cpumask_empty(csched_balance_mask); /* * Let's not be too harsh! If there aren't idlers suitable @@ -1494,6 +1499,7 @@ static struct csched_vcpu * csched_load_balance(struct csched_private *prv, int cpu, struct csched_vcpu *snext, bool_t *stolen) { + struct cpupool *c = per_cpu(cpupool, cpu); struct csched_vcpu *speer; cpumask_t workers; cpumask_t *online; @@ -1501,10 +1507,13 @@ csched_load_balance(struct csched_privat int node = cpu_to_node(cpu); BUG_ON( cpu != snext->vcpu->processor ); - online = cpupool_scheduler_cpumask(per_cpu(cpupool, cpu)); + online = cpupool_online_cpumask(c); - /* If this CPU is going offline we shouldn't steal work. */ - if ( unlikely(!cpumask_test_cpu(cpu, online)) ) + /* + * If this CPU is going offline, or is not (yet) part of any cpupool + * (as it happens, e.g., during cpu bringup), we shouldn't steal work. 
+ */ + if ( unlikely(!cpumask_test_cpu(cpu, online) || c == NULL) ) goto out; if ( snext->pri == CSCHED_PRI_IDLE ) --- a/xen/common/sched_sedf.c +++ b/xen/common/sched_sedf.c @@ -791,7 +791,8 @@ static struct task_slice sedf_do_schedul if ( tasklet_work_scheduled || (list_empty(runq) && list_empty(waitq)) || unlikely(!cpumask_test_cpu(cpu, - cpupool_scheduler_cpumask(per_cpu(cpupool, cpu)))) ) + cpupool_online_cpumask(per_cpu(cpupool, cpu))) || + per_cpu(cpupool, cpu) == NULL) ) { ret.task = IDLETASK(cpu); ret.time = SECONDS(1); ++++++ 559bc87f-x86-hvmloader-avoid-data-corruption-with-xenstore-rw.patch ++++++ # Commit bbbe7e7157a964c485fb861765be291734676932 # Date 2015-07-07 14:39:27 +0200 # Author Andrew Cooper <andrew.cooper3@citrix.com> # Committer Jan Beulich <jbeulich@suse.com> x86/hvmloader: avoid data corruption with xenstore reads/writes The functions ring_read and ring_write() have logic to try and deal with partial reads and writes. However, in all cases where the "while (len)" loop executed twice, data corruption would occur as the second memcpy() starts from the beginning of "data" again, rather than from where it got to. This bug manifested itself as protocol corruption when a reply header crossed the first wrap of the response ring. However, similar corruption would also occur if hvmloader observed xenstored performing partial writes of the block in question, or if hvmloader had to wait for xenstored to make space in either ring. Reported-by: Adam Kucia <djexit@o2.pl> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> --- a/tools/firmware/hvmloader/xenbus.c +++ b/tools/firmware/hvmloader/xenbus.c @@ -105,7 +105,7 @@ void xenbus_shutdown(void) /* Helper functions: copy data in and out of the ring */ static void ring_write(const char *data, uint32_t len) { - uint32_t part; + uint32_t part, done = 0; ASSERT(len <= XENSTORE_PAYLOAD_MAX); @@ -122,16 +122,18 @@ static void ring_write(const char *data, if ( part > len ) part = len; - memcpy(rings->req + MASK_XENSTORE_IDX(rings->req_prod), data, part); + memcpy(rings->req + MASK_XENSTORE_IDX(rings->req_prod), + data + done, part); barrier(); /* = wmb before prod write, rmb before next cons read */ rings->req_prod += part; len -= part; + done += part; } } static void ring_read(char *data, uint32_t len) { - uint32_t part; + uint32_t part, done = 0; ASSERT(len <= XENSTORE_PAYLOAD_MAX); @@ -148,10 +150,12 @@ static void ring_read(char *data, uint32 if ( part > len ) part = len; - memcpy(data, rings->rsp + MASK_XENSTORE_IDX(rings->rsp_cons), part); + memcpy(data + done, + rings->rsp + MASK_XENSTORE_IDX(rings->rsp_cons), part); barrier(); /* = wmb before cons write, rmb before next prod read */ rings->rsp_cons += part; len -= part; + done += part; } } ++++++ 55a62eb0-xl-correct-handling-of-extra_config-in-main_cpupoolcreate.patch ++++++ Subject: xl: correct handling of extra_config in main_cpupoolcreate From: Wei Liu wei.liu2@citrix.com Tue Jul 14 17:41:10 2015 +0100 Date: Wed Jul 15 10:58:08 2015 +0100: Git: 705c9e12426cba82804cb578fc70785281655d94 Don't dereference extra_config if it's NULL. Don't leak extra_config in the end. Also fixed a typo in error string while I was there. 
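The two fixes follow a common C idiom, shown here as a tiny standalone sketch (hypothetical names, not the actual libxl code): guard the optional string before using it, and free it on every exit path.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* extra may legitimately be NULL when no extra config was given. */
static int attach_extra(char *extra)
{
    if (extra && strlen(extra))     /* guard before strlen()/deref */
        printf("appending: %s\n", extra);

    free(extra);                    /* freed on every exit path */
    return 0;
}

int main(void)
{
    attach_extra(NULL);             /* must not crash */
    attach_extra(strdup("foo=1"));  /* must not leak */
    return 0;
}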
Signed-off-by: Wei Liu <wei.liu2@citrix.com> Acked-by: Ian Jackson <ian.jackson@eu.citrix.com> Index: xen-4.5.1-testing/tools/libxl/xl_cmdimpl.c =================================================================== --- xen-4.5.1-testing.orig/tools/libxl/xl_cmdimpl.c +++ xen-4.5.1-testing/tools/libxl/xl_cmdimpl.c @@ -7085,9 +7085,9 @@ int main_cpupoolcreate(int argc, char ** else config_src="command line"; - if (strlen(extra_config)) { + if (extra_config && strlen(extra_config)) { if (config_len > INT_MAX - (strlen(extra_config) + 2)) { - fprintf(stderr, "Failed to attach extra configration\n"); + fprintf(stderr, "Failed to attach extra configuration\n"); goto out; } config_data = xrealloc(config_data, @@ -7211,6 +7211,7 @@ out_cfg: out: free(name); free(config_data); + free(extra_config); return rc; } ++++++ 55a66a1e-make-rangeset_report_ranges-report-all-ranges.patch ++++++ # Commit b1c780cd315eb4db06be3bbb5c6d80b1cabd27a9 # Date 2015-07-15 16:11:42 +0200 # Author Jan Beulich <jbeulich@suse.com> # Committer Jan Beulich <jbeulich@suse.com> make rangeset_report_ranges() report all ranges find_range() returns NULL when s is below the lowest range, so we have to use first_range() here (which is as good performance-wise), or else no range gets reported at all in that case. Signed-off-by: Jan Beulich <jbeulich@suse.com> Acked-by: Ian Campbell <ian.campbell@citrix.com> --- a/xen/common/rangeset.c +++ b/xen/common/rangeset.c @@ -289,7 +289,7 @@ int rangeset_report_ranges( read_lock(&r->lock); - for ( x = find_range(r, s); x && (x->s <= e) && !rc; x = next_range(r, x) ) + for ( x = first_range(r); x && (x->s <= e) && !rc; x = next_range(r, x) ) if ( x->e >= s ) rc = cb(max(x->s, s), min(x->e, e), ctxt); ++++++ 55a77e4f-dmar-device-scope-mem-leak-fix.patch ++++++ # Commit a8bc99b981c5ad773bd646f5986e616d26fb94d7 # Date 2015-07-16 11:50:07 +0200 # Author Elena Ufimtseva <elena.ufimtseva@oracle.com> # Committer Jan Beulich <jbeulich@suse.com> dmar: device scope mem leak fix Release memory allocated for scope.devices of dmar units on various failure paths and when disabling dmar. Set device count after successful memory allocation, not before, in device scope parsing function. Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> Reviewed-by: Jan Beulich <jbeulich@suse.com> Acked-by: Yang Zhang <yang.z.zhang@intel.com> # Commit 132231d10343608faf5892785a08acc500326d04 # Date 2015-07-16 15:23:37 +0200 # Author Andrew Cooper <andrew.cooper3@citrix.com> # Committer Jan Beulich <jbeulich@suse.com> dmar: fix double free in error paths following c/s a8bc99b Several error paths would end up freeing scope->devices twice.
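Taken together, the two commits converge on an idempotent release helper. As a generic illustration of the pattern (plain C with a hypothetical struct, not the Xen types; the real helper is in the patch below):

#include <stdlib.h>

struct scope {
    unsigned int devices_cnt;
    unsigned short *devices;
};

/* Resetting both pointer and count turns a second call from an
 * overlapping error path into a harmless no-op instead of a
 * double free. */
static void scope_devices_free(struct scope *s)
{
    if ( !s )
        return;
    s->devices_cnt = 0;
    free(s->devices);
    s->devices = NULL;
}

int main(void)
{
    struct scope s = { 4, malloc(4 * sizeof(unsigned short)) };

    scope_devices_free(&s);
    scope_devices_free(&s); /* safe: devices is already NULL */
    return 0;
}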
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> Reviewed-by: Jan Beulich <jbeulich@suse.com> --- a/xen/drivers/passthrough/vtd/dmar.c +++ b/xen/drivers/passthrough/vtd/dmar.c @@ -80,6 +80,16 @@ static int __init acpi_register_rmrr_uni return 0; } +static void scope_devices_free(struct dmar_scope *scope) +{ + if ( !scope ) + return; + + scope->devices_cnt = 0; + xfree(scope->devices); + scope->devices = NULL; +} + static void __init disable_all_dmar_units(void) { struct acpi_drhd_unit *drhd, *_drhd; @@ -89,16 +99,19 @@ static void __init disable_all_dmar_unit list_for_each_entry_safe ( drhd, _drhd, &acpi_drhd_units, list ) { list_del(&drhd->list); + scope_devices_free(&drhd->scope); xfree(drhd); } list_for_each_entry_safe ( rmrr, _rmrr, &acpi_rmrr_units, list ) { list_del(&rmrr->list); + scope_devices_free(&rmrr->scope); xfree(rmrr); } list_for_each_entry_safe ( atsr, _atsr, &acpi_atsr_units, list ) { list_del(&atsr->list); + scope_devices_free(&atsr->scope); xfree(atsr); } } @@ -317,13 +330,13 @@ static int __init acpi_parse_dev_scope( if ( (cnt = scope_device_count(start, end)) < 0 ) return cnt; - scope->devices_cnt = cnt; if ( cnt > 0 ) { scope->devices = xzalloc_array(u16, cnt); if ( !scope->devices ) return -ENOMEM; } + scope->devices_cnt = cnt; while ( start < end ) { @@ -426,7 +439,7 @@ static int __init acpi_parse_dev_scope( out: if ( ret ) - xfree(scope->devices); + scope_devices_free(scope); return ret; } @@ -541,6 +554,7 @@ acpi_parse_one_drhd(struct acpi_dmar_hea " Workaround BIOS bug: ignore the DRHD due to all " "devices under its scope are not PCI discoverable!\n"); + scope_devices_free(&dmaru->scope); iommu_free(dmaru); xfree(dmaru); } @@ -561,9 +575,11 @@ acpi_parse_one_drhd(struct acpi_dmar_hea out: if ( ret ) { + scope_devices_free(&dmaru->scope); iommu_free(dmaru); xfree(dmaru); } + return ret; } @@ -657,6 +673,7 @@ acpi_parse_one_rmrr(struct acpi_dmar_hea " Ignore the RMRR (%"PRIx64", %"PRIx64") due to " "devices under its scope are not PCI discoverable!\n", rmrru->base_address, rmrru->end_address); + scope_devices_free(&rmrru->scope); xfree(rmrru); } else if ( base_addr > end_addr ) @@ -664,6 +681,7 @@ acpi_parse_one_rmrr(struct acpi_dmar_hea dprintk(XENLOG_WARNING VTDPREFIX, " The RMRR (%"PRIx64", %"PRIx64") is incorrect!\n", rmrru->base_address, rmrru->end_address); + scope_devices_free(&rmrru->scope); xfree(rmrru); ret = -EFAULT; } @@ -726,7 +744,10 @@ acpi_parse_one_atsr(struct acpi_dmar_hea } if ( ret ) + { + scope_devices_free(&atsru->scope); xfree(atsru); + } else acpi_register_atsr_unit(atsru); return ret; ++++++ 55b0a218-x86-PCI-CFG-write-intercept.patch ++++++ References: bsc#907514 bsc#910258 bsc#918984 bsc#923967 # Commit a88b72fddd046a0978242411276861039ec99ad0 # Date 2015-07-23 10:13:12 +0200 # Author Jan Beulich <jbeulich@suse.com> # Committer Jan Beulich <jbeulich@suse.com> x86/PCI: add config space abstract write intercept logic This is to be used by MSI code, and later to also be hooked up to MMCFG accesses by Dom0. 
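To make the return-value convention concrete, here is a condensed, hypothetical model of a caller (standalone C with stubbed helpers; only the convention itself is taken from the patch below): a negative return vetoes the guest's write, a non-negative one lets it proceed, possibly with adjusted data.

#include <stdint.h>
#include <stdio.h>

/* Stub standing in for the per-device hook this patch introduces:
 * it may adjust *data, and a negative return vetoes the write. */
static int pci_conf_write_intercept(unsigned int seg, unsigned int bdf,
                                    unsigned int reg, unsigned int size,
                                    uint32_t *data)
{
    (void)seg; (void)bdf; (void)reg; (void)size; (void)data;
    return 0; /* 0 = nothing to adjust */
}

static void pci_conf_write(unsigned int reg, unsigned int size, uint32_t data)
{
    printf("config write: reg %#x size %u data %#x\n",
           reg, size, (unsigned int)data);
}

int main(void)
{
    uint32_t data = 0x1234;

    /* Mirrors the guest_io_write() path: perform the write only if
     * the intercept did not object. */
    if ( pci_conf_write_intercept(0, 0x0810, 0x40, 2, &data) >= 0 )
        pci_conf_write(0x40, 2, data);
    return 0;
}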
Signed-off-by: Jan Beulich <jbeulich@suse.com> Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> --- a/xen/arch/x86/msi.c +++ b/xen/arch/x86/msi.c @@ -1108,6 +1108,12 @@ void pci_cleanup_msi(struct pci_dev *pde msi_free_irqs(pdev); } +int pci_msi_conf_write_intercept(struct pci_dev *pdev, unsigned int reg, + unsigned int size, uint32_t *data) +{ + return 0; +} + int pci_restore_msi_state(struct pci_dev *pdev) { unsigned long flags; --- a/xen/arch/x86/pci.c +++ b/xen/arch/x86/pci.c @@ -67,3 +67,28 @@ void pci_conf_write(uint32_t cf8, uint8_ spin_unlock_irqrestore(&pci_config_lock, flags); } + +int pci_conf_write_intercept(unsigned int seg, unsigned int bdf, + unsigned int reg, unsigned int size, + uint32_t *data) +{ + struct pci_dev *pdev; + int rc = 0; + + /* + * Avoid expensive operations when no hook is going to do anything + * for the access anyway. + */ + if ( reg < 64 || reg >= 256 ) + return 0; + + spin_lock(&pcidevs_lock); + + pdev = pci_get_pdev(seg, PCI_BUS(bdf), PCI_DEVFN2(bdf)); + if ( pdev ) + rc = pci_msi_conf_write_intercept(pdev, reg, size, data); + + spin_unlock(&pcidevs_lock); + + return rc; +} --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -1708,8 +1708,8 @@ static int admin_io_okay( return ioports_access_permitted(v->domain, port, port + bytes - 1); } -static bool_t pci_cfg_ok(struct domain *currd, bool_t write, - unsigned int start, unsigned int size) +static bool_t pci_cfg_ok(struct domain *currd, unsigned int start, + unsigned int size, uint32_t *write) { uint32_t machine_bdf; @@ -1741,8 +1741,12 @@ static bool_t pci_cfg_ok(struct domain * start |= CF8_ADDR_HI(currd->arch.pci_cf8); } - return !xsm_pci_config_permission(XSM_HOOK, currd, machine_bdf, - start, start + size - 1, write); + if ( xsm_pci_config_permission(XSM_HOOK, currd, machine_bdf, + start, start + size - 1, !!write) != 0 ) + return 0; + + return !write || + pci_conf_write_intercept(0, machine_bdf, start, size, write) >= 0; } uint32_t guest_io_read( @@ -1796,7 +1800,7 @@ uint32_t guest_io_read( size = min(bytes, 4 - (port & 3)); if ( size == 3 ) size = 2; - if ( pci_cfg_ok(v->domain, 0, port & 3, size) ) + if ( pci_cfg_ok(v->domain, port & 3, size, NULL) ) sub_data = pci_conf_read(v->domain->arch.pci_cf8, port & 3, size); } @@ -1869,7 +1873,7 @@ void guest_io_write( size = min(bytes, 4 - (port & 3)); if ( size == 3 ) size = 2; - if ( pci_cfg_ok(v->domain, 1, port & 3, size) ) + if ( pci_cfg_ok(v->domain, port & 3, size, &data) ) pci_conf_write(v->domain->arch.pci_cf8, port & 3, size, data); } --- a/xen/include/asm-x86/pci.h +++ b/xen/include/asm-x86/pci.h @@ -15,4 +15,11 @@ struct arch_pci_dev { vmask_t used_vectors; }; +struct pci_dev; +int pci_conf_write_intercept(unsigned int seg, unsigned int bdf, + unsigned int reg, unsigned int size, + uint32_t *data); +int pci_msi_conf_write_intercept(struct pci_dev *, unsigned int reg, + unsigned int size, uint32_t *data); + #endif /* __X86_PCI_H__ */ ++++++ 55b0a255-x86-MSI-X-maskall.patch ++++++ References: bsc#907514 bsc#910258 bsc#918984 bsc#923967 # Commit 484d7c852e4ff79c945406ed28b5db63a5a0b7f3 # Date 2015-07-23 10:14:13 +0200 # Author Jan Beulich <jbeulich@suse.com> # Committer Jan Beulich <jbeulich@suse.com> x86/MSI-X: track host and guest mask-all requests separately Host uses of the bits will be added subsequently, and must not be overridden by guests (including Dom0, namely when acting on behalf of a guest). 
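The essence of the separate tracking fits in a few lines (hypothetical standalone C; the flag value is the one defined by the PCI spec): the bit actually written to hardware is the OR of the two recorded requests, so a guest clearing its own request can never undo a host-side mask.

#include <stdint.h>
#include <stdio.h>

#define PCI_MSIX_FLAGS_MASKALL 0x4000

struct msix_state {
    uint8_t host_maskall, guest_maskall;
};

/* Derive the control-register bit from both tracked requests. */
static uint16_t apply_maskall(const struct msix_state *s, uint16_t control)
{
    if ( s->host_maskall || s->guest_maskall )
        return control | PCI_MSIX_FLAGS_MASKALL;
    return control & ~PCI_MSIX_FLAGS_MASKALL;
}

int main(void)
{
    struct msix_state s = { .host_maskall = 1, .guest_maskall = 0 };

    /* The guest asks to unmask, but the host request keeps the bit. */
    printf("%#x\n", (unsigned int)apply_maskall(&s, 0));
    return 0;
}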
Signed-off-by: Jan Beulich <jbeulich@suse.com> Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> --- a/xen/arch/x86/msi.c +++ b/xen/arch/x86/msi.c @@ -843,6 +843,12 @@ static int msix_capability_init(struct p if ( !msix->used_entries ) { + msix->host_maskall = 0; + if ( !msix->guest_maskall ) + control &= ~PCI_MSIX_FLAGS_MASKALL; + else + control |= PCI_MSIX_FLAGS_MASKALL; + if ( rangeset_add_range(mmio_ro_ranges, msix->table.first, msix->table.last) ) WARN(); @@ -1111,6 +1117,34 @@ void pci_cleanup_msi(struct pci_dev *pde int pci_msi_conf_write_intercept(struct pci_dev *pdev, unsigned int reg, unsigned int size, uint32_t *data) { + u16 seg = pdev->seg; + u8 bus = pdev->bus; + u8 slot = PCI_SLOT(pdev->devfn); + u8 func = PCI_FUNC(pdev->devfn); + struct msi_desc *entry; + unsigned int pos; + + if ( pdev->msix ) + { + entry = find_msi_entry(pdev, -1, PCI_CAP_ID_MSIX); + pos = entry ? entry->msi_attrib.pos + : pci_find_cap_offset(seg, bus, slot, func, + PCI_CAP_ID_MSIX); + ASSERT(pos); + + if ( reg < pos || reg >= msix_pba_offset_reg(pos) + 4 ) + return 0; + + if ( reg != msix_control_reg(pos) || size != 2 ) + return -EACCES; + + pdev->msix->guest_maskall = !!(*data & PCI_MSIX_FLAGS_MASKALL); + if ( pdev->msix->host_maskall ) + *data |= PCI_MSIX_FLAGS_MASKALL; + + return 1; + } + return 0; } --- a/xen/include/asm-x86/msi.h +++ b/xen/include/asm-x86/msi.h @@ -228,6 +228,7 @@ struct arch_msix { int table_refcnt[MAX_MSIX_TABLE_PAGES]; int table_idx[MAX_MSIX_TABLE_PAGES]; spinlock_t table_lock; + bool_t host_maskall, guest_maskall; domid_t warned; }; ++++++ 55b0a283-x86-MSI-X-teardown.patch ++++++ References: bsc#907514 bsc#910258 bsc#918984 bsc#923967 # Commit 082fdc6ce85e5b603f8fb24553cf200e3b67889f # Date 2015-07-23 10:14:59 +0200 # Author Jan Beulich <jbeulich@suse.com> # Committer Jan Beulich <jbeulich@suse.com> x86/MSI-X: be more careful during teardown When a device gets detached from a guest, pciback will clear its command register, thus disabling both memory and I/O decoding. The disabled memory decoding, however, has an effect on the MSI-X table accesses the hypervisor does: These won't have the intended effect anymore. Even worse, for PCIe devices (but not SR-IOV virtual functions) such accesses may (will?) be treated as Unsupported Requests, causing respective errors to be surfaced, potentially in the form of NMIs that may be fatal to the hypervisor or Dom0 in different ways. Hence rather than carrying out these accesses, we should avoid them where we can, and use alternative (e.g. PCI config space based) mechanisms to achieve at least the same effect. At this time it continues to be unclear whether this is fixing an actual bug or is rather just working around bogus (but apparently common) system behavior. Signed-off-by: Jan Beulich <jbeulich@suse.com> Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> --- Backporting note (largely to myself): Depends on (not yet backported to 4.4 and earlier) commit 061eebe0e "x86/MSI: drop workaround for insecure Dom0 kernels" (due to re-use of struct arch_msix's warned field).
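A condensed model of the guard introduced below (standalone C with a stubbed config-space read; register offsets and flag values per the PCI spec): MSI-X table MMIO is only attempted while the device, or its physical function in the SR-IOV VF case, still has memory decoding enabled.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PCI_COMMAND        0x04
#define PCI_COMMAND_MEMORY 0x2

/* Stub for the config-space read; the real code reads the command
 * register of the device itself, or of its physfn for a VF. */
static uint16_t pci_conf_read16(unsigned int reg)
{
    return reg == PCI_COMMAND ? PCI_COMMAND_MEMORY : 0;
}

static bool memory_decoded(void)
{
    return pci_conf_read16(PCI_COMMAND) & PCI_COMMAND_MEMORY;
}

int main(void)
{
    if ( memory_decoded() )
        puts("safe to poke the MSI-X table via MMIO");
    else
        puts("fall back to the config-space mask-all bit");
    return 0;
}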
--- a/xen/arch/x86/irq.c +++ b/xen/arch/x86/irq.c @@ -217,9 +217,9 @@ void destroy_irq(unsigned int irq) } spin_lock_irqsave(&desc->lock, flags); - desc->status |= IRQ_DISABLED; desc->status &= ~IRQ_GUEST; desc->handler->shutdown(desc); + desc->status |= IRQ_DISABLED; action = desc->action; desc->action = NULL; desc->msi_desc = NULL; @@ -995,8 +995,8 @@ void __init release_irq(unsigned int irq spin_lock_irqsave(&desc->lock,flags); action = desc->action; desc->action = NULL; - desc->status |= IRQ_DISABLED; desc->handler->shutdown(desc); + desc->status |= IRQ_DISABLED; spin_unlock_irqrestore(&desc->lock,flags); /* Wait to make sure it's not being used on another CPU */ @@ -1732,8 +1732,8 @@ static irq_guest_action_t *__pirq_guest_ BUG_ON(action->in_flight != 0); /* Disabling IRQ before releasing the desc_lock avoids an IRQ storm. */ - desc->status |= IRQ_DISABLED; desc->handler->disable(desc); + desc->status |= IRQ_DISABLED; /* * Mark any remaining pending EOIs as ready to flush. --- a/xen/arch/x86/msi.c +++ b/xen/arch/x86/msi.c @@ -123,6 +123,27 @@ static void msix_put_fixmap(struct arch_ spin_unlock(&msix->table_lock); } +static bool_t memory_decoded(const struct pci_dev *dev) +{ + u8 bus, slot, func; + + if ( !dev->info.is_virtfn ) + { + bus = dev->bus; + slot = PCI_SLOT(dev->devfn); + func = PCI_FUNC(dev->devfn); + } + else + { + bus = dev->info.physfn.bus; + slot = PCI_SLOT(dev->info.physfn.devfn); + func = PCI_FUNC(dev->info.physfn.devfn); + } + + return !!(pci_conf_read16(dev->seg, bus, slot, func, PCI_COMMAND) & + PCI_COMMAND_MEMORY); +} + /* * MSI message composition */ @@ -166,7 +187,7 @@ void msi_compose_msg(unsigned vector, co } } -static void read_msi_msg(struct msi_desc *entry, struct msi_msg *msg) +static bool_t read_msi_msg(struct msi_desc *entry, struct msi_msg *msg) { switch ( entry->msi_attrib.type ) { @@ -201,6 +222,8 @@ static void read_msi_msg(struct msi_desc { void __iomem *base = entry->mask_base; + if ( unlikely(!memory_decoded(entry->dev)) ) + return 0; msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET); @@ -212,6 +235,8 @@ static void read_msi_msg(struct msi_desc if ( iommu_intremap ) iommu_read_msi_from_ire(entry, msg); + + return 1; } static int write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) @@ -262,6 +287,8 @@ static int write_msi_msg(struct msi_desc { void __iomem *base = entry->mask_base; + if ( unlikely(!memory_decoded(entry->dev)) ) + return -ENXIO; writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); writel(msg->address_hi, @@ -289,7 +316,8 @@ void set_msi_affinity(struct irq_desc *d ASSERT(spin_is_locked(&desc->lock)); memset(&msg, 0, sizeof(msg)); - read_msi_msg(msi_desc, &msg); + if ( !read_msi_msg(msi_desc, &msg) ) + return; msg.data &= ~MSI_DATA_VECTOR_MASK; msg.data |= MSI_DATA_VECTOR(desc->arch.vector); @@ -349,23 +377,27 @@ int msi_maskable_irq(const struct msi_de || entry->msi_attrib.maskbit; } -static void msi_set_mask_bit(struct irq_desc *desc, bool_t host, bool_t guest) +static bool_t msi_set_mask_bit(struct irq_desc *desc, bool_t host, bool_t guest) { struct msi_desc *entry = desc->msi_desc; + struct pci_dev *pdev; + u16 seg; + u8 bus, slot, func; bool_t flag = host || guest; ASSERT(spin_is_locked(&desc->lock)); BUG_ON(!entry || !entry->dev); + pdev = entry->dev; + seg = pdev->seg; + bus = pdev->bus; + slot = PCI_SLOT(pdev->devfn); + func = PCI_FUNC(pdev->devfn); switch ( 
entry->msi_attrib.type ) { case PCI_CAP_ID_MSI: if ( entry->msi_attrib.maskbit ) { u32 mask_bits; - u16 seg = entry->dev->seg; - u8 bus = entry->dev->bus; - u8 slot = PCI_SLOT(entry->dev->devfn); - u8 func = PCI_FUNC(entry->dev->devfn); mask_bits = pci_conf_read32(seg, bus, slot, func, entry->msi.mpos); mask_bits &= ~((u32)1 << entry->msi_attrib.entry_nr); @@ -374,25 +406,54 @@ static void msi_set_mask_bit(struct irq_ } break; case PCI_CAP_ID_MSIX: - { - int offset = PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET; - writel(flag, entry->mask_base + offset); - readl(entry->mask_base + offset); - break; - } + if ( likely(memory_decoded(pdev)) ) + { + writel(flag, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); + readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); + break; + } + if ( flag ) + { + u16 control; + domid_t domid = pdev->domain->domain_id; + + pdev->msix->host_maskall = 1; + control = pci_conf_read16(seg, bus, slot, func, + msix_control_reg(entry->msi_attrib.pos)); + if ( control & PCI_MSIX_FLAGS_MASKALL ) + break; + pci_conf_write16(seg, bus, slot, func, + msix_control_reg(entry->msi_attrib.pos), + control | PCI_MSIX_FLAGS_MASKALL); + if ( pdev->msix->warned != domid ) + { + pdev->msix->warned = domid; + printk(XENLOG_G_WARNING + "cannot mask IRQ %d: masked MSI-X on Dom%d's %04x:%02x:%02x.%u\n", + desc->irq, domid, pdev->seg, pdev->bus, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + } + break; + } + /* fall through */ default: - BUG(); - break; + return 0; } entry->msi_attrib.host_masked = host; entry->msi_attrib.guest_masked = guest; + + return 1; } static int msi_get_mask_bit(const struct msi_desc *entry) { - switch (entry->msi_attrib.type) { + if ( !entry->dev ) + return -1; + + switch ( entry->msi_attrib.type ) + { case PCI_CAP_ID_MSI: - if (!entry->dev || !entry->msi_attrib.maskbit) + if ( !entry->msi_attrib.maskbit ) break; return (pci_conf_read32(entry->dev->seg, entry->dev->bus, PCI_SLOT(entry->dev->devfn), @@ -400,6 +461,8 @@ static int msi_get_mask_bit(const struct entry->msi.mpos) >> entry->msi_attrib.entry_nr) & 1; case PCI_CAP_ID_MSIX: + if ( unlikely(!memory_decoded(entry->dev)) ) + break; return readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) & 1; } return -1; @@ -407,12 +470,16 @@ static int msi_get_mask_bit(const struct void mask_msi_irq(struct irq_desc *desc) { - msi_set_mask_bit(desc, 1, desc->msi_desc->msi_attrib.guest_masked); + if ( unlikely(!msi_set_mask_bit(desc, 1, + desc->msi_desc->msi_attrib.guest_masked)) ) + BUG_ON(!(desc->status & IRQ_DISABLED)); } void unmask_msi_irq(struct irq_desc *desc) { - msi_set_mask_bit(desc, 0, desc->msi_desc->msi_attrib.guest_masked); + if ( unlikely(!msi_set_mask_bit(desc, 0, + desc->msi_desc->msi_attrib.guest_masked)) ) + WARN(); } void guest_mask_msi_irq(struct irq_desc *desc, bool_t mask) @@ -422,13 +489,15 @@ void guest_mask_msi_irq(struct irq_desc static unsigned int startup_msi_irq(struct irq_desc *desc) { - msi_set_mask_bit(desc, 0, !!(desc->status & IRQ_GUEST)); + if ( unlikely(!msi_set_mask_bit(desc, 0, !!(desc->status & IRQ_GUEST))) ) + WARN(); return 0; } static void shutdown_msi_irq(struct irq_desc *desc) { - msi_set_mask_bit(desc, 1, 1); + if ( unlikely(!msi_set_mask_bit(desc, 1, 1)) ) + BUG_ON(!(desc->status & IRQ_DISABLED)); } void ack_nonmaskable_msi_irq(struct irq_desc *desc) @@ -740,6 +809,9 @@ static int msix_capability_init(struct p control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos)); msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */ + if ( 
unlikely(!memory_decoded(dev)) ) + return -ENXIO; + if ( desc ) { entry = alloc_msi_entry(1); @@ -879,7 +951,8 @@ static int msix_capability_init(struct p ++msix->used_entries; /* Restore MSI-X enabled bits */ - pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control); + pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), + control & ~PCI_MSIX_FLAGS_MASKALL); return 0; } @@ -1024,8 +1097,16 @@ static void __pci_disable_msix(struct ms BUG_ON(list_empty(&dev->msi_list)); - writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); - + if ( likely(memory_decoded(dev)) ) + writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); + else if ( !(control & PCI_MSIX_FLAGS_MASKALL) ) + { + printk(XENLOG_WARNING + "cannot disable IRQ %d: masking MSI-X on %04x:%02x:%02x.%u\n", + entry->irq, dev->seg, dev->bus, + PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); + control |= PCI_MSIX_FLAGS_MASKALL; + } pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control); _pci_cleanup_msix(dev->msix); @@ -1199,15 +1280,24 @@ int pci_restore_msi_state(struct pci_dev nr = entry->msi.nvec; } else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX ) + { msix_set_enable(pdev, 0); + if ( unlikely(!memory_decoded(pdev)) ) + { + spin_unlock_irqrestore(&desc->lock, flags); + return -ENXIO; + } + } msg = entry->msg; write_msi_msg(entry, &msg); for ( i = 0; ; ) { - msi_set_mask_bit(desc, entry[i].msi_attrib.host_masked, - entry[i].msi_attrib.guest_masked); + if ( unlikely(!msi_set_mask_bit(desc, + entry[i].msi_attrib.host_masked, + entry[i].msi_attrib.guest_masked)) ) + BUG(); if ( !--nr ) break; ++++++ 55b0a2ab-x86-MSI-X-enable.patch ++++++ References: bsc#907514 bsc#910258 bsc#918984 bsc#923967 # Commit 0dba393db07331e9cff42df10e95b67547dfdb3e # Date 2015-07-23 10:15:39 +0200 # Author Jan Beulich <jbeulich@suse.com> # Committer Jan Beulich <jbeulich@suse.com> x86/MSI-X: access MSI-X table only after having enabled MSI-X As done in Linux by f598282f51 ("PCI: Fix the NIU MSI-X problem in a better way") and its broken predecessor, make sure we don't access the MSI-X table without having enabled MSI-X first, using the mask-all flag instead to prevent interrupts from occurring. 
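The resulting ordering can be sketched as follows (hypothetical standalone C modelling only the control-register writes; flag values per the PCI spec): enable MSI-X with every vector masked, program the table, then unmask.

#include <stdint.h>
#include <stdio.h>

#define PCI_MSIX_FLAGS_ENABLE  0x8000
#define PCI_MSIX_FLAGS_MASKALL 0x4000

static uint16_t control; /* models the device's MSI-X control register */

static void write_control(uint16_t v) { control = v; }

int main(void)
{
    uint16_t orig = control;

    /* 1) Enable MSI-X, but with mask-all set, so the now-accessible
     *    table can be programmed without interrupts firing. */
    write_control(orig | PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL);

    /* 2) ... program the table entries via MMIO here ... */

    /* 3) Drop mask-all once the entries are valid. */
    write_control((orig | PCI_MSIX_FLAGS_ENABLE) & ~PCI_MSIX_FLAGS_MASKALL);

    printf("control=%#x\n", (unsigned int)control);
    return 0;
}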
Signed-off-by: Jan Beulich <jbeulich@suse.com> Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> --- a/xen/arch/x86/msi.c +++ b/xen/arch/x86/msi.c @@ -144,6 +144,17 @@ static bool_t memory_decoded(const struc PCI_COMMAND_MEMORY); } +static bool_t msix_memory_decoded(const struct pci_dev *dev, unsigned int pos) +{ + u16 control = pci_conf_read16(dev->seg, dev->bus, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), msix_control_reg(pos)); + + if ( !(control & PCI_MSIX_FLAGS_ENABLE) ) + return 0; + + return memory_decoded(dev); +} + /* * MSI message composition */ @@ -222,7 +233,8 @@ static bool_t read_msi_msg(struct msi_de { void __iomem *base = entry->mask_base; - if ( unlikely(!memory_decoded(entry->dev)) ) + if ( unlikely(!msix_memory_decoded(entry->dev, + entry->msi_attrib.pos)) ) return 0; msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); @@ -287,7 +299,8 @@ static int write_msi_msg(struct msi_desc { void __iomem *base = entry->mask_base; - if ( unlikely(!memory_decoded(entry->dev)) ) + if ( unlikely(!msix_memory_decoded(entry->dev, + entry->msi_attrib.pos)) ) return -ENXIO; writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); @@ -381,9 +394,9 @@ static bool_t msi_set_mask_bit(struct ir { struct msi_desc *entry = desc->msi_desc; struct pci_dev *pdev; - u16 seg; + u16 seg, control; u8 bus, slot, func; - bool_t flag = host || guest; + bool_t flag = host || guest, maskall; ASSERT(spin_is_locked(&desc->lock)); BUG_ON(!entry || !entry->dev); @@ -406,36 +419,45 @@ static bool_t msi_set_mask_bit(struct ir } break; case PCI_CAP_ID_MSIX: + maskall = pdev->msix->host_maskall; + control = pci_conf_read16(seg, bus, slot, func, + msix_control_reg(entry->msi_attrib.pos)); + if ( unlikely(!(control & PCI_MSIX_FLAGS_ENABLE)) ) + { + pdev->msix->host_maskall = 1; + pci_conf_write16(seg, bus, slot, func, + msix_control_reg(entry->msi_attrib.pos), + control | (PCI_MSIX_FLAGS_ENABLE | + PCI_MSIX_FLAGS_MASKALL)); + } if ( likely(memory_decoded(pdev)) ) { writel(flag, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); - break; + if ( likely(control & PCI_MSIX_FLAGS_ENABLE) ) + break; + flag = 1; } - if ( flag ) + else if ( flag && !(control & PCI_MSIX_FLAGS_MASKALL) ) { - u16 control; domid_t domid = pdev->domain->domain_id; - pdev->msix->host_maskall = 1; - control = pci_conf_read16(seg, bus, slot, func, - msix_control_reg(entry->msi_attrib.pos)); - if ( control & PCI_MSIX_FLAGS_MASKALL ) - break; - pci_conf_write16(seg, bus, slot, func, - msix_control_reg(entry->msi_attrib.pos), - control | PCI_MSIX_FLAGS_MASKALL); + maskall = 1; if ( pdev->msix->warned != domid ) { pdev->msix->warned = domid; printk(XENLOG_G_WARNING - "cannot mask IRQ %d: masked MSI-X on Dom%d's %04x:%02x:%02x.%u\n", + "cannot mask IRQ %d: masking MSI-X on Dom%d's %04x:%02x:%02x.%u\n", desc->irq, domid, pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); } - break; } - /* fall through */ + pdev->msix->host_maskall = maskall; + if ( maskall || pdev->msix->guest_maskall ) + control |= PCI_MSIX_FLAGS_MASKALL; + pci_conf_write16(seg, bus, slot, func, + msix_control_reg(entry->msi_attrib.pos), control); + return flag; default: return 0; } @@ -461,7 +483,8 @@ static int msi_get_mask_bit(const struct entry->msi.mpos) >> entry->msi_attrib.entry_nr) & 1; case PCI_CAP_ID_MSIX: - if ( unlikely(!memory_decoded(entry->dev)) ) + if ( unlikely(!msix_memory_decoded(entry->dev, + 
entry->msi_attrib.pos)) ) break; return readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) & 1; } @@ -564,9 +587,31 @@ static struct msi_desc *alloc_msi_entry( int setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc) { - return __setup_msi_irq(desc, msidesc, - msi_maskable_irq(msidesc) ? &pci_msi_maskable - : &pci_msi_nonmaskable); + const struct pci_dev *pdev = msidesc->dev; + unsigned int cpos = msix_control_reg(msidesc->msi_attrib.pos); + u16 control = ~0; + int rc; + + if ( msidesc->msi_attrib.type == PCI_CAP_ID_MSIX ) + { + control = pci_conf_read16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn), + PCI_FUNC(pdev->devfn), cpos); + if ( !(control & PCI_MSIX_FLAGS_ENABLE) ) + pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn), + PCI_FUNC(pdev->devfn), cpos, + control | (PCI_MSIX_FLAGS_ENABLE | + PCI_MSIX_FLAGS_MASKALL)); + } + + rc = __setup_msi_irq(desc, msidesc, + msi_maskable_irq(msidesc) ? &pci_msi_maskable + : &pci_msi_nonmaskable); + + if ( !(control & PCI_MSIX_FLAGS_ENABLE) ) + pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn), + PCI_FUNC(pdev->devfn), cpos, control); + + return rc; } int __setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc, @@ -803,20 +848,38 @@ static int msix_capability_init(struct p u8 bus = dev->bus; u8 slot = PCI_SLOT(dev->devfn); u8 func = PCI_FUNC(dev->devfn); + bool_t maskall = msix->host_maskall; ASSERT(spin_is_locked(&pcidevs_lock)); control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos)); - msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */ + /* + * Ensure MSI-X interrupts are masked during setup. Some devices require + * MSI-X to be enabled before we can touch the MSI-X registers. We need + * to mask all the vectors to prevent interrupts coming in before they're + * fully set up. 
+ */ + msix->host_maskall = 1; + pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), + control | (PCI_MSIX_FLAGS_ENABLE | + PCI_MSIX_FLAGS_MASKALL)); if ( unlikely(!memory_decoded(dev)) ) + { + pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), + control & ~PCI_MSIX_FLAGS_ENABLE); return -ENXIO; + } if ( desc ) { entry = alloc_msi_entry(1); if ( !entry ) + { + pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), + control & ~PCI_MSIX_FLAGS_ENABLE); return -ENOMEM; + } ASSERT(msi); } @@ -847,6 +910,8 @@ static int msix_capability_init(struct p { if ( !msi || !msi->table_base ) { + pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), + control & ~PCI_MSIX_FLAGS_ENABLE); xfree(entry); return -ENXIO; } @@ -889,6 +954,8 @@ static int msix_capability_init(struct p if ( idx < 0 ) { + pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), + control & ~PCI_MSIX_FLAGS_ENABLE); xfree(entry); return idx; } @@ -915,7 +982,7 @@ static int msix_capability_init(struct p if ( !msix->used_entries ) { - msix->host_maskall = 0; + maskall = 0; if ( !msix->guest_maskall ) control &= ~PCI_MSIX_FLAGS_MASKALL; else @@ -951,8 +1018,8 @@ static int msix_capability_init(struct p ++msix->used_entries; /* Restore MSI-X enabled bits */ - pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), - control & ~PCI_MSIX_FLAGS_MASKALL); + msix->host_maskall = maskall; + pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control); return 0; } @@ -1092,8 +1159,15 @@ static void __pci_disable_msix(struct ms PCI_CAP_ID_MSIX); u16 control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(entry->msi_attrib.pos)); + bool_t maskall = dev->msix->host_maskall; - msix_set_enable(dev, 0); + if ( unlikely(!(control & PCI_MSIX_FLAGS_ENABLE)) ) + { + dev->msix->host_maskall = 1; + pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), + control | (PCI_MSIX_FLAGS_ENABLE | + PCI_MSIX_FLAGS_MASKALL)); + } BUG_ON(list_empty(&dev->msi_list)); @@ -1105,8 +1179,11 @@ static void __pci_disable_msix(struct ms "cannot disable IRQ %d: masking MSI-X on %04x:%02x:%02x.%u\n", entry->irq, dev->seg, dev->bus, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); - control |= PCI_MSIX_FLAGS_MASKALL; + maskall = 1; } + dev->msix->host_maskall = maskall; + if ( maskall || dev->msix->guest_maskall ) + control |= PCI_MSIX_FLAGS_MASKALL; pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control); _pci_cleanup_msix(dev->msix); @@ -1255,6 +1332,8 @@ int pci_restore_msi_state(struct pci_dev list_for_each_entry_safe( entry, tmp, &pdev->msi_list, list ) { unsigned int i = 0, nr = 1; + u16 control = 0; + u8 slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn); irq = entry->irq; desc = &irq_desc[irq]; @@ -1281,10 +1360,18 @@ int pci_restore_msi_state(struct pci_dev } else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX ) { - msix_set_enable(pdev, 0); + control = pci_conf_read16(pdev->seg, pdev->bus, slot, func, + msix_control_reg(entry->msi_attrib.pos)); + pci_conf_write16(pdev->seg, pdev->bus, slot, func, + msix_control_reg(entry->msi_attrib.pos), + control | (PCI_MSIX_FLAGS_ENABLE | + PCI_MSIX_FLAGS_MASKALL)); if ( unlikely(!memory_decoded(pdev)) ) { spin_unlock_irqrestore(&desc->lock, flags); + pci_conf_write16(pdev->seg, pdev->bus, slot, func, + msix_control_reg(entry->msi_attrib.pos), + control & ~PCI_MSIX_FLAGS_ENABLE); return -ENXIO; } } @@ -1314,11 +1401,9 @@ int pci_restore_msi_state(struct pci_dev if ( entry->msi_attrib.type == PCI_CAP_ID_MSI ) { unsigned int cpos = 
msi_control_reg(entry->msi_attrib.pos); - u16 control = pci_conf_read16(pdev->seg, pdev->bus, - PCI_SLOT(pdev->devfn), - PCI_FUNC(pdev->devfn), cpos); - control &= ~PCI_MSI_FLAGS_QSIZE; + control = pci_conf_read16(pdev->seg, pdev->bus, slot, func, cpos) & + ~PCI_MSI_FLAGS_QSIZE; multi_msi_enable(control, entry->msi.nvec); pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), cpos, control); @@ -1326,7 +1411,9 @@ int pci_restore_msi_state(struct pci_dev msi_set_enable(pdev, 1); } else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX ) - msix_set_enable(pdev, 1); + pci_conf_write16(pdev->seg, pdev->bus, slot, func, + msix_control_reg(entry->msi_attrib.pos), + control | PCI_MSIX_FLAGS_ENABLE); } return 0; ++++++ 55b0a2db-x86-MSI-track-guest-masking.patch ++++++ References: bsc#907514 bsc#910258 bsc#918984 bsc#923967 # Commit aa7c1fdf9dd04a1287f4770906b2c41b88a28228 # Date 2015-07-23 10:16:27 +0200 # Author Jan Beulich <jbeulich@suse.com> # Committer Jan Beulich <jbeulich@suse.com> x86/MSI: properly track guest masking requests ... by monitoring writes to the mask register. This allows reverting the main effect of the XSA-129 patches in qemu. Signed-off-by: Jan Beulich <jbeulich@suse.com> Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> --- a/xen/arch/x86/msi.c +++ b/xen/arch/x86/msi.c @@ -1303,6 +1303,37 @@ int pci_msi_conf_write_intercept(struct return 1; } + entry = find_msi_entry(pdev, -1, PCI_CAP_ID_MSI); + if ( entry && entry->msi_attrib.maskbit ) + { + uint16_t cntl; + uint32_t unused; + + pos = entry->msi_attrib.pos; + if ( reg < pos || reg >= entry->msi.mpos + 8 ) + return 0; + + if ( reg == msi_control_reg(pos) ) + return size == 2 ? 1 : -EACCES; + if ( reg < entry->msi.mpos || reg >= entry->msi.mpos + 4 || size != 4 ) + return -EACCES; + + cntl = pci_conf_read16(seg, bus, slot, func, msi_control_reg(pos)); + unused = ~(uint32_t)0 >> (32 - multi_msi_capable(cntl)); + for ( pos = 0; pos < entry->msi.nvec; ++pos, ++entry ) + { + entry->msi_attrib.guest_masked = + *data >> entry->msi_attrib.entry_nr; + if ( entry->msi_attrib.host_masked ) + *data |= 1 << pos; + unused &= ~(1 << pos); + } + + *data |= unused; + + return 1; + } + return 0; } ++++++ 55c1d83d-x86-gdt-Drop-write-only-xalloc-d-array.patch ++++++ # Commit a7bd9b1661304500cd18b7d216d616ecf053ebdb # Date 2015-08-05 10:32:45 +0100 # Author Andrew Cooper <andrew.cooper3@citrix.com> # Committer Ian Campbell <ian.campbell@citrix.com> x86/gdt: Drop write-only, xalloc()'d array from set_gdt() It is not used, and can cause a spurious failure of the set_gdt() hypercall in low memory situations. Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> Reviewed-by: Wei Liu <wei.liu2@citrix.com> Reviewed-by: Ian Campbell <ian.campbell@citrix.com> Reviewed-by: George Dunlap <george.dunlap@eu.citrix.com> --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -4383,20 +4383,15 @@ long set_gdt(struct vcpu *v, l1_pgentry_t *pl1e; /* NB. There are 512 8-byte entries per GDT page. */ int i, nr_pages = (entries + 511) / 512; - unsigned long mfn, *pfns; if ( entries > FIRST_RESERVED_GDT_ENTRY ) return -EINVAL; - pfns = xmalloc_array(unsigned long, nr_pages); - if ( !pfns ) - return -ENOMEM; - /* Check the pages in the new GDT. 
*/ for ( i = 0; i < nr_pages; i++ ) { struct page_info *page; - pfns[i] = frames[i]; + page = get_page_from_gfn(d, frames[i], NULL, P2M_ALLOC); if ( !page ) goto fail; @@ -4405,7 +4400,7 @@ long set_gdt(struct vcpu *v, put_page(page); goto fail; } - mfn = frames[i] = page_to_mfn(page); + frames[i] = page_to_mfn(page); } /* Tear down the old GDT. */ @@ -4420,7 +4415,6 @@ long set_gdt(struct vcpu *v, l1e_write(&pl1e[i], l1e_from_pfn(frames[i], __PAGE_HYPERVISOR)); } - xfree(pfns); return 0; fail: @@ -4428,7 +4422,6 @@ long set_gdt(struct vcpu *v, { put_page_and_type(mfn_to_page(frames[i])); } - xfree(pfns); return -EINVAL; } ++++++ 55c3232b-x86-mm-Make-hap-shadow-teardown-preemptible.patch ++++++ # Commit 0174da5b79752e2d5d6ca0faed89536e8f3d91c7 # Date 2015-08-06 10:04:43 +0100 # Author Anshul Makkar <anshul.makkar@citrix.com> # Committer Ian Campbell <ian.campbell@citrix.com> x86/mm: Make {hap, shadow}_teardown() preemptible A domain with sufficient shadow allocation can cause a watchdog timeout during domain destruction. Expand the existing -ERESTART logic in paging_teardown() to allow {hap/sh}_set_allocation() to become restartable during the DOMCTL_destroydomain hypercall. Signed-off-by: Anshul Makkar <anshul.makkar@citrix.com> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> Reviewed-by: Tim Deegan <tim@xen.org> Reviewed-by: George Dunlap <george.dunlap@eu.citrix.com> --- a/xen/arch/x86/mm/hap/hap.c +++ b/xen/arch/x86/mm/hap/hap.c @@ -503,7 +503,7 @@ void hap_final_teardown(struct domain *d } if ( d->arch.paging.hap.total_pages != 0 ) - hap_teardown(d); + hap_teardown(d, NULL); p2m_teardown(p2m_get_hostp2m(d)); /* Free any memory that the p2m teardown released */ @@ -513,7 +513,7 @@ void hap_final_teardown(struct domain *d paging_unlock(d); } -void hap_teardown(struct domain *d) +void hap_teardown(struct domain *d, int *preempted) { struct vcpu *v; mfn_t mfn; @@ -541,18 +541,11 @@ void hap_teardown(struct domain *d) if ( d->arch.paging.hap.total_pages != 0 ) { - HAP_PRINTK("teardown of domain %u starts." - " pages total = %u, free = %u, p2m=%u\n", - d->domain_id, - d->arch.paging.hap.total_pages, - d->arch.paging.hap.free_pages, - d->arch.paging.hap.p2m_pages); - hap_set_allocation(d, 0, NULL); - HAP_PRINTK("teardown done." - " pages total = %u, free = %u, p2m=%u\n", - d->arch.paging.hap.total_pages, - d->arch.paging.hap.free_pages, - d->arch.paging.hap.p2m_pages); + hap_set_allocation(d, 0, preempted); + + if ( preempted && *preempted ) + goto out; + ASSERT(d->arch.paging.hap.total_pages == 0); } @@ -561,6 +554,7 @@ void hap_teardown(struct domain *d) xfree(d->arch.hvm_domain.dirty_vram); d->arch.hvm_domain.dirty_vram = NULL; +out: paging_unlock(d); } --- a/xen/arch/x86/mm/paging.c +++ b/xen/arch/x86/mm/paging.c @@ -779,12 +779,15 @@ long paging_domctl_continuation(XEN_GUES /* Call when destroying a domain */ int paging_teardown(struct domain *d) { - int rc; + int rc, preempted = 0; if ( hap_enabled(d) ) - hap_teardown(d); + hap_teardown(d, &preempted); else - shadow_teardown(d); + shadow_teardown(d, &preempted); + + if ( preempted ) + return -ERESTART; /* clean up log dirty resources. */ rc = paging_free_log_dirty_bitmap(d, 0); --- a/xen/arch/x86/mm/shadow/common.c +++ b/xen/arch/x86/mm/shadow/common.c @@ -3030,7 +3030,7 @@ int shadow_enable(struct domain *d, u32 return rv; } -void shadow_teardown(struct domain *d) +void shadow_teardown(struct domain *d, int *preempted) /* Destroy the shadow pagetables of this domain and free its shadow memory. 
* Should only be called for dying domains. */ { @@ -3091,23 +3091,16 @@ void shadow_teardown(struct domain *d) if ( d->arch.paging.shadow.total_pages != 0 ) { - SHADOW_PRINTK("teardown of domain %u starts." - " Shadow pages total = %u, free = %u, p2m=%u\n", - d->domain_id, - d->arch.paging.shadow.total_pages, - d->arch.paging.shadow.free_pages, - d->arch.paging.shadow.p2m_pages); /* Destroy all the shadows and release memory to domheap */ - sh_set_allocation(d, 0, NULL); + sh_set_allocation(d, 0, preempted); + + if ( preempted && *preempted ) + goto out; + /* Release the hash table back to xenheap */ if (d->arch.paging.shadow.hash_table) shadow_hash_teardown(d); - /* Should not have any more memory held */ - SHADOW_PRINTK("teardown done." - " Shadow pages total = %u, free = %u, p2m=%u\n", - d->arch.paging.shadow.total_pages, - d->arch.paging.shadow.free_pages, - d->arch.paging.shadow.p2m_pages); + ASSERT(d->arch.paging.shadow.total_pages == 0); } @@ -3138,6 +3131,7 @@ void shadow_teardown(struct domain *d) d->arch.hvm_domain.dirty_vram = NULL; } +out: paging_unlock(d); /* Must be called outside the lock */ @@ -3159,7 +3153,7 @@ void shadow_final_teardown(struct domain * It is possible for a domain that never got domain_kill()ed * to get here with its shadow allocation intact. */ if ( d->arch.paging.shadow.total_pages != 0 ) - shadow_teardown(d); + shadow_teardown(d, NULL); /* It is now safe to pull down the p2m map. */ p2m_teardown(p2m_get_hostp2m(d)); --- a/xen/include/asm-x86/hap.h +++ b/xen/include/asm-x86/hap.h @@ -54,7 +54,7 @@ int hap_domctl(struct domain *d, xen_d XEN_GUEST_HANDLE_PARAM(void) u_domctl); int hap_enable(struct domain *d, u32 mode); void hap_final_teardown(struct domain *d); -void hap_teardown(struct domain *d); +void hap_teardown(struct domain *d, int *preempted); void hap_vcpu_init(struct vcpu *v); int hap_track_dirty_vram(struct domain *d, unsigned long begin_pfn, --- a/xen/include/asm-x86/shadow.h +++ b/xen/include/asm-x86/shadow.h @@ -72,7 +72,7 @@ int shadow_domctl(struct domain *d, XEN_GUEST_HANDLE_PARAM(void) u_domctl); /* Call when destroying a domain */ -void shadow_teardown(struct domain *d); +void shadow_teardown(struct domain *d, int *preempted); /* Call once all of the references to the domain have gone away */ void shadow_final_teardown(struct domain *d);
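To close, the preemption plumbing added above follows a pattern worth distilling (hypothetical standalone C, not the Xen code): the worker reports remaining work through an out-parameter, a NULL out-parameter means "run to completion", and the top-level caller turns preemption into -ERESTART so the operation is resumed later.

#include <stdio.h>

#define ERESTART 85 /* stand-in for Xen's hypercall-continuation errno */

/* Worker: does a bounded chunk of work, then flags that more remains.
 * Callers passing NULL (final teardown) run to completion. */
static void teardown(int *pages, int *preempted)
{
    int budget = 100;

    while ( *pages && (preempted == NULL || budget--) )
        --*pages;

    if ( preempted )
        *preempted = (*pages != 0);
}

static int paging_teardown(int *pages)
{
    int preempted = 0;

    teardown(pages, &preempted);
    return preempted ? -ERESTART : 0;
}

int main(void)
{
    int pages = 250, rc;

    while ( (rc = paging_teardown(&pages)) == -ERESTART )
        puts("preempted; the hypercall would be restarted");
    printf("done, rc=%d\n", rc);
    return 0;
}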