Hello community, here is the log from the commit of package xen for openSUSE:Factory checked in at 2014-09-06 12:17:45 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/xen (Old) and /work/SRC/openSUSE:Factory/.xen.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Package is "xen" Changes: -------- --- /work/SRC/openSUSE:Factory/xen/xen.changes 2014-08-20 17:53:00.000000000 +0200 +++ /work/SRC/openSUSE:Factory/.xen.new/xen.changes 2014-09-06 12:17:50.000000000 +0200 @@ -1,0 +2,41 @@ +Thu Sep 4 17:01:24 CST 2014 - cyliu@suse.com + +- bnc#882405 - Only one key-press event was generated while holding + a key before key-release in pv guests through xl vncviewer + tigervnc-long-press.patch + +------------------------------------------------------------------- +Tue Sep 2 09:01:24 MDT 2014 - carnold@suse.com + +- Update to Xen Version 4.4.1 FCS + xen-4.4.1-testing-src.tar.bz2 +- Dropped patches now contained in tarball + 53d7b781-x86-cpu-undo-BIOS-CPUID-max_leaf-limit-earlier.patch + 53df71c7-lz4-check-for-underruns.patch + 53e47d6b-x86_emulate-properly-do-IP-updates-and-other-side-effects.patch + +------------------------------------------------------------------- +Mon Sep 1 15:20:20 MDT 2014 - carnold@suse.com + +- bnc#882089 - Windows 2012 R2 fails to boot up with greater than + 60 vcpus + 53df727b-x86-HVM-extend-LAPIC-shortcuts-around-P2M-lookups.patch + 53e8be5f-x86-vHPET-use-rwlock-instead-of-simple-one.patch + 53ff3659-x86-consolidate-boolean-inputs-in-hvm-and-p2m.patch + 53ff36ae-x86-hvm-treat-non-insn-fetch-NPF-also-as-read-violations.patch + 53ff36d5-x86-mem_event-deliver-gla-fault-EPT-violation-information.patch + 54005472-EPT-utilize-GLA-GPA-translation-known-for-certain-faults.patch +- Upstream patches from Jan + 53f737b1-VMX-fix-DebugCtl-MSR-clearing.patch + 53f7386d-x86-irq-process-softirqs-in-irq-keyhandlers.patch + 
53ff3716-x86-ats-Disable-Address-Translation-Services-by-default.patch + 53ff3899-x86-NMI-allow-processing-unknown-NMIs-with-watchdog.patch + +------------------------------------------------------------------- +Fri Aug 29 09:25:47 MDT 2014 - carnold@suse.com + +- bnc#864801 - VUL-0: CVE-2013-4540: qemu: zaurus: buffer overrun + on invalid state load + CVE-2013-4540-qemu.patch + +------------------------------------------------------------------- Old: ---- 53d7b781-x86-cpu-undo-BIOS-CPUID-max_leaf-limit-earlier.patch 53df71c7-lz4-check-for-underruns.patch 53e47d6b-x86_emulate-properly-do-IP-updates-and-other-side-effects.patch New: ---- 53e8be5f-x86-vHPET-use-rwlock-instead-of-simple-one.patch 53f737b1-VMX-fix-DebugCtl-MSR-clearing.patch 53f7386d-x86-irq-process-softirqs-in-irq-keyhandlers.patch 53ff3659-x86-consolidate-boolean-inputs-in-hvm-and-p2m.patch 53ff36ae-x86-hvm-treat-non-insn-fetch-NPF-also-as-read-violations.patch 53ff36d5-x86-mem_event-deliver-gla-fault-EPT-violation-information.patch 53ff3716-x86-ats-Disable-Address-Translation-Services-by-default.patch 53ff3899-x86-NMI-allow-processing-unknown-NMIs-with-watchdog.patch 54005472-EPT-utilize-GLA-GPA-translation-known-for-certain-faults.patch CVE-2013-4540-qemu.patch tigervnc-long-press.patch ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ xen.spec ++++++ --- /var/tmp/diff_new_pack.SmKn8C/_old 2014-09-06 12:17:53.000000000 +0200 +++ /var/tmp/diff_new_pack.SmKn8C/_new 2014-09-06 12:17:53.000000000 +0200 @@ -21,7 +21,7 @@ ExclusiveArch: %ix86 x86_64 %arm aarch64 %define xvers 4.4 %define xvermaj 4 -%define changeset 28531 +%define changeset 28541 %define xen_build_dir xen-4.4.1-testing # %define with_kmp 0 @@ -153,7 +153,7 @@ %endif %endif -Version: 4.4.1_02 +Version: 4.4.1_04 Release: 0 PreReq: %insserv_prereq %fillup_prereq Summary: Xen Virtualization: Hypervisor (aka VMM aka Microkernel) @@ -224,11 +224,17 @@ Patch10: 
53aac342-x86-HVM-consolidate-and-sanitize-CR4-guest-reserved-bit-determination.patch Patch11: 53c9151b-Fix-xl-vncviewer-accesses-port-0-by-any-invalid-domid.patch Patch12: 53d124e7-fix-list_domain_details-check-config-data-length-0.patch -Patch13: 53d7b781-x86-cpu-undo-BIOS-CPUID-max_leaf-limit-earlier.patch -Patch14: 53dba447-x86-ACPI-allow-CMOS-RTC-use-even-when-ACPI-says-there-is-none.patch -Patch15: 53df71c7-lz4-check-for-underruns.patch -Patch16: 53df727b-x86-HVM-extend-LAPIC-shortcuts-around-P2M-lookups.patch -Patch17: 53e47d6b-x86_emulate-properly-do-IP-updates-and-other-side-effects.patch +Patch13: 53dba447-x86-ACPI-allow-CMOS-RTC-use-even-when-ACPI-says-there-is-none.patch +Patch14: 53df727b-x86-HVM-extend-LAPIC-shortcuts-around-P2M-lookups.patch +Patch15: 53e8be5f-x86-vHPET-use-rwlock-instead-of-simple-one.patch +Patch16: 53f737b1-VMX-fix-DebugCtl-MSR-clearing.patch +Patch17: 53f7386d-x86-irq-process-softirqs-in-irq-keyhandlers.patch +Patch18: 53ff3659-x86-consolidate-boolean-inputs-in-hvm-and-p2m.patch +Patch19: 53ff36ae-x86-hvm-treat-non-insn-fetch-NPF-also-as-read-violations.patch +Patch20: 53ff36d5-x86-mem_event-deliver-gla-fault-EPT-violation-information.patch +Patch21: 53ff3716-x86-ats-Disable-Address-Translation-Services-by-default.patch +Patch22: 53ff3899-x86-NMI-allow-processing-unknown-NMIs-with-watchdog.patch +Patch23: 54005472-EPT-utilize-GLA-GPA-translation-known-for-certain-faults.patch # Upstream qemu Patch250: VNC-Support-for-ExtendedKeyEvent-client-message.patch Patch251: 0001-net-move-the-tap-buffer-into-TAPState.patch @@ -239,6 +245,7 @@ Patch256: 0006-e1000-clear-EOP-for-multi-buffer-descriptors.patch Patch257: 0007-e1000-verify-we-have-buffers-upfront.patch Patch258: 0008-e1000-check-buffer-availability.patch +Patch259: CVE-2013-4540-qemu.patch # Our platform specific patches Patch301: xen-destdir.patch Patch302: xen-xmexample.patch @@ -357,6 +364,7 @@ Patch470: qemu-xen-upstream-qdisk-cache-unsafe.patch Patch471: 
xen-pass-kernel-initrd-to-qemu.patch Patch472: qemu-support-xen-hvm-direct-kernel-boot.patch +Patch473: tigervnc-long-press.patch # Hypervisor and PV driver Patches Patch501: x86-ioapic-ack-default.patch Patch502: x86-cpufreq-report.patch @@ -611,6 +619,12 @@ %patch15 -p1 %patch16 -p1 %patch17 -p1 +%patch18 -p1 +%patch19 -p1 +%patch20 -p1 +%patch21 -p1 +%patch22 -p1 +%patch23 -p1 # Upstream qemu patches %patch250 -p1 %patch251 -p1 @@ -621,6 +635,7 @@ %patch256 -p1 %patch257 -p1 %patch258 -p1 +%patch259 -p1 # Our platform specific patches %patch301 -p1 %patch302 -p1 @@ -738,6 +753,7 @@ %patch470 -p1 %patch471 -p1 %patch472 -p1 +%patch473 -p1 # Hypervisor and PV driver Patches %patch501 -p1 %patch502 -p1 ++++++ 53df727b-x86-HVM-extend-LAPIC-shortcuts-around-P2M-lookups.patch ++++++ --- /var/tmp/diff_new_pack.SmKn8C/_old 2014-09-06 12:17:53.000000000 +0200 +++ /var/tmp/diff_new_pack.SmKn8C/_new 2014-09-06 12:17:53.000000000 +0200 @@ -1,3 +1,5 @@ +References: bnc#882089 + # Commit fd1863847af15c3676348447755e1a1801f9d394 # Date 2014-08-04 13:46:03 +0200 # Author Jan Beulich <jbeulich@suse.com> ++++++ 53e8be5f-x86-vHPET-use-rwlock-instead-of-simple-one.patch ++++++ References: bnc#882089 # Commit ded2100990d1688b96c2edc7221887c56c1a8e04 # Date 2014-08-11 15:00:15 +0200 # Author Jan Beulich <jbeulich@suse.com> # Committer Jan Beulich <jbeulich@suse.com> x86/vHPET: use rwlock instead of simple one This namely benefits guests heavily reading the main counter, but not touching the HPET much otherwise. Note that due to the way hpet_get_comparator() works hpet_read() has to special-case reads from the comparator registers and use a write lock there instead of the read one used for all other registers. 
Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/hvm/hpet.c +++ b/xen/arch/x86/hvm/hpet.c @@ -75,7 +75,7 @@ static inline uint64_t hpet_read_maincounter(HPETState *h) { - ASSERT(spin_is_locked(&h->lock)); + ASSERT(rw_is_locked(&h->lock)); if ( hpet_enabled(h) ) return guest_time_hpet(h) + h->mc_offset; @@ -88,6 +88,8 @@ static uint64_t hpet_get_comparator(HPET uint64_t comparator; uint64_t elapsed; + ASSERT(rw_is_write_locked(&h->lock)); + comparator = h->hpet.comparator64[tn]; if ( timer_is_periodic(h, tn) ) { @@ -172,16 +174,24 @@ static int hpet_read( goto out; } - spin_lock(&h->lock); + result = addr < HPET_Tn_CMP(0) || + ((addr - HPET_Tn_CMP(0)) % (HPET_Tn_CMP(1) - HPET_Tn_CMP(0))) > 7; + if ( result ) + read_lock(&h->lock); + else + write_lock(&h->lock); val = hpet_read64(h, addr); + if ( result ) + read_unlock(&h->lock); + else + write_unlock(&h->lock); + result = val; if ( length != 8 ) result = (val >> ((addr & 7) * 8)) & ((1ULL << (length * 8)) - 1); - spin_unlock(&h->lock); - out: *pval = result; return X86EMUL_OKAY; @@ -190,7 +200,7 @@ static int hpet_read( static void hpet_stop_timer(HPETState *h, unsigned int tn) { ASSERT(tn < HPET_TIMER_NUM); - ASSERT(spin_is_locked(&h->lock)); + ASSERT(rw_is_write_locked(&h->lock)); destroy_periodic_time(&h->pt[tn]); /* read the comparator to get it updated so a read while stopped will * return the expected value. 
*/ @@ -208,7 +218,7 @@ static void hpet_set_timer(HPETState *h, unsigned int oneshot; ASSERT(tn < HPET_TIMER_NUM); - ASSERT(spin_is_locked(&h->lock)); + ASSERT(rw_is_write_locked(&h->lock)); if ( (tn == 0) && (h->hpet.config & HPET_CFG_LEGACY) ) { @@ -289,7 +299,7 @@ static int hpet_write( if ( hpet_check_access_length(addr, length) != 0 ) goto out; - spin_lock(&h->lock); + write_lock(&h->lock); old_val = hpet_read64(h, addr); new_val = val; @@ -448,7 +458,7 @@ static int hpet_write( #undef set_start_timer #undef set_restart_timer - spin_unlock(&h->lock); + write_unlock(&h->lock); out: return X86EMUL_OKAY; @@ -473,7 +483,7 @@ static int hpet_save(struct domain *d, h HPETState *hp = domain_vhpet(d); int rc; - spin_lock(&hp->lock); + write_lock(&hp->lock); /* Write the proper value into the main counter */ hp->hpet.mc64 = hp->mc_offset + guest_time_hpet(hp); @@ -507,7 +517,7 @@ static int hpet_save(struct domain *d, h rec->timers[2].cmp = hp->hpet.comparator64[2]; } - spin_unlock(&hp->lock); + write_unlock(&hp->lock); return rc; } @@ -519,12 +529,12 @@ static int hpet_load(struct domain *d, h uint64_t cmp; int i; - spin_lock(&hp->lock); + write_lock(&hp->lock); /* Reload the HPET registers */ if ( _hvm_check_entry(h, HVM_SAVE_CODE(HPET), HVM_SAVE_LENGTH(HPET), 1) ) { - spin_unlock(&hp->lock); + write_unlock(&hp->lock); return -EINVAL; } @@ -564,7 +574,7 @@ static int hpet_load(struct domain *d, h if ( timer_enabled(hp, i) ) hpet_set_timer(hp, i); - spin_unlock(&hp->lock); + write_unlock(&hp->lock); return 0; } @@ -578,7 +588,7 @@ void hpet_init(struct vcpu *v) memset(h, 0, sizeof(HPETState)); - spin_lock_init(&h->lock); + rwlock_init(&h->lock); h->stime_freq = S_TO_NS; @@ -607,14 +617,14 @@ void hpet_deinit(struct domain *d) int i; HPETState *h = domain_vhpet(d); - spin_lock(&h->lock); + write_lock(&h->lock); if ( hpet_enabled(h) ) for ( i = 0; i < HPET_TIMER_NUM; i++ ) if ( timer_enabled(h, i) ) hpet_stop_timer(h, i); - spin_unlock(&h->lock); + 
write_unlock(&h->lock); } void hpet_reset(struct domain *d) --- a/xen/arch/x86/hvm/vpt.c +++ b/xen/arch/x86/hvm/vpt.c @@ -508,10 +508,10 @@ void pt_adjust_global_vcpu_target(struct pt_adjust_vcpu(&pl_time->vrtc.pt, v); spin_unlock(&pl_time->vrtc.lock); - spin_lock(&pl_time->vhpet.lock); + write_lock(&pl_time->vhpet.lock); for ( i = 0; i < HPET_TIMER_NUM; i++ ) pt_adjust_vcpu(&pl_time->vhpet.pt[i], v); - spin_unlock(&pl_time->vhpet.lock); + write_unlock(&pl_time->vhpet.lock); } --- a/xen/include/asm-x86/hvm/vpt.h +++ b/xen/include/asm-x86/hvm/vpt.h @@ -96,7 +96,7 @@ typedef struct HPETState { uint64_t hpet_to_ns_limit; /* max hpet ticks convertable to ns */ uint64_t mc_offset; struct periodic_time pt[HPET_TIMER_NUM]; - spinlock_t lock; + rwlock_t lock; } HPETState; typedef struct RTCState { ++++++ 53f737b1-VMX-fix-DebugCtl-MSR-clearing.patch ++++++ # Commit dfa625e15f3d6c374637f2bb789e1f444c2781c3 # Date 2014-08-22 14:29:37 +0200 # Author Jan Beulich <jbeulich@suse.com> # Committer Jan Beulich <jbeulich@suse.com> VMX: fix DebugCtl MSR clearing The previous shortcut was wrong, as it bypassed the necessary vmwrite: All we really want to avoid if the guest writes zero is to add the MSR to the host-load list. Signed-off-by: Jan Beulich <jbeulich@suse.com> Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> Acked-by: Kevin Tian <kevin.tian@intel.com> --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -2170,8 +2170,6 @@ static int vmx_msr_write_intercept(unsig int i, rc = 0; uint64_t supported = IA32_DEBUGCTLMSR_LBR | IA32_DEBUGCTLMSR_BTF; - if ( !msr_content ) - break; if ( msr_content & ~supported ) { /* Perhaps some other bits are supported in vpmu. 
*/ @@ -2191,12 +2189,10 @@ static int vmx_msr_write_intercept(unsig } if ( (rc < 0) || - (vmx_add_host_load_msr(msr) < 0) ) + (msr_content && (vmx_add_host_load_msr(msr) < 0)) ) hvm_inject_hw_exception(TRAP_machine_check, 0); else - { __vmwrite(GUEST_IA32_DEBUGCTL, msr_content); - } break; } ++++++ 53f7386d-x86-irq-process-softirqs-in-irq-keyhandlers.patch ++++++ # Commit e13b3203990706db1313ec2aadd9a30b249ee793 # Date 2014-08-22 14:32:45 +0200 # Author Andrew Cooper <andrew.cooper3@citrix.com> # Committer Jan Beulich <jbeulich@suse.com> x86/irq: process softirqs in irq keyhandlers Large machines with lots of interrupts can trip over the Xen watchdog. Suggested-by: Santosh Jodh <Santosh.Jodh@citrix.com> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> Tested-by: Santosh Jodh <Santosh.Jodh@citrix.com> # Commit bd083922f9e78ed19ef98e7de372e5f568402ed3 # Date 2014-08-26 17:56:52 +0200 # Author Jan Beulich <jbeulich@suse.com> # Committer Jan Beulich <jbeulich@suse.com> x86/IO-APIC: don't process softirqs during early boot Commit e13b320399 ("x86/irq: process softirqs in irq keyhandlers") made this unconditional, but the boot time use of __print_IO_APIC() (when "apic_verbosity=debug" was given) can't tolerate that. 
Reported-by: Sander Eikelenboom <linux@eikelenboom.it> Signed-off-by: Jan Beulich <jbeulich@suse.com> Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> Tested-by: Sander Eikelenboom <linux@eikelenboom.it> --- a/xen/arch/x86/io_apic.c +++ b/xen/arch/x86/io_apic.c @@ -28,6 +28,7 @@ #include <xen/sched.h> #include <xen/acpi.h> #include <xen/keyhandler.h> +#include <xen/softirq.h> #include <asm/mc146818rtc.h> #include <asm/smp.h> #include <asm/desc.h> @@ -1091,7 +1092,7 @@ static inline void UNEXPECTED_IO_APIC(vo { } -static void /*__init*/ __print_IO_APIC(void) +static void /*__init*/ __print_IO_APIC(bool_t boot) { int apic, i; union IO_APIC_reg_00 reg_00; @@ -1112,6 +1113,9 @@ static void /*__init*/ __print_IO_APIC(v printk(KERN_INFO "testing the IO APIC.......................\n"); for (apic = 0; apic < nr_ioapics; apic++) { + if ( !boot ) + process_pending_softirqs(); + if (!nr_ioapic_entries[apic]) continue; @@ -1215,6 +1219,10 @@ static void /*__init*/ __print_IO_APIC(v printk(KERN_DEBUG "IRQ to pin mappings:\n"); for (i = 0; i < nr_irqs_gsi; i++) { struct irq_pin_list *entry = irq_2_pin + i; + + if ( !boot && !(i & 0x1f) ) + process_pending_softirqs(); + if (entry->pin < 0) continue; printk(KERN_DEBUG "IRQ%d ", irq_to_desc(i)->arch.vector); @@ -1235,12 +1243,12 @@ static void /*__init*/ __print_IO_APIC(v static void __init print_IO_APIC(void) { if (apic_verbosity != APIC_QUIET) - __print_IO_APIC(); + __print_IO_APIC(1); } static void _print_IO_APIC_keyhandler(unsigned char key) { - __print_IO_APIC(); + __print_IO_APIC(0); } static struct keyhandler print_IO_APIC_keyhandler = { .diagnostic = 1, @@ -2454,6 +2462,9 @@ void dump_ioapic_irq_info(void) for ( irq = 0; irq < nr_irqs_gsi; irq++ ) { + if ( !(irq & 0x1f) ) + process_pending_softirqs(); + entry = &irq_2_pin[irq]; if ( entry->pin == -1 ) continue; --- a/xen/arch/x86/irq.c +++ b/xen/arch/x86/irq.c @@ -19,6 +19,7 @@ #include <xen/iommu.h> #include <xen/symbols.h> #include <xen/trace.h> +#include 
<xen/softirq.h> #include <xsm/xsm.h> #include <asm/msi.h> #include <asm/current.h> @@ -2231,6 +2232,8 @@ static void dump_irqs(unsigned char key) for ( irq = 0; irq < nr_irqs; irq++ ) { + if ( !(irq & 0x1f) ) + process_pending_softirqs(); desc = irq_to_desc(irq); @@ -2284,6 +2287,7 @@ static void dump_irqs(unsigned char key) xfree(ssid); } + process_pending_softirqs(); printk("Direct vector information:\n"); for ( i = FIRST_DYNAMIC_VECTOR; i < NR_VECTORS; ++i ) if ( direct_apic_vector[i] ) ++++++ 53ff3659-x86-consolidate-boolean-inputs-in-hvm-and-p2m.patch ++++++ References: bnc#882089 # Commit 3d4d4f9336159f3f77a7b480ce9984fd3ff7949f # Date 2014-08-28 16:02:01 +0200 # Author Tamas K Lengyel <tamas.lengyel@zentific.com> # Committer Jan Beulich <jbeulich@suse.com> x86: consolidate boolean inputs in hvm and p2m into a shared bitmap This patch consolidates the boolean input parameters of hvm_hap_nested_page_fault and p2m_mem_access_check into a common bitmap and defines the bitmap members accordingly. Signed-off-by: Tamas K Lengyel <tamas.lengyel@zentific.com> Reviewed-by: Jan Beulich <jbeulich@suse.com> Acked-by: Kevin Tian <kevin.tian@intel.com> Reviewed-by: Tim Deegan <tim@xen.org> # Commit 24857896a30105b7947e2cd36d63768054538bbc # Date 2014-09-03 15:06:06 +0200 # Author Andrew Cooper <andrew.cooper3@citrix.com> # Committer Jan Beulich <jbeulich@suse.com> x86/hvm: fix operator precedence bug introduced by 3d4d4f9336 Bitwise or has greater precedence than the ternary operator, making the result of the expression a constant P2M_UNSHARE. 
Coverity-ID: 1234633 Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> Signed-off-by: Jan Beulich <jbeulich@suse.com> Reviewed-by: Don Slutz <dslutz@verizon.com> --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -1464,12 +1464,8 @@ void hvm_inject_page_fault(int errcode, hvm_inject_trap(&trap); } -int hvm_hap_nested_page_fault(paddr_t gpa, - bool_t gla_valid, - unsigned long gla, - bool_t access_r, - bool_t access_w, - bool_t access_x) +int hvm_hap_nested_page_fault(paddr_t gpa, unsigned long gla, + struct npfec npfec) { unsigned long gfn = gpa >> PAGE_SHIFT; p2m_type_t p2mt; @@ -1498,8 +1494,11 @@ int hvm_hap_nested_page_fault(paddr_t gp * into l1 guest if not fixable. The algorithm is * the same as for shadow paging. */ - rv = nestedhvm_hap_nested_page_fault(v, &gpa, - access_r, access_w, access_x); + + rv = nestedhvm_hap_nested_page_fault(v, &gpa, + npfec.read_access, + npfec.write_access, + npfec.insn_fetch); switch (rv) { case NESTEDHVM_PAGEFAULT_DONE: case NESTEDHVM_PAGEFAULT_RETRY: @@ -1538,47 +1537,49 @@ int hvm_hap_nested_page_fault(paddr_t gp p2m = p2m_get_hostp2m(v->domain); mfn = get_gfn_type_access(p2m, gfn, &p2mt, &p2ma, - P2M_ALLOC | (access_w ? P2M_UNSHARE : 0), NULL); + P2M_ALLOC | (npfec.write_access ? 
P2M_UNSHARE : 0), + NULL); /* Check access permissions first, then handle faults */ if ( mfn_x(mfn) != INVALID_MFN ) { - int violation = 0; + bool_t violation; + /* If the access is against the permissions, then send to mem_event */ - switch (p2ma) + switch (p2ma) { case p2m_access_n: case p2m_access_n2rwx: default: - violation = access_r || access_w || access_x; + violation = npfec.read_access || npfec.write_access || npfec.insn_fetch; break; case p2m_access_r: - violation = access_w || access_x; + violation = npfec.write_access || npfec.insn_fetch; break; case p2m_access_w: - violation = access_r || access_x; + violation = npfec.read_access || npfec.insn_fetch; break; case p2m_access_x: - violation = access_r || access_w; + violation = npfec.read_access || npfec.write_access; break; case p2m_access_rx: case p2m_access_rx2rw: - violation = access_w; + violation = npfec.write_access; break; case p2m_access_wx: - violation = access_r; + violation = npfec.read_access; break; case p2m_access_rw: - violation = access_x; + violation = npfec.insn_fetch; break; case p2m_access_rwx: + violation = 0; break; } if ( violation ) { - if ( p2m_mem_access_check(gpa, gla_valid, gla, access_r, - access_w, access_x, &req_ptr) ) + if ( p2m_mem_access_check(gpa, gla, npfec, &req_ptr) ) { fall_through = 1; } else { @@ -1594,7 +1595,7 @@ int hvm_hap_nested_page_fault(paddr_t gp * to the mmio handler. */ if ( (p2mt == p2m_mmio_dm) || - (access_w && (p2mt == p2m_ram_ro)) ) + (npfec.write_access && (p2mt == p2m_ram_ro)) ) { put_gfn(p2m->domain, gfn); @@ -1613,7 +1614,7 @@ int hvm_hap_nested_page_fault(paddr_t gp paged = 1; /* Mem sharing: unshare the page and try again */ - if ( access_w && (p2mt == p2m_ram_shared) ) + if ( npfec.write_access && (p2mt == p2m_ram_shared) ) { ASSERT(!p2m_is_nestedp2m(p2m)); sharing_enomem = @@ -1630,7 +1631,7 @@ int hvm_hap_nested_page_fault(paddr_t gp * a large page, we do not change other pages type within that large * page. 
*/ - if ( access_w ) + if ( npfec.write_access ) { paging_mark_dirty(v->domain, mfn_x(mfn)); p2m_change_type(v->domain, gfn, p2m_ram_logdirty, p2m_ram_rw); @@ -1640,7 +1641,7 @@ int hvm_hap_nested_page_fault(paddr_t gp } /* Shouldn't happen: Maybe the guest was writing to a r/o grant mapping? */ - if ( access_w && (p2mt == p2m_grant_map_ro) ) + if ( npfec.write_access && (p2mt == p2m_grant_map_ro) ) { gdprintk(XENLOG_WARNING, "trying to write to read-only grant mapping\n"); --- a/xen/arch/x86/hvm/svm/svm.c +++ b/xen/arch/x86/hvm/svm/svm.c @@ -1289,7 +1289,7 @@ const struct hvm_function_table * __init } static void svm_do_nested_pgfault(struct vcpu *v, - struct cpu_user_regs *regs, uint32_t npfec, paddr_t gpa) + struct cpu_user_regs *regs, uint32_t pfec, paddr_t gpa) { int ret; unsigned long gfn = gpa >> PAGE_SHIFT; @@ -1298,10 +1298,13 @@ static void svm_do_nested_pgfault(struct p2m_access_t p2ma; struct p2m_domain *p2m = NULL; - ret = hvm_hap_nested_page_fault(gpa, 0, ~0ul, - 1, /* All NPFs count as reads */ - npfec & PFEC_write_access, - npfec & PFEC_insn_fetch); + struct npfec npfec = { + .read_access = 1, /* All NPFs count as reads */ + .write_access = !!(pfec & PFEC_write_access), + .insn_fetch = !!(pfec & PFEC_insn_fetch) + }; + + ret = hvm_hap_nested_page_fault(gpa, ~0ul, npfec); if ( tb_init_done ) { @@ -1329,7 +1332,7 @@ static void svm_do_nested_pgfault(struct case -1: ASSERT(nestedhvm_enabled(v->domain) && nestedhvm_vcpu_in_guestmode(v)); /* inject #VMEXIT(NPF) into guest. 
*/ - nestedsvm_vmexit_defer(v, VMEXIT_NPF, npfec, gpa); + nestedsvm_vmexit_defer(v, VMEXIT_NPF, pfec, gpa); return; } --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -2278,6 +2278,11 @@ static void ept_handle_violation(unsigne p2m_type_t p2mt; int ret; struct domain *d = current->domain; + struct npfec npfec = { + .read_access = !!(qualification & EPT_READ_VIOLATION), + .write_access = !!(qualification & EPT_WRITE_VIOLATION), + .insn_fetch = !!(qualification & EPT_EXEC_VIOLATION) + }; if ( tb_init_done ) { @@ -2296,14 +2301,14 @@ static void ept_handle_violation(unsigne } if ( qualification & EPT_GLA_VALID ) + { __vmread(GUEST_LINEAR_ADDRESS, &gla); + npfec.gla_valid = 1; + } else gla = ~0ull; - ret = hvm_hap_nested_page_fault(gpa, - !!(qualification & EPT_GLA_VALID), gla, - !!(qualification & EPT_READ_VIOLATION), - !!(qualification & EPT_WRITE_VIOLATION), - !!(qualification & EPT_EXEC_VIOLATION)); + + ret = hvm_hap_nested_page_fault(gpa, gla, npfec); switch ( ret ) { case 0: // Unhandled L1 EPT violation --- a/xen/arch/x86/mm/p2m.c +++ b/xen/arch/x86/mm/p2m.c @@ -1261,9 +1261,9 @@ void p2m_mem_paging_resume(struct domain } } -bool_t p2m_mem_access_check(paddr_t gpa, bool_t gla_valid, unsigned long gla, - bool_t access_r, bool_t access_w, bool_t access_x, - mem_event_request_t **req_ptr) +bool_t p2m_mem_access_check(paddr_t gpa, unsigned long gla, + struct npfec npfec, + mem_event_request_t **req_ptr) { struct vcpu *v = current; unsigned long gfn = gpa >> PAGE_SHIFT; @@ -1281,7 +1281,7 @@ bool_t p2m_mem_access_check(paddr_t gpa, gfn_lock(p2m, gfn, 0); mfn = p2m->get_entry(p2m, gfn, &p2mt, &p2ma, 0, NULL); - if ( access_w && p2ma == p2m_access_rx2rw ) + if ( npfec.write_access && p2ma == p2m_access_rx2rw ) { rc = p2m->set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2mt, p2m_access_rw); ASSERT(rc); @@ -1290,7 +1290,7 @@ bool_t p2m_mem_access_check(paddr_t gpa, } else if ( p2ma == p2m_access_n2rwx ) { - ASSERT(access_w || access_r || access_x); + 
ASSERT(npfec.write_access || npfec.read_access || npfec.insn_fetch); rc = p2m->set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2mt, p2m_access_rwx); ASSERT(rc); @@ -1341,11 +1341,11 @@ bool_t p2m_mem_access_check(paddr_t gpa, /* Send request to mem event */ req->gfn = gfn; req->offset = gpa & ((1 << PAGE_SHIFT) - 1); - req->gla_valid = gla_valid; + req->gla_valid = npfec.gla_valid; req->gla = gla; - req->access_r = access_r; - req->access_w = access_w; - req->access_x = access_x; + req->access_r = npfec.read_access; + req->access_w = npfec.write_access; + req->access_x = npfec.insn_fetch; req->vcpu_id = v->vcpu_id; } --- a/xen/include/asm-x86/hvm/hvm.h +++ b/xen/include/asm-x86/hvm/hvm.h @@ -435,11 +435,8 @@ static inline void hvm_invalidate_regs_f #endif } -int hvm_hap_nested_page_fault(paddr_t gpa, - bool_t gla_valid, unsigned long gla, - bool_t access_r, - bool_t access_w, - bool_t access_x); +int hvm_hap_nested_page_fault(paddr_t gpa, unsigned long gla, + struct npfec npfec); #define hvm_msr_tsc_aux(v) ({ \ struct domain *__d = (v)->domain; \ --- a/xen/include/asm-x86/mm.h +++ b/xen/include/asm-x86/mm.h @@ -551,6 +551,16 @@ void audit_domains(void); #endif +/* + * Nested page fault exception codes. + */ +struct npfec { + unsigned int read_access:1; + unsigned int write_access:1; + unsigned int insn_fetch:1; + unsigned int gla_valid:1; +}; + int new_guest_cr3(unsigned long pfn); void make_cr3(struct vcpu *v, unsigned long mfn); void update_cr3(struct vcpu *v); --- a/xen/include/asm-x86/p2m.h +++ b/xen/include/asm-x86/p2m.h @@ -568,9 +568,9 @@ void p2m_mem_paging_resume(struct domain * been promoted with no underlying vcpu pause. If the req_ptr has been populated, * then the caller must put the event in the ring (once having released get_gfn* * locks -- caller must also xfree the request. 
*/ -bool_t p2m_mem_access_check(paddr_t gpa, bool_t gla_valid, unsigned long gla, - bool_t access_r, bool_t access_w, bool_t access_x, - mem_event_request_t **req_ptr); +bool_t p2m_mem_access_check(paddr_t gpa, unsigned long gla, + struct npfec npfec, + mem_event_request_t **req_ptr); /* Resumes the running of the VCPU, restarting the last instruction */ void p2m_mem_access_resume(struct domain *d); ++++++ 53ff36ae-x86-hvm-treat-non-insn-fetch-NPF-also-as-read-violations.patch ++++++ References: bnc#882089 # Commit 401d5c5cc5a780cad160aa0e3c282c11ac11dd0c # Date 2014-08-28 16:03:26 +0200 # Author Tamas K Lengyel <tamas.lengyel@zentific.com> # Committer Jan Beulich <jbeulich@suse.com> x86/hvm: treat non-instruction fetch nested page faults also as read violations As pointed out by Jan Beulich in http://lists.xen.org/archives/html/xen-devel/2014-08/msg01269.html: "Read-modify-write instructions absolutely need to be treated as read accesses, yet hardware doesn't guarantee to tell us so (they may surface as just write accesses)." This patch addresses the issue in both the VMX and the SVM side. VMX: Treat all write data access violations also as read violations (in addition to those that were already reported as read violations). SVM: Refine the meaning of read data access violations to distinguish between read/write and instruction fetch access violations. With this patch both VMX and SVM specific nested page fault handling code reports violations the same way, thus abstracting the hardware specific behaviour from the layers above. 
Suggested-by: Jan Beulich <JBeulich@suse.com> Signed-off-by: Tamas K Lengyel <tamas.lengyel@zentific.com> Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> Reviewed-by: Tim Deegan <tim@xen.org> --- a/xen/arch/x86/hvm/svm/svm.c +++ b/xen/arch/x86/hvm/svm/svm.c @@ -1298,8 +1298,13 @@ static void svm_do_nested_pgfault(struct p2m_access_t p2ma; struct p2m_domain *p2m = NULL; + /* + * Since HW doesn't explicitly provide a read access bit and we need to + * somehow describe read-modify-write instructions we will conservatively + * set read_access for all memory accesses that are not instruction fetches. + */ struct npfec npfec = { - .read_access = 1, /* All NPFs count as reads */ + .read_access = !(pfec & PFEC_insn_fetch), .write_access = !!(pfec & PFEC_write_access), .insn_fetch = !!(pfec & PFEC_insn_fetch) }; --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -2278,8 +2278,21 @@ static void ept_handle_violation(unsigne p2m_type_t p2mt; int ret; struct domain *d = current->domain; + + /* + * We treat all write violations also as read violations. + * The reason why this is required is the following warning: + * "An EPT violation that occurs during as a result of execution of a + * read-modify-write operation sets bit 1 (data write). Whether it also + * sets bit 0 (data read) is implementation-specific and, for a given + * implementation, may differ for different kinds of read-modify-write + * operations." 
+ * - Intel(R) 64 and IA-32 Architectures Software Developer's Manual + * Volume 3C: System Programming Guide, Part 3 + */ struct npfec npfec = { - .read_access = !!(qualification & EPT_READ_VIOLATION), + .read_access = !!(qualification & EPT_READ_VIOLATION) || + !!(qualification & EPT_WRITE_VIOLATION), .write_access = !!(qualification & EPT_WRITE_VIOLATION), .insn_fetch = !!(qualification & EPT_EXEC_VIOLATION) }; ++++++ 53ff36d5-x86-mem_event-deliver-gla-fault-EPT-violation-information.patch ++++++ References: bnc#882089 # Commit 692f3cc7dd05b80dbd027e46372b1c25d7975332 # Date 2014-08-28 16:04:05 +0200 # Author Tamas K Lengyel <tamas.lengyel@zentific.com> # Committer Jan Beulich <jbeulich@suse.com> x86/mem_event: deliver gla fault EPT violation information On Intel EPT the exit qualification generated by a violation also includes a bit (EPT_GLA_FAULT) which describes the following information: Set if the access causing the EPT violation is to a guest-physical address that is the translation of a linear address. Clear if the access causing the EPT violation is to a paging-structure entry as part of a page walk or the update of an accessed or dirty bit. For more information see Table 27-7 in the Intel SDM. This patch extends the mem_event system to deliver this extra information, which could be useful for determining the cause of a violation. 
Signed-off-by: Tamas K Lengyel <tamas.lengyel@zentific.com> Reviewed-by: Jan Beulich <jbeulich@suse.com> Acked-by: Kevin Tian <kevin.tian@intel.com> Acked-by: Tim Deegan <tim@xen.org> --- a/xen/arch/x86/hvm/svm/svm.c +++ b/xen/arch/x86/hvm/svm/svm.c @@ -1289,7 +1289,7 @@ const struct hvm_function_table * __init } static void svm_do_nested_pgfault(struct vcpu *v, - struct cpu_user_regs *regs, uint32_t pfec, paddr_t gpa) + struct cpu_user_regs *regs, uint64_t pfec, paddr_t gpa) { int ret; unsigned long gfn = gpa >> PAGE_SHIFT; @@ -1309,6 +1309,12 @@ static void svm_do_nested_pgfault(struct .insn_fetch = !!(pfec & PFEC_insn_fetch) }; + /* These bits are mutually exclusive */ + if ( pfec & NPT_PFEC_with_gla ) + npfec.kind = npfec_kind_with_gla; + else if ( pfec & NPT_PFEC_in_gpt ) + npfec.kind = npfec_kind_in_gpt; + ret = hvm_hap_nested_page_fault(gpa, ~0ul, npfec); if ( tb_init_done ) --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -2317,6 +2317,10 @@ static void ept_handle_violation(unsigne { __vmread(GUEST_LINEAR_ADDRESS, &gla); npfec.gla_valid = 1; + if( qualification & EPT_GLA_FAULT ) + npfec.kind = npfec_kind_with_gla; + else + npfec.kind = npfec_kind_in_gpt; } else gla = ~0ull; --- a/xen/arch/x86/mm/p2m.c +++ b/xen/arch/x86/mm/p2m.c @@ -1343,10 +1343,13 @@ bool_t p2m_mem_access_check(paddr_t gpa, req->offset = gpa & ((1 << PAGE_SHIFT) - 1); req->gla_valid = npfec.gla_valid; req->gla = gla; + if ( npfec.kind == npfec_kind_with_gla ) + req->fault_with_gla = 1; + else if ( npfec.kind == npfec_kind_in_gpt ) + req->fault_in_gpt = 1; req->access_r = npfec.read_access; req->access_w = npfec.write_access; req->access_x = npfec.insn_fetch; - req->vcpu_id = v->vcpu_id; } --- a/xen/include/asm-x86/hvm/svm/svm.h +++ b/xen/include/asm-x86/hvm/svm/svm.h @@ -105,4 +105,10 @@ extern u32 svm_feature_flags; extern void svm_host_osvw_reset(void); extern void svm_host_osvw_init(void); +/* EXITINFO1 fields on NPT faults */ +#define _NPT_PFEC_with_gla 32 +#define 
NPT_PFEC_with_gla (1UL<<_NPT_PFEC_with_gla) +#define _NPT_PFEC_in_gpt 33 +#define NPT_PFEC_in_gpt (1UL<<_NPT_PFEC_in_gpt) + #endif /* __ASM_X86_HVM_SVM_H__ */ --- a/xen/include/asm-x86/mm.h +++ b/xen/include/asm-x86/mm.h @@ -552,6 +552,16 @@ void audit_domains(void); #endif /* + * Extra fault info types which are used to further describe + * the source of an access violation. + */ +typedef enum { + npfec_kind_unknown, /* must be first */ + npfec_kind_in_gpt, /* violation in guest page table */ + npfec_kind_with_gla /* violation with guest linear address */ +} npfec_kind_t; + +/* * Nested page fault exception codes. */ struct npfec { @@ -559,6 +569,7 @@ struct npfec { unsigned int write_access:1; unsigned int insn_fetch:1; unsigned int gla_valid:1; + unsigned int kind:2; /* npfec_kind_t */ }; int new_guest_cr3(unsigned long pfn); --- a/xen/include/public/mem_event.h +++ b/xen/include/public/mem_event.h @@ -62,7 +62,9 @@ typedef struct mem_event_st { uint16_t access_w:1; uint16_t access_x:1; uint16_t gla_valid:1; - uint16_t available:12; + uint16_t fault_with_gla:1; + uint16_t fault_in_gpt:1; + uint16_t available:10; uint16_t reason; } mem_event_request_t, mem_event_response_t; ++++++ 53ff3716-x86-ats-Disable-Address-Translation-Services-by-default.patch ++++++ # Commit ad6eddb742577d182e634785bcfaf92732a50024 # Date 2014-08-28 16:05:10 +0200 # Author Andrew Cooper <andrew.cooper3@citrix.com> # Committer Jan Beulich <jbeulich@suse.com> x86/ats: Disable Address Translation Services by default Xen cannot safely use any ATS functionality until it gains asynchronous queued invalidation support, because of the current synchronous wait for completion. Do not turn ATS on by default. While editing the default in the command line documentation, correct the statement regarding PCI Passthrough. ATS is purely a performance optimisation, and is certainly not required for PCI Passthrough to function. 
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> Reviewed-by: Jan Beulich <jbeulich@suse.com> Acked-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> --- a/docs/misc/xen-command-line.markdown +++ b/docs/misc/xen-command-line.markdown @@ -167,10 +167,13 @@ developers wishing Xen to fall back to o ### ats
`= <boolean>`
-> Default: `true` +> Default: `false` + +Permits Xen to set up and use PCI Address Translation Services. This is a +performance optimisation for PCI Passthrough. -Permits Xen to set up and use PCI Address Translation Services, which -is required for PCI Passthrough. +**WARNING: Xen cannot currently safely use ATS because of its synchronous wait +loops for Queued Invalidation completions.** ### availmem
`= <size>` --- a/xen/drivers/passthrough/x86/ats.c +++ b/xen/drivers/passthrough/x86/ats.c @@ -20,7 +20,7 @@
LIST_HEAD(ats_devices); -bool_t __read_mostly ats_enabled = 1; +bool_t __read_mostly ats_enabled = 0; boolean_param("ats", ats_enabled); int enable_ats_device(int seg, int bus, int devfn, const void *iommu) ++++++ 53ff3899-x86-NMI-allow-processing-unknown-NMIs-with-watchdog.patch ++++++ # Commit 3ea2ba980afe7356c613c8e1ba00d223d1c25412 # Date 2014-08-28 16:11:37 +0200 # Author Ross Lagerwall <ross.lagerwall@citrix.com> # Committer Jan Beulich <jbeulich@suse.com> x86/NMI: allow processing unknown NMIs when watchdog is enabled Change NMI processing so that if watchdog=force is passed on the command-line and the NMI is not caused by a perf counter overflow (i.e. likely not a watchdog "tick"), the NMI is handled by the unknown NMI handler. This allows injection of NMIs from IPMI controllers that don't set the IOCK/SERR bits to trigger the unknown NMI handler rather than be ignored. Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com> Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> Fix command line parsing (don't enable the watchdog on e.g. "watchdog=xyz"). Signed-off-by: Jan Beulich <jbeulich@suse.com> # Commit fd553ae5f0f57baa63d033bedee84f607de57d33 # Date 2014-09-03 15:09:59 +0200 # Author Jan Beulich <jbeulich@suse.com> # Committer Jan Beulich <jbeulich@suse.com> x86/NMI: allow passing just "watchdog" again This capability got inadvertently lost in commit 3ea2ba980a ("x86/NMI: allow processing unknown NMIs when watchdog is enabled") due to an oversight of mine. Reported-by: Ross Lagerwall <ross.lagerwall@citrix.com> Signed-off-by: Jan Beulich <jbeulich@suse.com> Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> --- a/docs/misc/xen-command-line.markdown +++ b/docs/misc/xen-command-line.markdown @@ -1039,12 +1039,14 @@ As the BTS virtualisation is not 100% sa don't use the vpmu flag on production systems with Intel cpus! ### watchdog -> `= <boolean>` +> `= force | <boolean>`
Default: `false`
Run an NMI watchdog on each processor. If a processor is stuck for -longer than the **watchdog\_timeout**, a panic occurs. +longer than the **watchdog\_timeout**, a panic occurs. When `force` is +specified, in addition to running an NMI watchdog on each processor, +unknown NMIs will still be processed. ### watchdog\_timeout
`= <integer>` --- a/xen/arch/x86/nmi.c +++ b/xen/arch/x86/nmi.c @@ -43,7 +43,32 @@ static DEFINE_PER_CPU(unsigned int, nmi_
/* opt_watchdog: If true, run a watchdog NMI on each processor. */ bool_t __initdata opt_watchdog = 0; -boolean_param("watchdog", opt_watchdog); + +/* watchdog_force: If true, process unknown NMIs when running the watchdog. */ +bool_t watchdog_force = 0; + +static void __init parse_watchdog(char *s) +{ + if ( !*s ) + { + opt_watchdog = 1; + return; + } + + switch ( parse_bool(s) ) + { + case 0: + opt_watchdog = 0; + return; + case 1: + opt_watchdog = 1; + return; + } + + if ( !strcmp(s, "force") ) + watchdog_force = opt_watchdog = 1; +} +custom_param("watchdog", parse_watchdog); /* opt_watchdog_timeout: Number of seconds to wait before panic. */ static unsigned int opt_watchdog_timeout = 5; @@ -82,6 +107,7 @@ int nmi_active; #define K7_EVNTSEL_USR (1 << 16) #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING +#define K7_EVENT_WIDTH 32 #define P6_EVNTSEL0_ENABLE (1 << 22) #define P6_EVNTSEL_INT (1 << 20) @@ -89,10 +115,12 @@ int nmi_active; #define P6_EVNTSEL_USR (1 << 16) #define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 #define CORE_EVENT_CPU_CLOCKS_NOT_HALTED 0x3c +#define P6_EVENT_WIDTH 32 #define P4_ESCR_EVENT_SELECT(N) ((N)<<25) #define P4_CCCR_OVF_PMI0 (1<<26) #define P4_CCCR_OVF_PMI1 (1<<27) +#define P4_CCCR_OVF (1<<31) #define P4_CCCR_THRESHOLD(N) ((N)<<20) #define P4_CCCR_COMPLEMENT (1<<19) #define P4_CCCR_COMPARE (1<<18) @@ -433,8 +461,10 @@ int __init watchdog_setup(void) return 0; } -void nmi_watchdog_tick(struct cpu_user_regs * regs) +/* Returns false if this was not a watchdog NMI, true otherwise */ +bool_t nmi_watchdog_tick(struct cpu_user_regs *regs) { + bool_t watchdog_tick = 1; unsigned int sum = this_cpu(nmi_timer_ticks); if ( (this_cpu(last_irq_sums) == sum) && watchdog_enabled() ) @@ -460,8 +490,15 @@ void nmi_watchdog_tick(struct cpu_user_r if ( nmi_perfctr_msr ) { + uint64_t msr_content; + + /* Work out if this is a watchdog tick by checking for overflow. 
*/ if ( nmi_perfctr_msr == MSR_P4_IQ_PERFCTR0 ) { + rdmsrl(MSR_P4_IQ_CCCR0, msr_content); + if ( !(msr_content & P4_CCCR_OVF) ) + watchdog_tick = 0; + /* * P4 quirks: * - An overflown perfctr will assert its interrupt @@ -474,14 +511,26 @@ void nmi_watchdog_tick(struct cpu_user_r } else if ( nmi_perfctr_msr == MSR_P6_PERFCTR0 ) { + rdmsrl(MSR_P6_PERFCTR0, msr_content); + if ( msr_content & (1ULL << P6_EVENT_WIDTH) ) + watchdog_tick = 0; + /* * Only P6 based Pentium M need to re-unmask the apic vector but * it doesn't hurt other P6 variants. */ apic_write(APIC_LVTPC, APIC_DM_NMI); } + else if ( nmi_perfctr_msr == MSR_K7_PERFCTR0 ) + { + rdmsrl(MSR_K7_PERFCTR0, msr_content); + if ( msr_content & (1ULL << K7_EVENT_WIDTH) ) + watchdog_tick = 0; + } write_watchdog_counter(NULL); } + + return watchdog_tick; } /* --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -3226,14 +3226,15 @@ void do_nmi(struct cpu_user_regs *regs) { unsigned int cpu = smp_processor_id(); unsigned char reason; + bool_t handle_unknown = 0; ++nmi_count(cpu); if ( nmi_callback(regs, cpu) ) return; - if ( nmi_watchdog ) - nmi_watchdog_tick(regs); + if ( !nmi_watchdog || (!nmi_watchdog_tick(regs) && watchdog_force) ) + handle_unknown = 1; /* Only the BSP gets external NMIs from the system. 
*/ if ( cpu == 0 ) @@ -3243,7 +3244,7 @@ void do_nmi(struct cpu_user_regs *regs) pci_serr_error(regs); if ( reason & 0x40 ) io_check_error(regs); - if ( !(reason & 0xc0) && !nmi_watchdog ) + if ( !(reason & 0xc0) && handle_unknown ) unknown_nmi_error(regs, reason); } } --- a/xen/include/asm-x86/apic.h +++ b/xen/include/asm-x86/apic.h @@ -206,7 +206,7 @@ extern void release_lapic_nmi(void); extern void self_nmi(void); extern void disable_timer_nmi_watchdog(void); extern void enable_timer_nmi_watchdog(void); -extern void nmi_watchdog_tick (struct cpu_user_regs *regs); +extern bool_t nmi_watchdog_tick (struct cpu_user_regs *regs); extern int APIC_init_uniprocessor (void); extern void disable_APIC_timer(void); extern void enable_APIC_timer(void); --- a/xen/include/asm-x86/nmi.h +++ b/xen/include/asm-x86/nmi.h @@ -8,6 +8,9 @@ struct cpu_user_regs; /* Watchdog boolean from the command line */ extern bool_t opt_watchdog; + +/* Watchdog force parameter from the command line */ +extern bool_t watchdog_force; typedef int (*nmi_callback_t)(struct cpu_user_regs *regs, int cpu); ++++++ 54005472-EPT-utilize-GLA-GPA-translation-known-for-certain-faults.patch ++++++ References: bnc#882089 # Commit ecb69533582e51999e5d76bce513be870222908f # Date 2014-08-29 12:22:42 +0200 # Author Jan Beulich <jbeulich@suse.com> # Committer Jan Beulich <jbeulich@suse.com> EPT: utilize GLA->GPA translation known for certain faults Rather than doing the translation ourselves in __hvmemul_{read,write}() leverage that we know the association for faults other than such having occurred when translating addresses of page tables. 
There is one intentional but not necessarily obvious (and possibly subtle) adjustment to behavior: __hvmemul_read() no longer blindly bails on instruction fetches matching the MMIO GVA (the callers of handle_mmio_with_translation() now control the behavior via the struct npfec they pass, and it didn't seem right to bail here rather than just falling through to the unaccelerated path) Signed-off-by: Jan Beulich <jbeulich@suse.com> Reviewed-by: Tim Deegan <tim@xen.org> --- a/xen/arch/x86/hvm/emulate.c +++ b/xen/arch/x86/hvm/emulate.c @@ -496,10 +496,11 @@ static int __hvmemul_read( while ( off & (chunk - 1) ) chunk >>= 1; - if ( unlikely(vio->mmio_gva == (addr & PAGE_MASK)) && vio->mmio_gva ) + if ( ((access_type != hvm_access_insn_fetch + ? vio->mmio_access.read_access + : vio->mmio_access.insn_fetch)) && + (vio->mmio_gva == (addr & PAGE_MASK)) ) { - if ( access_type == hvm_access_insn_fetch ) - return X86EMUL_UNHANDLEABLE; gpa = (((paddr_t)vio->mmio_gpfn << PAGE_SHIFT) | off); while ( (off + chunk) <= PAGE_SIZE ) { @@ -639,7 +640,8 @@ static int hvmemul_write( while ( off & (chunk - 1) ) chunk >>= 1; - if ( unlikely(vio->mmio_gva == (addr & PAGE_MASK)) && vio->mmio_gva ) + if ( vio->mmio_access.write_access && + (vio->mmio_gva == (addr & PAGE_MASK)) ) { gpa = (((paddr_t)vio->mmio_gpfn << PAGE_SHIFT) | off); while ( (off + chunk) <= PAGE_SIZE ) --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -1529,7 +1529,7 @@ int hvm_hap_nested_page_fault(paddr_t gp && is_hvm_vcpu(v) && hvm_mmio_internal(gpa) ) { - if ( !handle_mmio() ) + if ( !handle_mmio_with_translation(gla, gpa >> PAGE_SHIFT, npfec) ) hvm_inject_hw_exception(TRAP_gp_fault, 0); rc = 1; goto out; @@ -1603,7 +1603,7 @@ int hvm_hap_nested_page_fault(paddr_t gp if ( unlikely(is_pvh_vcpu(v)) ) goto out; - if ( !handle_mmio() ) + if ( !handle_mmio_with_translation(gla, gpa >> PAGE_SHIFT, npfec) ) hvm_inject_hw_exception(TRAP_gp_fault, 0); rc = 1; goto out; --- a/xen/arch/x86/hvm/io.c +++ 
b/xen/arch/x86/hvm/io.c @@ -189,7 +189,7 @@ int handle_mmio(void) if ( vio->io_state == HVMIO_awaiting_completion ) vio->io_state = HVMIO_handle_mmio_awaiting_completion; else - vio->mmio_gva = 0; + vio->mmio_access = (struct npfec){}; switch ( rc ) { @@ -218,9 +218,14 @@ int handle_mmio(void) return 1; } -int handle_mmio_with_translation(unsigned long gva, unsigned long gpfn) +int handle_mmio_with_translation(unsigned long gva, unsigned long gpfn, + struct npfec access) { struct hvm_vcpu_io *vio = &current->arch.hvm_vcpu.hvm_io; + + vio->mmio_access = access.gla_valid && + access.kind == npfec_kind_with_gla + ? access : (struct npfec){}; vio->mmio_gva = gva & PAGE_MASK; vio->mmio_gpfn = gpfn; return handle_mmio(); --- a/xen/arch/x86/mm/shadow/multi.c +++ b/xen/arch/x86/mm/shadow/multi.c @@ -2839,6 +2839,11 @@ static int sh_page_fault(struct vcpu *v, p2m_type_t p2mt; uint32_t rc; int version; + struct npfec access = { + .read_access = 1, + .gla_valid = 1, + .kind = npfec_kind_with_gla + }; #if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION int fast_emul = 0; #endif @@ -2849,6 +2854,9 @@ static int sh_page_fault(struct vcpu *v, perfc_incr(shadow_fault); + if ( regs->error_code & PFEC_write_access ) + access.write_access = 1; + #if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION /* If faulting frame is successfully emulated in last shadow fault * it's highly likely to reach same emulation action for this frame. @@ -2950,7 +2958,7 @@ static int sh_page_fault(struct vcpu *v, SHADOW_PRINTK("fast path mmio %#"PRIpaddr"\n", gpa); reset_early_unshadow(v); trace_shadow_gen(TRC_SHADOW_FAST_MMIO, va); - return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT) + return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT, access) ? 
EXCRET_fault_fixed : 0); } else @@ -3447,7 +3455,7 @@ static int sh_page_fault(struct vcpu *v, paging_unlock(d); put_gfn(d, gfn_x(gfn)); trace_shadow_gen(TRC_SHADOW_MMIO, va); - return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT) + return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT, access) ? EXCRET_fault_fixed : 0); not_a_shadow_fault: --- a/xen/include/asm-x86/hvm/io.h +++ b/xen/include/asm-x86/hvm/io.h @@ -119,7 +119,8 @@ static inline void register_buffered_io_ void send_timeoffset_req(unsigned long timeoff); void send_invalidate_req(void); int handle_mmio(void); -int handle_mmio_with_translation(unsigned long gva, unsigned long gpfn); +int handle_mmio_with_translation(unsigned long gva, unsigned long gpfn, + struct npfec); int handle_pio(uint16_t port, unsigned int size, int dir); void hvm_interrupt_post(struct vcpu *v, int vector, int type); void hvm_io_assist(ioreq_t *p); --- a/xen/include/asm-x86/hvm/vcpu.h +++ b/xen/include/asm-x86/hvm/vcpu.h @@ -54,8 +54,9 @@ struct hvm_vcpu_io { * HVM emulation: * Virtual address @mmio_gva maps to MMIO physical frame @mmio_gpfn. * The latter is known to be an MMIO frame (not RAM). - * This translation is only valid if @mmio_gva is non-zero. + * This translation is only valid for accesses as per @mmio_access. */ + struct npfec mmio_access; unsigned long mmio_gva; unsigned long mmio_gpfn; ++++++ CVE-2013-4540-qemu.patch ++++++ References: bnc#864801 Subject: zaurus: fix buffer overrun on invalid state load From: Michael S. Tsirkin mst@redhat.com Thu Apr 3 19:52:13 2014 +0300 Date: Mon May 5 22:15:02 2014 +0200: Git: 52f91c3723932f8340fe36c8ec8b18a757c37b2b CVE-2013-4540 Within scoop_gpio_handler_update, if prev_level has a high bit set, then we get bit > 16 and that causes a buffer overrun. Since prev_level comes from wire indirectly, this can happen on invalid state load. Similarly for gpio_level and gpio_dir. To fix, limit to 16 bit. Reported-by: Michael S. 
Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> Signed-off-by: Juan Quintela <quintela@redhat.com> Index: xen-4.4.1-testing/tools/qemu-xen-dir-remote/hw/gpio/zaurus.c =================================================================== --- xen-4.4.1-testing.orig/tools/qemu-xen-dir-remote/hw/gpio/zaurus.c +++ xen-4.4.1-testing/tools/qemu-xen-dir-remote/hw/gpio/zaurus.c @@ -203,6 +203,15 @@ static bool is_version_0 (void *opaque, return version_id == 0; } +static bool vmstate_scoop_validate(void *opaque, int version_id) +{ + ScoopInfo *s = opaque; + + return !(s->prev_level & 0xffff0000) && + !(s->gpio_level & 0xffff0000) && + !(s->gpio_dir & 0xffff0000); +} + static const VMStateDescription vmstate_scoop_regs = { .name = "scoop", .version_id = 1, @@ -215,6 +224,7 @@ static const VMStateDescription vmstate_ VMSTATE_UINT32(gpio_level, ScoopInfo), VMSTATE_UINT32(gpio_dir, ScoopInfo), VMSTATE_UINT32(prev_level, ScoopInfo), + VMSTATE_VALIDATE("irq levels are 16 bit", vmstate_scoop_validate), VMSTATE_UINT16(mcr, ScoopInfo), VMSTATE_UINT16(cdr, ScoopInfo), VMSTATE_UINT16(ccr, ScoopInfo), ++++++ README.SUSE ++++++ --- /var/tmp/diff_new_pack.SmKn8C/_old 2014-09-06 12:17:53.000000000 +0200 +++ /var/tmp/diff_new_pack.SmKn8C/_new 2014-09-06 12:17:53.000000000 +0200 @@ -550,16 +550,16 @@ Grub2 Example: Edit /etc/default/grub and add, - GRUB_CMDLINE_XEN_DEFAULT="loglvl=all loglvl_guest=all" + GRUB_CMDLINE_XEN_DEFAULT="loglvl=all guest_loglvl=all" and then run, grub2-mkconfig -o /boot/grub2/grub.cfg Grub1 Example: Edit /boot/grub/menu.lst and edit the line containing xen.gz - kernel /boot/xen.gz loglvl=all loglvl_guest=all + kernel /boot/xen.gz loglvl=all guest_loglvl=all 2) With the log levels specified above and the host rebooted, more useful -information about domain 0 and running VMs can be obtained using using the +information about domain 0 and running VMs can be obtained using the 'xl 
dmesg' and 'xl debug-keys' commands. For example, from the command line run: xl debug-keys h @@ -581,7 +581,7 @@ Grub2 Example: Edit /etc/default/grub and add, - GRUB_CMDLINE_XEN_DEFAULT="loglvl=all loglvl_guest=all console=com1 com1=115200,8n1" + GRUB_CMDLINE_XEN_DEFAULT="loglvl=all guest_loglvl=all console=com1 com1=115200,8n1" Also append additional serial flags to the option below such that it appears as, GRUB_CMDLINE_LINUX_DEFAULT="<pre-existing flags> console=ttyS0, 115200" where pre-existing flags are those options already present and then run, @@ -600,7 +600,7 @@ Grub2 Example: Edit /etc/default/grub and add, - GRUB_CMDLINE_XEN_DEFAULT="noreboot loglvl=all loglvl_guest=all" + GRUB_CMDLINE_XEN_DEFAULT="noreboot loglvl=all guest_loglvl=all" Edit /etc/grub.d/20_linux_xen file. Look for this line: while [ "x${xen_list}" != "x" ] ; do and add *before* the above line something like this: @@ -616,7 +616,7 @@ Edit your menu.lst configuration from something like this: kernel (hd0,5)/xen.gz To something like this: - kernel (hd0,5)/xen-dbg.gz noreboot loglvl=all loglvl_guest=all + kernel (hd0,5)/xen-dbg.gz noreboot loglvl=all guest_loglvl=all All hypervisor options require a reboot to take effect. After rebooting, the Xen hypervisor will write any error messages to the log file (viewable with ++++++ tigervnc-long-press.patch ++++++ Index: xen-4.4.1-testing/tools/qemu-xen-dir-remote/ui/vnc.c =================================================================== --- xen-4.4.1-testing.orig/tools/qemu-xen-dir-remote/ui/vnc.c +++ xen-4.4.1-testing/tools/qemu-xen-dir-remote/ui/vnc.c @@ -1651,6 +1651,25 @@ static void do_key_event(VncState *vs, i if (down) vs->modifiers_state[keycode] ^= 1; break; + default: + if (qemu_console_is_graphic(NULL)) { + /* record key 'down' info. Some client like tigervnc + * will send key down repeatedly if user pressing a + * a key for long time. 
In this case, we should add + * additional key up event before repeated key down, + * so that it can display the key multiple times. + */ + if (down) { + if (vs->modifiers_state[keycode]) { + /* add a key up event */ + do_key_event(vs, 0, keycode, sym); + } + vs->modifiers_state[keycode] = 1; + } else { + vs->modifiers_state[keycode] = 0; + } + } + break; } /* Turn off the lock state sync logic if the client support the led ++++++ x86-ioapic-ack-default.patch ++++++ --- /var/tmp/diff_new_pack.SmKn8C/_old 2014-09-06 12:17:53.000000000 +0200 +++ /var/tmp/diff_new_pack.SmKn8C/_new 2014-09-06 12:17:53.000000000 +0200 @@ -1,10 +1,8 @@ Change default IO-APIC ack mode for single IO-APIC systems to old-style. -Index: xen-4.3.0-testing/xen/arch/x86/io_apic.c -=================================================================== ---- xen-4.3.0-testing.orig/xen/arch/x86/io_apic.c -+++ xen-4.3.0-testing/xen/arch/x86/io_apic.c -@@ -2026,7 +2026,10 @@ void __init setup_IO_APIC(void) +--- a/xen/arch/x86/io_apic.c ++++ b/xen/arch/x86/io_apic.c +@@ -2034,7 +2034,10 @@ void __init setup_IO_APIC(void) io_apic_irqs = ~PIC_IRQS; printk("ENABLING IO-APIC IRQs\n"); ++++++ xen-4.4.1-testing-src.tar.bz2 ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xen-4.4.1-testing/ChangeLog new/xen-4.4.1-testing/ChangeLog --- old/xen-4.4.1-testing/ChangeLog 2014-08-07 18:29:51.000000000 +0200 +++ new/xen-4.4.1-testing/ChangeLog 2014-09-02 16:44:07.000000000 +0200 @@ -1,5 +1,5 @@ -commit 0f3cdfc4d7fa1e3dc93cc6153782872d90f25b53 +commit d5a7ed88d86f840c0cc26ebc48987101669b5bf7 Author: Jan Beulich <jbeulich@suse.com> -Date: Tue Aug 5 13:41:22 2014 +0200 +Date: Tue Sep 2 08:20:19 2014 +0200 - update Xen version to 4.4.1-rc2 + update Xen version to 4.4.1 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xen-4.4.1-testing/Config.mk new/xen-4.4.1-testing/Config.mk --- old/xen-4.4.1-testing/Config.mk 2014-08-07 
18:29:51.000000000 +0200 +++ new/xen-4.4.1-testing/Config.mk 2014-09-02 16:44:07.000000000 +0200 @@ -234,7 +234,7 @@ SEABIOS_UPSTREAM_URL ?= git://xenbits.xen.org/seabios.git endif OVMF_UPSTREAM_REVISION ?= 447d264115c476142f884af0be287622cd244423 -QEMU_UPSTREAM_REVISION ?= qemu-xen-4.4.1-rc1 +QEMU_UPSTREAM_REVISION ?= qemu-xen-4.4.1 SEABIOS_UPSTREAM_TAG ?= rel-1.7.3.1 # Fri Aug 2 14:12:09 2013 -0400 # Fix bug in CBFS file walking with compressed files. @@ -246,7 +246,7 @@ # CONFIG_QEMU ?= `pwd`/$(XEN_ROOT)/../qemu-xen.git CONFIG_QEMU ?= $(QEMU_REMOTE) -QEMU_TAG ?= xen-4.4.1-rc2 +QEMU_TAG ?= xen-4.4.1 # Tue Apr 8 16:50:06 2014 +0000 # qemu-xen-trad: free all the pirqs for msi/msix when driver unloads diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xen-4.4.1-testing/tools/tests/x86_emulator/test_x86_emulator.c new/xen-4.4.1-testing/tools/tests/x86_emulator/test_x86_emulator.c --- old/xen-4.4.1-testing/tools/tests/x86_emulator/test_x86_emulator.c 2014-08-07 18:29:51.000000000 +0200 +++ new/xen-4.4.1-testing/tools/tests/x86_emulator/test_x86_emulator.c 2014-09-02 16:44:07.000000000 +0200 @@ -597,23 +597,32 @@ printf("skipped\n"); #endif +#define decl_insn(which) extern const unsigned char which[], which##_len[] +#define put_insn(which, insn) ".pushsection .test, \"ax\", @progbits\n" \ + #which ": " insn "\n" \ + ".equ " #which "_len, .-" #which "\n" \ + ".popsection" +#define set_insn(which) (regs.eip = (unsigned long)memcpy(instr, which, \ + (unsigned long)which##_len)) +#define check_eip(which) (regs.eip == (unsigned long)instr + \ + (unsigned long)which##_len) + printf("%-40s", "Testing movq %mm3,(%ecx)..."); if ( stack_exec && cpu_has_mmx ) { - extern const unsigned char movq_to_mem[]; + decl_insn(movq_to_mem); asm volatile ( "pcmpeqb %%mm3, %%mm3\n" - ".pushsection .test, \"a\", @progbits\n" - "movq_to_mem: movq %%mm3, (%0)\n" - ".popsection" :: "c" (NULL) ); + put_insn(movq_to_mem, "movq %%mm3, (%0)") + :: "c" (NULL) 
); - memcpy(instr, movq_to_mem, 15); + set_insn(movq_to_mem); memset(res, 0x33, 64); memset(res + 8, 0xff, 8); - regs.eip = (unsigned long)&instr[0]; regs.ecx = (unsigned long)res; rc = x86_emulate(&ctxt, &emulops); - if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) ) + if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) || + !check_eip(movq_to_mem) ) goto fail; printf("okay\n"); } @@ -623,19 +632,17 @@ printf("%-40s", "Testing movq (%edx),%mm5..."); if ( stack_exec && cpu_has_mmx ) { - extern const unsigned char movq_from_mem[]; + decl_insn(movq_from_mem); asm volatile ( "pcmpgtb %%mm5, %%mm5\n" - ".pushsection .test, \"a\", @progbits\n" - "movq_from_mem: movq (%0), %%mm5\n" - ".popsection" :: "d" (NULL) ); + put_insn(movq_from_mem, "movq (%0), %%mm5") + :: "d" (NULL) ); - memcpy(instr, movq_from_mem, 15); - regs.eip = (unsigned long)&instr[0]; + set_insn(movq_from_mem); regs.ecx = 0; regs.edx = (unsigned long)res; rc = x86_emulate(&ctxt, &emulops); - if ( rc != X86EMUL_OKAY ) + if ( rc != X86EMUL_OKAY || !check_eip(movq_from_mem) ) goto fail; asm ( "pcmpeqb %%mm3, %%mm3\n\t" "pcmpeqb %%mm5, %%mm3\n\t" @@ -650,20 +657,19 @@ printf("%-40s", "Testing movdqu %xmm2,(%ecx)..."); if ( stack_exec && cpu_has_sse2 ) { - extern const unsigned char movdqu_to_mem[]; + decl_insn(movdqu_to_mem); asm volatile ( "pcmpeqb %%xmm2, %%xmm2\n" - ".pushsection .test, \"a\", @progbits\n" - "movdqu_to_mem: movdqu %%xmm2, (%0)\n" - ".popsection" :: "c" (NULL) ); + put_insn(movdqu_to_mem, "movdqu %%xmm2, (%0)") + :: "c" (NULL) ); - memcpy(instr, movdqu_to_mem, 15); + set_insn(movdqu_to_mem); memset(res, 0x55, 64); memset(res + 8, 0xff, 16); - regs.eip = (unsigned long)&instr[0]; regs.ecx = (unsigned long)res; rc = x86_emulate(&ctxt, &emulops); - if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) ) + if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) || + !check_eip(movdqu_to_mem) ) goto fail; printf("okay\n"); } @@ -673,19 +679,17 @@ printf("%-40s", "Testing movdqu 
(%edx),%xmm4..."); if ( stack_exec && cpu_has_sse2 ) { - extern const unsigned char movdqu_from_mem[]; + decl_insn(movdqu_from_mem); asm volatile ( "pcmpgtb %%xmm4, %%xmm4\n" - ".pushsection .test, \"a\", @progbits\n" - "movdqu_from_mem: movdqu (%0), %%xmm4\n" - ".popsection" :: "d" (NULL) ); + put_insn(movdqu_from_mem, "movdqu (%0), %%xmm4") + :: "d" (NULL) ); - memcpy(instr, movdqu_from_mem, 15); - regs.eip = (unsigned long)&instr[0]; + set_insn(movdqu_from_mem); regs.ecx = 0; regs.edx = (unsigned long)res; rc = x86_emulate(&ctxt, &emulops); - if ( rc != X86EMUL_OKAY ) + if ( rc != X86EMUL_OKAY || !check_eip(movdqu_from_mem) ) goto fail; asm ( "pcmpeqb %%xmm2, %%xmm2\n\t" "pcmpeqb %%xmm4, %%xmm2\n\t" @@ -700,21 +704,20 @@ printf("%-40s", "Testing vmovdqu %ymm2,(%ecx)..."); if ( stack_exec && cpu_has_avx ) { - extern const unsigned char vmovdqu_to_mem[]; + decl_insn(vmovdqu_to_mem); asm volatile ( "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n" - ".pushsection .test, \"a\", @progbits\n" - "vmovdqu_to_mem: vmovdqu %%ymm2, (%0)\n" - ".popsection" :: "c" (NULL) ); + put_insn(vmovdqu_to_mem, "vmovdqu %%ymm2, (%0)") + :: "c" (NULL) ); - memcpy(instr, vmovdqu_to_mem, 15); + set_insn(vmovdqu_to_mem); memset(res, 0x55, 128); memset(res + 16, 0xff, 16); memset(res + 20, 0x00, 16); - regs.eip = (unsigned long)&instr[0]; regs.ecx = (unsigned long)res; rc = x86_emulate(&ctxt, &emulops); - if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 16, 64) ) + if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 16, 64) || + !check_eip(vmovdqu_to_mem) ) goto fail; printf("okay\n"); } @@ -724,7 +727,7 @@ printf("%-40s", "Testing vmovdqu (%edx),%ymm4..."); if ( stack_exec && cpu_has_avx ) { - extern const unsigned char vmovdqu_from_mem[]; + decl_insn(vmovdqu_from_mem); #if 0 /* Don't use AVX2 instructions for now */ asm volatile ( "vpcmpgtb %%ymm4, %%ymm4, %%ymm4\n" @@ -732,17 +735,15 @@ asm volatile ( "vpcmpgtb %%xmm4, %%xmm4, %%xmm4\n\t" "vinsertf128 $1, %%xmm4, %%ymm4, %%ymm4\n" #endif - ".pushsection 
.test, \"a\", @progbits\n" - "vmovdqu_from_mem: vmovdqu (%0), %%ymm4\n" - ".popsection" :: "d" (NULL) ); + put_insn(vmovdqu_from_mem, "vmovdqu (%0), %%ymm4") + :: "d" (NULL) ); - memcpy(instr, vmovdqu_from_mem, 15); + set_insn(vmovdqu_from_mem); memset(res + 4, 0xff, 16); - regs.eip = (unsigned long)&instr[0]; regs.ecx = 0; regs.edx = (unsigned long)res; rc = x86_emulate(&ctxt, &emulops); - if ( rc != X86EMUL_OKAY ) + if ( rc != X86EMUL_OKAY || !check_eip(vmovdqu_from_mem) ) goto fail; #if 0 /* Don't use AVX2 instructions for now */ asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t" @@ -769,20 +770,19 @@ memset(res + 10, 0x66, 8); if ( stack_exec && cpu_has_sse2 ) { - extern const unsigned char movsd_to_mem[]; + decl_insn(movsd_to_mem); asm volatile ( "movlpd %0, %%xmm5\n\t" "movhpd %0, %%xmm5\n" - ".pushsection .test, \"a\", @progbits\n" - "movsd_to_mem: movsd %%xmm5, (%1)\n" - ".popsection" :: "m" (res[10]), "c" (NULL) ); + put_insn(movsd_to_mem, "movsd %%xmm5, (%1)") + :: "m" (res[10]), "c" (NULL) ); - memcpy(instr, movsd_to_mem, 15); - regs.eip = (unsigned long)&instr[0]; + set_insn(movsd_to_mem); regs.ecx = (unsigned long)(res + 2); regs.edx = 0; rc = x86_emulate(&ctxt, &emulops); - if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) ) + if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) || + !check_eip(movsd_to_mem) ) goto fail; printf("okay\n"); } @@ -795,19 +795,17 @@ printf("%-40s", "Testing movaps (%edx),%xmm7..."); if ( stack_exec && cpu_has_sse ) { - extern const unsigned char movaps_from_mem[]; + decl_insn(movaps_from_mem); asm volatile ( "xorps %%xmm7, %%xmm7\n" - ".pushsection .test, \"a\", @progbits\n" - "movaps_from_mem: movaps (%0), %%xmm7\n" - ".popsection" :: "d" (NULL) ); + put_insn(movaps_from_mem, "movaps (%0), %%xmm7") + :: "d" (NULL) ); - memcpy(instr, movaps_from_mem, 15); - regs.eip = (unsigned long)&instr[0]; + set_insn(movaps_from_mem); regs.ecx = 0; regs.edx = (unsigned long)res; rc = x86_emulate(&ctxt, &emulops); - if ( rc != 
X86EMUL_OKAY ) + if ( rc != X86EMUL_OKAY || !check_eip(movaps_from_mem) ) goto fail; asm ( "cmpeqps %1, %%xmm7\n\t" "movmskps %%xmm7, %0" : "=r" (rc) : "m" (res[8]) ); @@ -823,19 +821,18 @@ memset(res + 10, 0x77, 8); if ( stack_exec && cpu_has_avx ) { - extern const unsigned char vmovsd_to_mem[]; + decl_insn(vmovsd_to_mem); asm volatile ( "vbroadcastsd %0, %%ymm5\n" - ".pushsection .test, \"a\", @progbits\n" - "vmovsd_to_mem: vmovsd %%xmm5, (%1)\n" - ".popsection" :: "m" (res[10]), "c" (NULL) ); + put_insn(vmovsd_to_mem, "vmovsd %%xmm5, (%1)") + :: "m" (res[10]), "c" (NULL) ); - memcpy(instr, vmovsd_to_mem, 15); - regs.eip = (unsigned long)&instr[0]; + set_insn(vmovsd_to_mem); regs.ecx = (unsigned long)(res + 2); regs.edx = 0; rc = x86_emulate(&ctxt, &emulops); - if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) ) + if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) || + !check_eip(vmovsd_to_mem) ) goto fail; printf("okay\n"); } @@ -848,19 +845,17 @@ printf("%-40s", "Testing vmovaps (%edx),%ymm7..."); if ( stack_exec && cpu_has_avx ) { - extern const unsigned char vmovaps_from_mem[]; + decl_insn(vmovaps_from_mem); asm volatile ( "vxorps %%ymm7, %%ymm7, %%ymm7\n" - ".pushsection .test, \"a\", @progbits\n" - "vmovaps_from_mem: vmovaps (%0), %%ymm7\n" - ".popsection" :: "d" (NULL) ); + put_insn(vmovaps_from_mem, "vmovaps (%0), %%ymm7") + :: "d" (NULL) ); - memcpy(instr, vmovaps_from_mem, 15); - regs.eip = (unsigned long)&instr[0]; + set_insn(vmovaps_from_mem); regs.ecx = 0; regs.edx = (unsigned long)res; rc = x86_emulate(&ctxt, &emulops); - if ( rc != X86EMUL_OKAY ) + if ( rc != X86EMUL_OKAY || !check_eip(vmovaps_from_mem) ) goto fail; asm ( "vcmpeqps %1, %%ymm7, %%ymm0\n\t" "vmovmskps %%ymm0, %0" : "=r" (rc) : "m" (res[8]) ); @@ -871,6 +866,11 @@ else printf("skipped\n"); +#undef decl_insn +#undef put_insn +#undef set_insn +#undef check_eip + for ( j = 1; j <= 2; j++ ) { #if defined(__i386__) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/xen-4.4.1-testing/xen/Makefile new/xen-4.4.1-testing/xen/Makefile --- old/xen-4.4.1-testing/xen/Makefile 2014-08-07 18:29:51.000000000 +0200 +++ new/xen-4.4.1-testing/xen/Makefile 2014-09-02 16:44:07.000000000 +0200 @@ -2,7 +2,7 @@ # All other places this is stored (eg. compile.h) should be autogenerated. export XEN_VERSION = 4 export XEN_SUBVERSION = 4 -export XEN_EXTRAVERSION ?= .1-rc2$(XEN_VENDORVERSION) +export XEN_EXTRAVERSION ?= .1$(XEN_VENDORVERSION) export XEN_FULLVERSION = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION) -include xen-version diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xen-4.4.1-testing/xen/arch/arm/traps.c new/xen-4.4.1-testing/xen/arch/arm/traps.c --- old/xen-4.4.1-testing/xen/arch/arm/traps.c 2014-08-07 18:29:51.000000000 +0200 +++ new/xen-4.4.1-testing/xen/arch/arm/traps.c 2014-09-02 16:44:07.000000000 +0200 @@ -286,7 +286,7 @@ regs->cpsr |= PSR_BIG_ENDIAN; } -static vaddr_t exception_handler(vaddr_t offset) +static vaddr_t exception_handler32(vaddr_t offset) { uint32_t sctlr = READ_SYSREG32(SCTLR_EL1); @@ -318,7 +318,7 @@ regs->lr_und = regs->pc32 + return_offset; /* Branch to exception vector */ - regs->pc32 = exception_handler(VECTOR32_UND); + regs->pc32 = exception_handler32(VECTOR32_UND); } /* Injects an Abort exception into the current vcpu, PC is the exact @@ -344,7 +344,7 @@ regs->spsr_abt = spsr; regs->lr_abt = regs->pc32 + return_offset; - regs->pc32 = exception_handler(prefetch ? VECTOR32_PABT : VECTOR32_DABT); + regs->pc32 = exception_handler32(prefetch ? VECTOR32_PABT : VECTOR32_DABT); /* Inject a debug fault, best we can do right now */ if ( READ_SYSREG(TCR_EL1) & TTBCR_EAE ) @@ -397,9 +397,28 @@ } #ifdef CONFIG_ARM_64 +/* + * Take care to call this while regs contains the original faulting + * state and not the (partially constructed) exception state. 
+ */ +static vaddr_t exception_handler64(struct cpu_user_regs *regs, vaddr_t offset) +{ + vaddr_t base = READ_SYSREG(VBAR_EL1); + + if ( usr_mode(regs) ) + base += VECTOR64_LOWER32_BASE; + else if ( psr_mode(regs->cpsr,PSR_MODE_EL0t) ) + base += VECTOR64_LOWER64_BASE; + else /* Otherwise must be from kernel mode */ + base += VECTOR64_CURRENT_SPx_BASE; + + return base + offset; +} + /* Inject an undefined exception into a 64 bit guest */ static void inject_undef64_exception(struct cpu_user_regs *regs, int instr_len) { + vaddr_t handler; union hsr esr = { .iss = 0, .len = instr_len, @@ -408,12 +427,14 @@ BUG_ON( is_pv32_domain(current->domain) ); + handler = exception_handler64(regs, VECTOR64_SYNC_OFFSET); + regs->spsr_el1 = regs->cpsr; regs->elr_el1 = regs->pc; regs->cpsr = PSR_MODE_EL1h | PSR_ABT_MASK | PSR_FIQ_MASK | \ PSR_IRQ_MASK | PSR_DBG_MASK; - regs->pc = READ_SYSREG(VBAR_EL1) + VECTOR64_CURRENT_SPx_SYNC; + regs->pc = handler; WRITE_SYSREG32(esr.bits, ESR_EL1); } @@ -424,6 +445,7 @@ register_t addr, int instr_len) { + vaddr_t handler; union hsr esr = { .iss = 0, .len = instr_len, @@ -445,12 +467,14 @@ BUG_ON( is_pv32_domain(current->domain) ); + handler = exception_handler64(regs, VECTOR64_SYNC_OFFSET); + regs->spsr_el1 = regs->cpsr; regs->elr_el1 = regs->pc; regs->cpsr = PSR_MODE_EL1h | PSR_ABT_MASK | PSR_FIQ_MASK | \ PSR_IRQ_MASK | PSR_DBG_MASK; - regs->pc = READ_SYSREG(VBAR_EL1) + VECTOR64_CURRENT_SPx_SYNC; + regs->pc = handler; WRITE_SYSREG(addr, FAR_EL1); WRITE_SYSREG32(esr.bits, ESR_EL1); @@ -472,6 +496,17 @@ #endif +static void inject_undef_exception(struct cpu_user_regs *regs, + int instr_len) +{ + if ( is_pv32_domain(current->domain) ) + inject_undef32_exception(regs); +#ifdef CONFIG_ARM_64 + else + inject_undef64_exception(regs, instr_len); +#endif +} + static void inject_iabt_exception(struct cpu_user_regs *regs, register_t addr, int instr_len) @@ -697,7 +732,17 @@ show_registers_32(regs, ctxt, guest_mode, v); #ifdef CONFIG_ARM_64 else if ( 
is_pv64_domain(v->domain) ) - show_registers_64(regs, ctxt, guest_mode, v); + { + if ( psr_mode_is_32bit(regs->cpsr) ) + { + BUG_ON(!usr_mode(regs)); + show_registers_32(regs, ctxt, guest_mode, v); + } + else + { + show_registers_64(regs, ctxt, guest_mode, v); + } + } #endif } else @@ -1430,7 +1475,7 @@ gdprintk(XENLOG_ERR, "unhandled 32-bit CP15 access %#x\n", hsr.bits & HSR_CP32_REGS_MASK); #endif - inject_undef32_exception(regs); + inject_undef_exception(regs, hsr.len); return; } advance_pc(regs, hsr); @@ -1467,7 +1512,7 @@ gdprintk(XENLOG_ERR, "unhandled 64-bit CP15 access %#x\n", hsr.bits & HSR_CP64_REGS_MASK); #endif - inject_undef32_exception(regs); + inject_undef_exception(regs, hsr.len); return; } } @@ -1536,7 +1581,7 @@ gdprintk(XENLOG_ERR, "unhandled 32-bit cp14 access %#x\n", hsr.bits & HSR_CP32_REGS_MASK); #endif - inject_undef32_exception(regs); + inject_undef_exception(regs, hsr.len); return; } @@ -1551,7 +1596,7 @@ return; } - inject_undef32_exception(regs); + inject_undef_exception(regs, hsr.len); } static void do_cp(struct cpu_user_regs *regs, union hsr hsr) @@ -1562,7 +1607,7 @@ return; } - inject_undef32_exception(regs); + inject_undef_exception(regs, hsr.len); } #ifdef CONFIG_ARM_64 @@ -1637,7 +1682,8 @@ gdprintk(XENLOG_ERR, "unhandled 64-bit sysreg access %#x\n", hsr.bits & HSR_SYSREG_REGS_MASK); #endif - inject_undef64_exception(regs, sysreg.len); + inject_undef_exception(regs, sysreg.len); + return; } } @@ -1767,6 +1813,17 @@ { union hsr hsr = { .bits = READ_SYSREG32(ESR_EL2) }; + /* + * We currently do not handle 32-bit userspace on 64-bit kernels + * correctly (See XSA-102). Until that is resolved we treat any + * trap from 32-bit userspace on 64-bit kernel as undefined. 
+ */ + if ( is_pv64_domain(current->domain) && psr_mode_is_32bit(regs->cpsr) ) + { + inject_undef_exception(regs, hsr.len); + return; + } + switch (hsr.ec) { case HSR_EC_WFI_WFE: if ( !check_conditional_instr(regs, hsr) ) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xen-4.4.1-testing/xen/arch/x86/cpu/common.c new/xen-4.4.1-testing/xen/arch/x86/cpu/common.c --- old/xen-4.4.1-testing/xen/arch/x86/cpu/common.c 2014-08-07 18:29:51.000000000 +0200 +++ new/xen-4.4.1-testing/xen/arch/x86/cpu/common.c 2014-09-02 16:44:07.000000000 +0200 @@ -234,6 +234,9 @@ paddr_bits = cpuid_eax(0x80000008) & 0xff; } + /* Might lift BIOS max_leaf=3 limit. */ + early_intel_workaround(c); + /* Intel-defined flags: level 0x00000007 */ if ( c->cpuid_level >= 0x00000007 ) { u32 dummy; @@ -241,8 +244,6 @@ c->x86_capability[X86_FEATURE_FSGSBASE / 32] = ebx; } - early_intel_workaround(c); - #ifdef CONFIG_X86_HT c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff; #endif diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xen-4.4.1-testing/xen/arch/x86/x86_emulate/x86_emulate.c new/xen-4.4.1-testing/xen/arch/x86/x86_emulate/x86_emulate.c --- old/xen-4.4.1-testing/xen/arch/x86/x86_emulate/x86_emulate.c 2014-08-07 18:29:51.000000000 +0200 +++ new/xen-4.4.1-testing/xen/arch/x86/x86_emulate/x86_emulate.c 2014-09-02 16:44:07.000000000 +0200 @@ -720,29 +720,26 @@ put_fpu(&fic); \ } while (0) -static unsigned long __get_rep_prefix( - struct cpu_user_regs *int_regs, - struct cpu_user_regs *ext_regs, +static unsigned long _get_rep_prefix( + const struct cpu_user_regs *int_regs, int ad_bytes) { - unsigned long ecx = ((ad_bytes == 2) ? (uint16_t)int_regs->ecx : - (ad_bytes == 4) ? (uint32_t)int_regs->ecx : - int_regs->ecx); - - /* Skip the instruction if no repetitions are required. */ - if ( ecx == 0 ) - ext_regs->eip = int_regs->eip; - - return ecx; + return (ad_bytes == 2) ? (uint16_t)int_regs->ecx : + (ad_bytes == 4) ? 
(uint32_t)int_regs->ecx : + int_regs->ecx; } #define get_rep_prefix() ({ \ unsigned long max_reps = 1; \ if ( rep_prefix() ) \ - max_reps = __get_rep_prefix(&_regs, ctxt->regs, ad_bytes); \ + max_reps = _get_rep_prefix(&_regs, ad_bytes); \ if ( max_reps == 0 ) \ - goto done; \ - max_reps; \ + { \ + /* Skip the instruction if no repetitions are required. */ \ + dst.type = OP_NONE; \ + goto writeback; \ + } \ + max_reps; \ }) static void __put_rep_prefix( @@ -3921,7 +3918,8 @@ if ( !rc && (b & 1) && (ea.type == OP_MEM) ) rc = ops->write(ea.mem.seg, ea.mem.off, mmvalp, ea.bytes, ctxt); - goto done; + dst.type = OP_NONE; + break; } case 0x20: /* mov cr,reg */ @@ -4188,7 +4186,8 @@ if ( !rc && (b != 0x6f) && (ea.type == OP_MEM) ) rc = ops->write(ea.mem.seg, ea.mem.off, mmvalp, ea.bytes, ctxt); - goto done; + dst.type = OP_NONE; + break; } case 0x80 ... 0x8f: /* jcc (near) */ { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xen-4.4.1-testing/xen/common/lz4/decompress.c new/xen-4.4.1-testing/xen/common/lz4/decompress.c --- old/xen-4.4.1-testing/xen/common/lz4/decompress.c 2014-08-07 18:29:51.000000000 +0200 +++ new/xen-4.4.1-testing/xen/common/lz4/decompress.c 2014-09-02 16:44:07.000000000 +0200 @@ -84,6 +84,8 @@ ip += length; break; /* EOF */ } + if (unlikely((unsigned long)cpy < (unsigned long)op)) + goto _output_error; LZ4_WILDCOPY(ip, op, cpy); ip -= (op - cpy); op = cpy; @@ -142,6 +144,8 @@ goto _output_error; continue; } + if (unlikely((unsigned long)cpy < (unsigned long)op)) + goto _output_error; LZ4_SECURECOPY(ref, op, cpy); op = cpy; /* correction */ } @@ -207,6 +211,8 @@ op += length; break;/* Necessarily EOF, due to parsing restrictions */ } + if (unlikely((unsigned long)cpy < (unsigned long)op)) + goto _output_error; LZ4_WILDCOPY(ip, op, cpy); ip -= (op - cpy); op = cpy; @@ -270,6 +276,8 @@ goto _output_error; continue; } + if (unlikely((unsigned long)cpy < (unsigned long)op)) + goto _output_error; 
LZ4_SECURECOPY(ref, op, cpy); op = cpy; /* correction */ } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xen-4.4.1-testing/xen/include/asm-arm/processor.h new/xen-4.4.1-testing/xen/include/asm-arm/processor.h --- old/xen-4.4.1-testing/xen/include/asm-arm/processor.h 2014-08-07 18:29:51.000000000 +0200 +++ new/xen-4.4.1-testing/xen/include/asm-arm/processor.h 2014-09-02 16:44:07.000000000 +0200 @@ -432,14 +432,16 @@ #define VECTOR32_PABT 12 #define VECTOR32_DABT 16 /* ... ARM64 */ -#define VECTOR64_CURRENT_SP0_SYNC 0x000 -#define VECTOR64_CURRENT_SP0_IRQ 0x080 -#define VECTOR64_CURRENT_SP0_FIQ 0x100 -#define VECTOR64_CURRENT_SP0_ERROR 0x180 -#define VECTOR64_CURRENT_SPx_SYNC 0x200 -#define VECTOR64_CURRENT_SPx_IRQ 0x280 -#define VECTOR64_CURRENT_SPx_FIQ 0x300 -#define VECTOR64_CURRENT_SPx_ERROR 0x380 +#define VECTOR64_CURRENT_SP0_BASE 0x000 +#define VECTOR64_CURRENT_SPx_BASE 0x200 +#define VECTOR64_LOWER64_BASE 0x400 +#define VECTOR64_LOWER32_BASE 0x600 + +#define VECTOR64_SYNC_OFFSET 0x000 +#define VECTOR64_IRQ_OFFSET 0x080 +#define VECTOR64_FIQ_OFFSET 0x100 +#define VECTOR64_ERROR_OFFSET 0x180 + #if defined(CONFIG_ARM_32) # include <asm/arm32/processor.h> -- To unsubscribe, e-mail: opensuse-commit+unsubscribe@opensuse.org For additional commands, e-mail: opensuse-commit+help@opensuse.org