Mailinglist Archive: opensuse-commit (1973 mails)

< Previous Next >
commit xen.1481 for openSUSE:12.3:Update
Hello community,

here is the log from the commit of package xen.1481 for openSUSE:12.3:Update
checked in at 2013-04-02 16:25:34
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:12.3:Update/xen.1481 (Old)
and /work/SRC/openSUSE:12.3:Update/.xen.1481.new (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "xen.1481", Maintainer is ""

Changes:
--------
New Changes file:

--- /dev/null 2013-02-26 18:15:11.936010755 +0100
+++ /work/SRC/openSUSE:12.3:Update/.xen.1481.new/xen.changes 2013-04-02 16:25:40.000000000 +0200
@@ -0,0 +1,7010 @@
+-------------------------------------------------------------------
+Thu Mar 14 09:58:38 MDT 2013 - jfehlig@xxxxxxxx
+
+- Load blktap module in xencommons init script. blktap2 doesn't
+ support qcow2, so blktap is needed to support domains with
+ 'tap:qcow2' disk configurations.
+ modified tmp-initscript-modprobe.patch
+
+-------------------------------------------------------------------
+Thu Mar 14 07:32:17 MDT 2013 - carnold@xxxxxxxx
+
+- bnc#809203 - xen.efi isn't signed with SUSE Secure Boot key
+ xen.spec
+
+-------------------------------------------------------------------
+Mon Mar 11 21:07:21 MDT 2013 - jfehlig@xxxxxxxx
+
+- Fix adding managed PCI device to an inactive domain
+ modified xen-managed-pci-device.patch
+
+-------------------------------------------------------------------
+Mon Mar 11 11:45:22 MDT 2013 - jfehlig@xxxxxxxx
+
+- bnc#805094 - xen hot plug attach/detach fails
+ modified blktap-pv-cdrom.patch
+
+-------------------------------------------------------------------
+Mon Mar 11 11:17:57 MDT 2013 - jfehlig@xxxxxxxx
+
+- bnc#802690 - domain locking can prevent a live migration from
+ completing
+ modified xend-domain-lock.patch
+
+-------------------------------------------------------------------
+Fri Mar 8 15:01:15 CET 2013 - ohering@xxxxxxx
+
+- bnc#797014 - no way to control live migrations
+ 26675-tools-xentoollog_update_tty_detection_in_stdiostream_progress.patch
+ xen.migrate.tools-xc_print_messages_from_xc_save_with_xc_report.patch
+ xen.migrate.tools-xc_document_printf_calls_in_xc_restore.patch
+ xen.migrate.tools-xc_rework_xc_save.cswitch_qemu_logdirty.patch
+ xen.migrate.tools_set_migration_constraints_from_cmdline.patch
+ xen.migrate.tools_add_xm_migrate_--log_progress_option.patch
+
+-------------------------------------------------------------------
+Thu Mar 7 14:39:57 MST 2013 - carnold@xxxxxxxx
+
+- Upstream patches from Jan
+ 26585-x86-mm-Take-the-p2m-lock-even-in-shadow-mode.patch
+ 26595-x86-nhvm-properly-clean-up-after-failure-to-set-up-all-vCPU-s.patch
+ 26601-honor-ACPI-v4-FADT-flags.patch
+ 26656-x86-fix-null-pointer-dereference-in-intel_get_extended_msrs.patch
+ 26659-AMD-IOMMU-erratum-746-workaround.patch
+ 26660-x86-fix-CMCI-injection.patch
+ 26672-vmx-fix-handling-of-NMI-VMEXIT.patch
+ 26673-Avoid-stale-pointer-when-moving-domain-to-another-cpupool.patch
+ 26676-fix-compat-memory-exchange-op-splitting.patch
+ 26677-x86-make-certain-memory-sub-ops-return-valid-values.patch
+ 26678-SEDF-avoid-gathering-vCPU-s-on-pCPU0.patch
+ 26679-x86-defer-processing-events-on-the-NMI-exit-path.patch
+ 26683-credit1-Use-atomic-bit-operations-for-the-flags-structure.patch
+ 26689-fix-domain-unlocking-in-some-xsm-error-paths.patch
+
+-------------------------------------------------------------------
+Tue Mar 5 13:35:40 MST 2013 - carnold@xxxxxxxx
+
+- fate#313584: pass bios information to XEN HVM guest
+ xend-hvm-firmware-passthrough.patch
+
+-------------------------------------------------------------------
+Mon Mar 4 20:28:29 CET 2013 - ohering@xxxxxxx
+
+- bnc#806736: enabling xentrace crashes hypervisor
+ 26686-xentrace-fix-off-by-one-in-calculate_tbuf_size.patch
+
+-------------------------------------------------------------------
+Thu Feb 28 11:12:04 CET 2013 - ohering@xxxxxxx
+
+- update xenalyze to revision 149
+ Make eip_list output more useful
+ Use correct length when copying record into buffer
+ decode PV_HYPERCALL_SUBCALL events
+ decode PV_HYPERCALL_V2 records
+ Analyze populate-on-demand reclamation patterns
+ Handle 64-bit MMIO
+ Also strip write bit when processing a generic event
+ Make the warnings in hvm_generic_postprocess more informative
+ Don't warn about switching paging levels unless verbosity>=6
+ Process NPFs as generic for summary purposes
+ Add HVM_EVENT_VLAPIC
+
+-------------------------------------------------------------------
+Wed Feb 20 15:00:13 MST 2013 - jfehlig@xxxxxxxx
+
+- Add upstream patch to fix vfb/vkb initialization in libxl
+ 26369-libxl-devid.patch
+
+-------------------------------------------------------------------
+Tue Feb 19 14:35:07 MST 2013 - carnold@xxxxxxxx
+
+- fate#313584: pass bios information to XEN HVM guest
+ 26554-hvm-firmware-passthrough.patch
+ 26555-hvm-firmware-passthrough.patch
+ 26556-hvm-firmware-passthrough.patch
+
+-------------------------------------------------------------------
+Tue Feb 19 10:46:46 MST 2013 - carnold@xxxxxxxx
+
+- Upstream patches from Jan
+ 26516-ACPI-parse-table-retval.patch (Replaces CVE-2013-0153-xsa36.patch)
+ 26517-AMD-IOMMU-clear-irtes.patch (Replaces CVE-2013-0153-xsa36.patch)
+ 26518-AMD-IOMMU-disable-if-SATA-combined-mode.patch (Replaces CVE-2013-0153-xsa36.patch)
+ 26519-AMD-IOMMU-perdev-intremap-default.patch (Replaces CVE-2013-0153-xsa36.patch)
+ 26526-pvdrv-no-devinit.patch
+ 26529-gcc48-build-fix.patch
+ 26531-AMD-IOMMU-IVHD-special-missing.patch (Replaces CVE-2013-0153-xsa36.patch)
+ 26532-AMD-IOMMU-phantom-MSI.patch
+ 26536-xenoprof-div-by-0.patch
+ 26576-x86-APICV-migration.patch
+ 26577-x86-APICV-x2APIC.patch
+ 26578-AMD-IOMMU-replace-BUG_ON.patch
+
+-------------------------------------------------------------------
+Mon Feb 18 17:28:00 CET 2013 - ohering@xxxxxxx
+
+- bnc#797014 - no way to control live migrations
+ 26547-tools-xc_fix_logic_error_in_stdiostream_progress.patch
+ 26548-tools-xc_handle_tty_output_differently_in_stdiostream_progress.patch
+ 26549-tools-xc_turn_XCFLAGS_*_into_shifts.patch
+ 26550-tools-xc_restore_logging_in_xc_save.patch
+ 26551-tools-xc_log_pid_in_xc_save-xc_restore_output.patch
+
+-------------------------------------------------------------------
+Mon Feb 11 14:35:06 UTC 2013 - mmarek@xxxxxxx
+
+- Set $BRP_PESIGN_FILES in the %install section so that modules
+ are signed in the buildservice (fate#314552).
+
+-------------------------------------------------------------------
+Mon Feb 11 15:33:24 CET 2013 - ohering@xxxxxxx
+
+- PVonHVM: __devinit was removed in linux-3.8
+
+-------------------------------------------------------------------
+Wed Feb 6 09:01:29 MST 2013 - jfehlig@xxxxxxxx
+
+- Add 'managed' PCI passthrough support to xend, allowing support
+ for the same through libvirt
+ xen-managed-pci-device.patch
+ FATE#313570
+
+-------------------------------------------------------------------
+Tue Feb 5 11:50:14 MST 2013 - carnold@xxxxxxxx
+
+- Upstream patches from Jan
+ 26287-sched-credit-pick-idle.patch
+ 26340-VT-d-intremap-verify-legacy-bridge.patch (Replaces CVE-2012-5634-xsa33.patch)
+ 26370-libxc-x86-initial-mapping-fit.patch
+ 26395-x86-FPU-context-conditional.patch
+ 26404-x86-forward-both-NMI-kinds.patch
+ 26418-x86-trampoline-consider-multiboot.patch
+ 26427-x86-AMD-enable-WC+.patch
+ 26428-x86-HVM-RTC-update.patch
+ 26440-x86-forward-SERR.patch
+ 26443-ACPI-zap-DMAR.patch
+ 26444-x86-nHVM-no-self-enable.patch (Replaces CVE-2013-0152-xsa35.patch)
+ 26501-VMX-simplify-CR0-update.patch
+ 26502-VMX-disable-SMEP-when-not-paging.patch
+
+-------------------------------------------------------------------
+Fri Feb 1 08:30:28 MST 2013 - carnold@xxxxxxxx
+
+- bnc#800275 - VUL-0: XSA-36: CVE-2013-0153: xen: interrupt remap
+ entries shared and old ones not cleared on AMD IOMMUs
+ CVE-2013-0153-xsa36.patch
+
+-------------------------------------------------------------------
+Wed Jan 30 15:14:41 UTC 2013 - mmarek@xxxxxxx
+
+- Add # needssslcertforbuild to the specfile, to make the UEFI
+ signing certificate available during build (fate#314511, fate#314552).
+
+-------------------------------------------------------------------
+Fri Jan 25 14:57:30 MST 2013 - jfehlig@xxxxxxxx
+
+- bnc#798188 - Add $network to xend initscript dependencies
+
+-------------------------------------------------------------------
+Thu Jan 24 15:57:12 MST 2013 - jfehlig@xxxxxxxx
+
+- Add upstream patches to fix libxl bugs. These patches have
+ already been posted for inclusion in xen-4.2-testing.
+ 25912-partial-libxl.patch
+ 26372-tools-paths.patch
+ 26468-libxl-race.patch
+ 26469-libxl-race.patch
+
++++ 6813 more lines (skipped)
++++ between /dev/null
++++ and /work/SRC/openSUSE:12.3:Update/.xen.1481.new/xen.changes

New:
----
25861-x86-early-fixmap.patch
25862-sercon-non-com.patch
25863-sercon-ehci-dbgp.patch
25864-sercon-unused.patch
25866-sercon-ns16550-pci-irq.patch
25867-sercon-ns16550-parse.patch
25874-x86-EFI-chain-cfg.patch
25909-xenpm-consistent.patch
25912-partial-libxl.patch
25920-x86-APICV-enable.patch
25921-x86-APICV-delivery.patch
25922-x86-APICV-x2APIC.patch
25952-x86-MMIO-remap-permissions.patch
25957-x86-TSC-adjust-HVM.patch
25958-x86-TSC-adjust-sr.patch
25959-x86-TSC-adjust-expose.patch
25975-x86-IvyBridge.patch
26062-ACPI-ERST-move-data.patch
26077-stubdom_fix_compile_errors_in_grub.patch
26078-hotplug-Linux_remove_hotplug_support_rely_on_udev_instead.patch
26079-hotplug-Linux_close_lockfd_after_lock_attempt.patch
26081-stubdom_fix_rpmlint_warning_spurious-executable-perm.patch
26082-blktap2-libvhd_fix_rpmlint_warning_spurious-executable-perm.patch
26083-blktap_fix_rpmlint_warning_spurious-executable-perm.patch
26084-hotplug_install_hotplugpath.sh_as_data_file.patch
26085-stubdom_install_stubdompath.sh_as_data_file.patch
26086-hotplug-Linux_correct_sysconfig_tag_in_xendomains.patch
26087-hotplug-Linux_install_sysconfig_files_as_data_files.patch
26114-pygrub-list-entries.patch
26129-ACPI-BGRT-invalidate.patch
26133-IOMMU-defer-BM-disable.patch
26183-x86-HPET-masking.patch
26189-xenstore-chmod.patch
26200-IOMMU-debug-verbose.patch
26235-IOMMU-ATS-max-queue-depth.patch
26252-VMX-nested-rflags.patch
26253-VMX-nested-rdtsc.patch
26254-VMX-nested-dr.patch
26255-VMX-nested-ia32e-mode.patch
26258-VMX-nested-intr-delivery.patch
26262-x86-EFI-secure-shim.patch
26266-sched-ratelimit-check.patch
26287-sched-credit-pick-idle.patch
26294-x86-AMD-Fam15-way-access-filter.patch
26320-IOMMU-domctl-assign-seg.patch
26324-IOMMU-assign-params.patch
26325-IOMMU-add-remove-params.patch
26326-VT-d-context-map-params.patch
26327-AMD-IOMMU-flush-params.patch
26328-IOMMU-pdev-type.patch
26329-IOMMU-phantom-dev.patch
26330-VT-d-phantom-MSI.patch
26331-IOMMU-phantom-dev-quirk.patch
26332-x86-compat-show-guest-stack-mfn.patch
26333-x86-get_page_type-assert.patch
26340-VT-d-intremap-verify-legacy-bridge.patch
26341-hvm-firmware-passthrough.patch
26342-hvm-firmware-passthrough.patch
26343-hvm-firmware-passthrough.patch
26344-hvm-firmware-passthrough.patch
26369-libxl-devid.patch
26370-libxc-x86-initial-mapping-fit.patch
26372-tools-paths.patch
26395-x86-FPU-context-conditional.patch
26404-x86-forward-both-NMI-kinds.patch
26418-x86-trampoline-consider-multiboot.patch
26427-x86-AMD-enable-WC+.patch
26428-x86-HVM-RTC-update.patch
26440-x86-forward-SERR.patch
26443-ACPI-zap-DMAR.patch
26444-x86-nHVM-no-self-enable.patch
26468-libxl-race.patch
26469-libxl-race.patch
26501-VMX-simplify-CR0-update.patch
26502-VMX-disable-SMEP-when-not-paging.patch
26516-ACPI-parse-table-retval.patch
26517-AMD-IOMMU-clear-irtes.patch
26518-AMD-IOMMU-disable-if-SATA-combined-mode.patch
26519-AMD-IOMMU-perdev-intremap-default.patch
26526-pvdrv-no-devinit.patch
26529-gcc48-build-fix.patch
26531-AMD-IOMMU-IVHD-special-missing.patch
26532-AMD-IOMMU-phantom-MSI.patch
26536-xenoprof-div-by-0.patch
26547-tools-xc_fix_logic_error_in_stdiostream_progress.patch
26548-tools-xc_handle_tty_output_differently_in_stdiostream_progress.patch
26549-tools-xc_turn_XCFLAGS__into_shifts.patch
26550-tools-xc_restore_logging_in_xc_save.patch
26551-tools-xc_log_pid_in_xc_save-xc_restore_output.patch
26554-hvm-firmware-passthrough.patch
26555-hvm-firmware-passthrough.patch
26556-hvm-firmware-passthrough.patch
26576-x86-APICV-migration.patch
26577-x86-APICV-x2APIC.patch
26578-AMD-IOMMU-replace-BUG_ON.patch
26585-x86-mm-Take-the-p2m-lock-even-in-shadow-mode.patch
26595-x86-nhvm-properly-clean-up-after-failure-to-set-up-all-vCPU-s.patch
26601-honor-ACPI-v4-FADT-flags.patch
26656-x86-fix-null-pointer-dereference-in-intel_get_extended_msrs.patch
26659-AMD-IOMMU-erratum-746-workaround.patch
26660-x86-fix-CMCI-injection.patch
26672-vmx-fix-handling-of-NMI-VMEXIT.patch
26673-Avoid-stale-pointer-when-moving-domain-to-another-cpupool.patch
26675-tools-xentoollog_update_tty_detection_in_stdiostream_progress.patch
26676-fix-compat-memory-exchange-op-splitting.patch
26677-x86-make-certain-memory-sub-ops-return-valid-values.patch
26678-SEDF-avoid-gathering-vCPU-s-on-pCPU0.patch
26679-x86-defer-processing-events-on-the-NMI-exit-path.patch
26683-credit1-Use-atomic-bit-operations-for-the-flags-structure.patch
26686-xentrace-fix-off-by-one-in-calculate_tbuf_size.patch
26689-fix-domain-unlocking-in-some-xsm-error-paths.patch
32on64-extra-mem.patch
CVE-2012-6075-xsa41.patch
CVE-2013-0151-xsa34.patch
README.SuSE
VNC-Support-for-ExtendedKeyEvent-client-message.patch
altgr_2.patch
baselibs.conf
bdrv_default_rwflag.patch
bdrv_open2_fix_flags.patch
bdrv_open2_flags_2.patch
blktap-close-fifos.patch
blktap-disable-debug-printf.patch
blktap-pv-cdrom.patch
blktap.patch
blktapctrl-default-to-ioemu.patch
block-dmmd
block-iscsi
block-nbd
block-npiv
block-npiv-common.sh
block-npiv-vport
boot.local.xenU
boot.xen
bridge-bonding.diff
bridge-opensuse.patch
bridge-record-creation.patch
bridge-vlan.diff
build-tapdisk-ioemu.patch
capslock_enable.patch
cdrom-removable.patch
change-vnc-passwd.patch
change_home_server.patch
check_device_status.patch
checkpoint-rename.patch
del_usb_xend_entry.patch
disable_emulated_device.diff
domUloader.py
domu-usb-controller.patch
etc_pam.d_xen-api
hibernate.patch
hv_extid_compatibility.patch
init.pciback
init.xen_loop
init.xend
init.xendomains
ioemu-7615-qcow2-fix-alloc_cluster_link_l2.patch
ioemu-bdrv-open-CACHE_WB.patch
ioemu-blktap-barriers.patch
ioemu-blktap-fv-init.patch
ioemu-blktap-image-format.patch
ioemu-blktap-zero-size.patch
ioemu-debuginfo.patch
ioemu-disable-emulated-ide-if-pv.patch
ioemu-disable-scsi.patch
ioemu-vnc-resize.patch
ioemu-watchdog-ib700-timer.patch
ioemu-watchdog-linkage.patch
ioemu-watchdog-support.patch
ipxe-enable-nics.patch
ipxe.tar.bz2
kernel-boot-hvm.patch
kmp_filelist
libxen_permissive.patch
log-guest-console.patch
logrotate.conf
magic_ioport_compat.patch
minios-fixups.patch
multi-xvdp.patch
network-nat-open-SuSEfirewall2-FORWARD.patch
pvdrv-import-shared-info.patch
pvdrv_emulation_control.patch
pygrub-netware-xnloader.patch
qemu-dm-segfault.patch
qemu-security-etch1.diff
qemu-xen-dir-remote.tar.bz2
qemu-xen-traditional-dir-remote.tar.bz2
seabios-dir-remote.tar.bz2
serial-split.patch
stdvga-cache.patch
stubdom.tar.bz2
supported_module.diff
suspend_evtchn_lock.patch
sysconfig.pciback
tapdisk-ioemu-logfile.patch
tapdisk-ioemu-shutdown-fix.patch
tmp-initscript-modprobe.patch
tmp_build.patch
tools-watchdog-support.patch
udev-rules.patch
usb-list.patch
vif-bridge-no-iptables.patch
vif-bridge-tap-fix.patch
vif-route-ifup.patch
x86-cpufreq-report.patch
x86-dom-print.patch
x86-extra-trap-info.patch
x86-ioapic-ack-default.patch
xen-4.2.1-testing-src.tar.bz2
xen-api-auth.patch
xen-changeset.diff
xen-cpupool-xl-config-format.patch
xen-destdir.diff
xen-disable-qemu-monitor.diff
xen-domUloader.diff
xen-fixme-doc.diff
xen-glibc217.patch
xen-hvm-default-bridge.diff
xen-hvm-default-pae.diff
xen-ioemu-hvm-pv-support.diff
xen-managed-pci-device.patch
xen-max-free-mem.diff
xen-migration-bridge-check.patch
xen-minimum-restart-time.patch
xen-no-dummy-nfs-ip.diff
xen-paths.diff
xen-qemu-iscsi-fix.patch
xen-updown.sh
xen-utils-0.1.tar.bz2
xen-xm-top-needs-root.diff
xen-xmexample-vti.diff
xen-xmexample.diff
xen.changes
xen.migrate.tools-xc_document_printf_calls_in_xc_restore.patch
xen.migrate.tools-xc_print_messages_from_xc_save_with_xc_report.patch
xen.migrate.tools-xc_rework_xc_save.cswitch_qemu_logdirty.patch
xen.migrate.tools_add_xm_migrate_--log_progress_option.patch
xen.migrate.tools_set_migration_constraints_from_cmdline.patch
xen.sles11sp1.fate311487.xen_platform_pci.dmistring.patch
xen.spec
xen_pvdrivers.conf
xenalyze.hg.tar.bz2
xenapi-console-protocol.patch
xenapiusers
xenconsole-no-multiple-connections.patch
xend-config-enable-dump-comment.patch
xend-config.diff
xend-console-port-restore.patch
xend-core-dump-loc.diff
xend-cpuid.patch
xend-devid-or-name.patch
xend-disable-internal-logrotate.patch
xend-domain-lock-sfex.patch
xend-domain-lock.patch
xend-hvm-firmware-passthrough.patch
xend-migration-domname-fix.patch
xend-relocation-server.fw
xend-relocation.sh
xend-sysconfig.patch
xend-vcpu-affinity-fix.patch
xenpaging.autostart.patch
xenpaging.doc.patch
xm-create-maxmem.patch
xm-create-xflag.patch
xm-save-check-file.patch
xmclone.sh
xmexample.disks
xmexample.domUloader
xnloader.py

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ xen.spec ++++++
++++ 1480 lines (skipped)

++++++ 25861-x86-early-fixmap.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1347371120 -7200
# Node ID 51c2d7c83cbc2a0357ce112a463f91d354dcdba9
# Parent e4cb8411161043c726f699252cc761e77853e820
x86: allow early use of fixmaps

As a prerequisite for adding an EHCI debug port based console
implementation, set up the page tables needed for (a sub-portion of)
the fixmaps together with other boot time page table construction.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>

Index: xen-4.2.0-testing/xen/arch/x86/boot/head.S
===================================================================
--- xen-4.2.0-testing.orig/xen/arch/x86/boot/head.S
+++ xen-4.2.0-testing/xen/arch/x86/boot/head.S
@@ -3,6 +3,7 @@
#include <public/xen.h>
#include <asm/asm_defns.h>
#include <asm/desc.h>
+#include <asm/fixmap.h>
#include <asm/page.h>
#include <asm/msr.h>

@@ -136,6 +137,9 @@ __start:
add $8,%edx
add $(1<<L2_PAGETABLE_SHIFT),%eax
loop 1b
+ /* Initialise L2 fixmap page directory entry. */
+ mov $(sym_phys(l1_fixmap)+7),%eax
+ mov %eax,sym_phys(l2_fixmap) + l2_table_offset(FIXADDR_TOP-1)*8
/* Initialise L3 identity-map page directory entries. */
mov $sym_phys(l3_identmap),%edi
mov $(sym_phys(l2_identmap)+7),%eax
@@ -144,9 +148,11 @@ __start:
add $8,%edi
add $PAGE_SIZE,%eax
loop 1b
- /* Initialise L3 xen-map page directory entry. */
+ /* Initialise L3 xen-map and fixmap page directory entries. */
mov $(sym_phys(l2_xenmap)+7),%eax
mov %eax,sym_phys(l3_xenmap) + l3_table_offset(XEN_VIRT_START)*8
+ mov $(sym_phys(l2_fixmap)+7),%eax
+ mov %eax,sym_phys(l3_xenmap) + l3_table_offset(FIXADDR_TOP-1)*8
/* Initialise L3 boot-map page directory entry. */
mov $(sym_phys(l2_bootmap)+7),%eax
mov %eax,sym_phys(l3_bootmap) + 0*8
@@ -172,6 +178,9 @@ __start:
add $(1<<L2_PAGETABLE_SHIFT),%eax
cmp $(16<<20)+0xe3,%eax
jne 1b
+ /* Initialise L2 fixmap page directory entry. */
+ mov $(sym_phys(l1_fixmap)+7),%eax
+ mov %eax,sym_phys(idle_pg_table_l2) + l2_table_offset(FIXADDR_TOP-1)*8
#endif

/* Initialize 4kB mappings of first 2MB or 4MB of memory. */
Index: xen-4.2.0-testing/xen/arch/x86/efi/boot.c
===================================================================
--- xen-4.2.0-testing.orig/xen/arch/x86/efi/boot.c
+++ xen-4.2.0-testing/xen/arch/x86/efi/boot.c
@@ -17,6 +17,9 @@
#include <xen/vga.h>
#include <asm/e820.h>
#include <asm/edd.h>
+#define __ASSEMBLY__ /* avoid pulling in ACPI stuff (conflicts with EFI) */
+#include <asm/fixmap.h>
+#undef __ASSEMBLY__
#include <asm/mm.h>
#include <asm/msr.h>
#include <asm/processor.h>
@@ -1123,14 +1126,19 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
slot &= L2_PAGETABLE_ENTRIES - 1;
l2_bootmap[slot] = l2e_from_paddr(addr, __PAGE_HYPERVISOR|_PAGE_PSE);
}
+ /* Initialise L2 fixmap page directory entry. */
+ l2_fixmap[l2_table_offset(FIXADDR_TOP - 1)] =
+ l2e_from_paddr((UINTN)l1_fixmap, __PAGE_HYPERVISOR);
/* Initialise L3 identity-map page directory entries. */
for ( i = 0; i < ARRAY_SIZE(l2_identmap) / L2_PAGETABLE_ENTRIES; ++i )
l3_identmap[i] = l3e_from_paddr((UINTN)(l2_identmap +
i * L2_PAGETABLE_ENTRIES),
__PAGE_HYPERVISOR);
- /* Initialise L3 xen-map page directory entry. */
+ /* Initialise L3 xen-map and fixmap page directory entries. */
l3_xenmap[l3_table_offset(XEN_VIRT_START)] =
l3e_from_paddr((UINTN)l2_xenmap, __PAGE_HYPERVISOR);
+ l3_xenmap[l3_table_offset(FIXADDR_TOP - 1)] =
+ l3e_from_paddr((UINTN)l2_fixmap, __PAGE_HYPERVISOR);
/* Initialise L3 boot-map page directory entries. */
l3_bootmap[l3_table_offset(xen_phys_start)] =
l3e_from_paddr((UINTN)l2_bootmap, __PAGE_HYPERVISOR);
Index: xen-4.2.0-testing/xen/arch/x86/mm.c
===================================================================
--- xen-4.2.0-testing.orig/xen/arch/x86/mm.c
+++ xen-4.2.0-testing/xen/arch/x86/mm.c
@@ -130,6 +130,10 @@
l1_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
l1_identmap[L1_PAGETABLE_ENTRIES];

+/* Mapping of the fixmap space needed early. */
+l1_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
+ l1_fixmap[L1_PAGETABLE_ENTRIES];
+
#define MEM_LOG(_f, _a...) gdprintk(XENLOG_WARNING , _f "\n" , ## _a)

/*
Index: xen-4.2.0-testing/xen/arch/x86/x86_64/mm.c
===================================================================
--- xen-4.2.0-testing.orig/xen/arch/x86/x86_64/mm.c
+++ xen-4.2.0-testing/xen/arch/x86/x86_64/mm.c
@@ -65,6 +65,10 @@ l3_pgentry_t __attribute__ ((__section__
l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
l2_xenmap[L2_PAGETABLE_ENTRIES];

+/* Enough page directories to map the early fixmap space. */
+l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
+ l2_fixmap[L2_PAGETABLE_ENTRIES];
+
/* Enough page directories to map into the bottom 1GB. */
l3_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
l3_bootmap[L3_PAGETABLE_ENTRIES];
Index: xen-4.2.0-testing/xen/include/asm-x86/config.h
===================================================================
--- xen-4.2.0-testing.orig/xen/include/asm-x86/config.h
+++ xen-4.2.0-testing/xen/include/asm-x86/config.h
@@ -317,7 +317,7 @@ extern unsigned char boot_edid_info[128]
#define MACHPHYS_MBYTES 16 /* 1 MB needed per 1 GB memory */
#define FRAMETABLE_MBYTES (MACHPHYS_MBYTES * 6)

-#define IOREMAP_VIRT_END 0UL
+#define IOREMAP_VIRT_END _AC(0,UL)
#define IOREMAP_VIRT_START (IOREMAP_VIRT_END - (IOREMAP_MBYTES<<20))
#define DIRECTMAP_VIRT_END IOREMAP_VIRT_START
#define DIRECTMAP_VIRT_START (DIRECTMAP_VIRT_END - (DIRECTMAP_MBYTES<<20))
Index: xen-4.2.0-testing/xen/include/asm-x86/fixmap.h
===================================================================
--- xen-4.2.0-testing.orig/xen/include/asm-x86/fixmap.h
+++ xen-4.2.0-testing/xen/include/asm-x86/fixmap.h
@@ -13,12 +13,17 @@
#define _ASM_FIXMAP_H

#include <xen/config.h>
+#include <asm/page.h>
+
+#define FIXADDR_TOP (IOREMAP_VIRT_END - PAGE_SIZE)
+
+#ifndef __ASSEMBLY__
+
#include <xen/pfn.h>
#include <xen/kexec.h>
#include <xen/iommu.h>
#include <asm/apicdef.h>
#include <asm/acpi.h>
-#include <asm/page.h>
#include <asm/amd-iommu.h>
#include <asm/msi.h>
#include <acpi/apei.h>
@@ -66,7 +71,6 @@ enum fixed_addresses {
__end_of_fixed_addresses
};

-#define FIXADDR_TOP (IOREMAP_VIRT_END - PAGE_SIZE)
#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)

@@ -90,4 +94,6 @@ static inline unsigned long virt_to_fix(
return __virt_to_fix(vaddr);
}

+#endif /* __ASSEMBLY__ */
+
#endif
Index: xen-4.2.0-testing/xen/include/asm-x86/page.h
===================================================================
--- xen-4.2.0-testing.orig/xen/include/asm-x86/page.h
+++ xen-4.2.0-testing/xen/include/asm-x86/page.h
@@ -1,6 +1,8 @@
#ifndef __X86_PAGE_H__
#define __X86_PAGE_H__

+#include <xen/const.h>
+
/*
* It is important that the masks are signed quantities. This ensures that
* the compiler sign-extends a 32-bit mask to 64 bits if that is required.
@@ -306,13 +308,15 @@ extern l2_pgentry_t idle_pg_table_l2[
extern l2_pgentry_t *compat_idle_pg_table_l2;
extern unsigned int m2p_compat_vstart;
extern l2_pgentry_t l2_xenmap[L2_PAGETABLE_ENTRIES],
+ l2_fixmap[L2_PAGETABLE_ENTRIES],
l2_bootmap[L2_PAGETABLE_ENTRIES];
extern l3_pgentry_t l3_xenmap[L3_PAGETABLE_ENTRIES],
l3_identmap[L3_PAGETABLE_ENTRIES],
l3_bootmap[L3_PAGETABLE_ENTRIES];
#endif
extern l2_pgentry_t l2_identmap[4*L2_PAGETABLE_ENTRIES];
-extern l1_pgentry_t l1_identmap[L1_PAGETABLE_ENTRIES];
+extern l1_pgentry_t l1_identmap[L1_PAGETABLE_ENTRIES],
+ l1_fixmap[L1_PAGETABLE_ENTRIES];
void paging_init(void);
void setup_idle_pagetable(void);
#endif /* !defined(__ASSEMBLY__) */
Index: xen-4.2.0-testing/xen/include/xen/const.h
===================================================================
--- /dev/null
+++ xen-4.2.0-testing/xen/include/xen/const.h
@@ -0,0 +1,24 @@
+/* const.h: Macros for dealing with constants. */
+
+#ifndef __XEN_CONST_H__
+#define __XEN_CONST_H__
+
+/* Some constant macros are used in both assembler and
+ * C code. Therefore we cannot annotate them always with
+ * 'UL' and other type specifiers unilaterally. We
+ * use the following macros to deal with this.
+ *
+ * Similarly, _AT() will cast an expression with a type in C, but
+ * leave it unchanged in asm.
+ */
+
+#ifdef __ASSEMBLY__
+#define _AC(X,Y) X
+#define _AT(T,X) X
+#else
+#define __AC(X,Y) (X##Y)
+#define _AC(X,Y) __AC(X,Y)
+#define _AT(T,X) ((T)(X))
+#endif
+
+#endif /* __XEN_CONST_H__ */
++++++ 25862-sercon-non-com.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1347371236 -7200
# Node ID 776a23fa0e938e4cf3307fc2e3b3f1a9488a5927
# Parent 51c2d7c83cbc2a0357ce112a463f91d354dcdba9
console: prepare for non-COMn port support

Widen SERHND_IDX (and use it where needed), introduce a flush low level
driver method, and remove unnecessary peeking of the common code at the
(driver specific) serial port identification string in the "console="
command line option value.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>

--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -1017,7 +1017,7 @@ void __init smp_intr_init(void)
* Also ensure serial interrupts are high priority. We do not
* want them to be blocked by unacknowledged guest-bound interrupts.
*/
- for ( seridx = 0; seridx < 2; seridx++ )
+ for ( seridx = 0; seridx <= SERHND_IDX; seridx++ )
{
if ( (irq = serial_irq(seridx)) < 0 )
continue;
--- a/xen/drivers/char/console.c
+++ b/xen/drivers/char/console.c
@@ -539,6 +539,7 @@ void printk(const char *fmt, ...)
void __init console_init_preirq(void)
{
char *p;
+ int sh;

serial_init_preirq();

@@ -551,8 +552,9 @@ void __init console_init_preirq(void)
vga_init();
else if ( !strncmp(p, "none", 4) )
continue;
- else if ( strncmp(p, "com", 3) ||
- (sercon_handle = serial_parse_handle(p)) == -1 )
+ else if ( (sh = serial_parse_handle(p)) >= 0 )
+ sercon_handle = sh;
+ else
{
char *q = strchr(p, ',');
if ( q != NULL )
--- a/xen/drivers/char/serial.c
+++ b/xen/drivers/char/serial.c
@@ -22,9 +22,11 @@ size_param("serial_tx_buffer", serial_tx
#define mask_serial_rxbuf_idx(_i) ((_i)&(serial_rxbufsz-1))
#define mask_serial_txbuf_idx(_i) ((_i)&(serial_txbufsz-1))

-static struct serial_port com[2] = {
- { .rx_lock = SPIN_LOCK_UNLOCKED, .tx_lock = SPIN_LOCK_UNLOCKED },
- { .rx_lock = SPIN_LOCK_UNLOCKED, .tx_lock = SPIN_LOCK_UNLOCKED }
+static struct serial_port com[SERHND_IDX + 1] = {
+ [0 ... SERHND_IDX] = {
+ .rx_lock = SPIN_LOCK_UNLOCKED,
+ .tx_lock = SPIN_LOCK_UNLOCKED
+ }
};

void serial_rx_interrupt(struct serial_port *port, struct cpu_user_regs *regs)
@@ -81,6 +83,8 @@ void serial_tx_interrupt(struct serial_p
port->driver->putc(
port, port->txbuf[mask_serial_txbuf_idx(port->txbufc++)]);
}
+ if ( i && port->driver->flush )
+ port->driver->flush(port);
}

spin_unlock(&port->tx_lock);
@@ -175,6 +179,9 @@ void serial_putc(int handle, char c)

__serial_putc(port, c);

+ if ( port->driver->flush )
+ port->driver->flush(port);
+
spin_unlock_irqrestore(&port->tx_lock, flags);
}

@@ -206,6 +213,9 @@ void serial_puts(int handle, const char
__serial_putc(port, c);
}

+ if ( port->driver->flush )
+ port->driver->flush(port);
+
spin_unlock_irqrestore(&port->tx_lock, flags);
}

@@ -261,10 +271,10 @@ int __init serial_parse_handle(char *con
switch ( conf[3] )
{
case '1':
- handle = 0;
+ handle = SERHND_COM1;
break;
case '2':
- handle = 1;
+ handle = SERHND_COM2;
break;
default:
goto fail;
@@ -365,6 +375,8 @@ void serial_start_sync(int handle)
port->driver->putc(
port, port->txbuf[mask_serial_txbuf_idx(port->txbufc++)]);
}
+ if ( port->driver->flush )
+ port->driver->flush(port);
}

spin_unlock_irqrestore(&port->tx_lock, flags);
--- a/xen/include/xen/serial.h
+++ b/xen/include/xen/serial.h
@@ -60,6 +60,8 @@ struct uart_driver {
int (*tx_empty)(struct serial_port *);
/* Put a character onto the serial line. */
void (*putc)(struct serial_port *, char);
+ /* Flush accumulated characters. */
+ void (*flush)(struct serial_port *);
/* Get a character from the serial line: returns 0 if none available. */
int (*getc)(struct serial_port *, char *);
/* Get IRQ number for this port's serial line: returns -1 if none. */
@@ -67,10 +69,12 @@ struct uart_driver {
};

/* 'Serial handles' are composed from the following fields. */
-#define SERHND_IDX (1<<0) /* COM1 or COM2? */
-#define SERHND_HI (1<<1) /* Mux/demux each transferred char by MSB. */
-#define SERHND_LO (1<<2) /* Ditto, except that the MSB is cleared. */
-#define SERHND_COOKED (1<<3) /* Newline/carriage-return translation? */
+#define SERHND_IDX (3<<0) /* COM1 or COM2? */
+# define SERHND_COM1 (0<<0)
+# define SERHND_COM2 (1<<0)
+#define SERHND_HI (1<<2) /* Mux/demux each transferred char by MSB. */
+#define SERHND_LO (1<<3) /* Ditto, except that the MSB is cleared. */
+#define SERHND_COOKED (1<<4) /* Newline/carriage-return translation? */

/* Two-stage initialisation (before/after IRQ-subsystem initialisation). */
void serial_init_preirq(void);
++++++ 25863-sercon-ehci-dbgp.patch ++++++
++++ 1778 lines (skipped)

++++++ 25864-sercon-unused.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1347371512 -7200
# Node ID e1380b5311ccee14eb47d7badb75339933d42249
# Parent 0d0c55a1975db9c6cac2e9259b5ebea7a7bdbaec
serial: avoid fully initializing unused consoles

Defer calling the drivers' post-IRQ initialization functions (generally
doing allocation of transmit buffers) until it is known that the
respective console is actually going to be used.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>

--- a/xen/drivers/char/ehci-dbgp.c
+++ b/xen/drivers/char/ehci-dbgp.c
@@ -1391,7 +1391,8 @@ static int ehci_dbgp_check_release(struc
printk(XENLOG_INFO "Releasing EHCI debug port at %02x:%02x.%u\n",
dbgp->bus, dbgp->slot, dbgp->func);

- kill_timer(&dbgp->timer);
+ if ( dbgp->timer.function )
+ kill_timer(&dbgp->timer);
dbgp->ehci_debug = NULL;

ctrl = readl(&ehci_debug->control);
--- a/xen/drivers/char/serial.c
+++ b/xen/drivers/char/serial.c
@@ -29,6 +29,8 @@ static struct serial_port com[SERHND_IDX
}
};

+static bool_t __read_mostly post_irq;
+
void serial_rx_interrupt(struct serial_port *port, struct cpu_user_regs *regs)
{
char c;
@@ -263,14 +265,12 @@ char serial_getc(int handle)

int __init serial_parse_handle(char *conf)
{
- int handle;
+ int handle, flags = 0;

if ( !strncmp(conf, "dbgp", 4) && (!conf[4] || conf[4] == ',') )
{
- if ( !com[SERHND_DBGP].driver )
- goto fail;
-
- return SERHND_DBGP | SERHND_COOKED;
+ handle = SERHND_DBGP;
+ goto common;
}

if ( strncmp(conf, "com", 3) )
@@ -288,17 +288,25 @@ int __init serial_parse_handle(char *con
goto fail;
}

- if ( !com[handle].driver )
- goto fail;
-
if ( conf[4] == 'H' )
- handle |= SERHND_HI;
+ flags |= SERHND_HI;
else if ( conf[4] == 'L' )
- handle |= SERHND_LO;
+ flags |= SERHND_LO;

- handle |= SERHND_COOKED;
+ common:
+ if ( !com[handle].driver )
+ goto fail;
+
+ if ( !post_irq )
+ com[handle].state = serial_parsed;
+ else if ( com[handle].state != serial_initialized )
+ {
+ if ( com[handle].driver->init_postirq )
+ com[handle].driver->init_postirq(&com[handle]);
+ com[handle].state = serial_initialized;
+ }

- return handle;
+ return handle | flags | SERHND_COOKED;

fail:
return -1;
@@ -450,8 +458,13 @@ void __init serial_init_postirq(void)
{
int i;
for ( i = 0; i < ARRAY_SIZE(com); i++ )
- if ( com[i].driver && com[i].driver->init_postirq )
- com[i].driver->init_postirq(&com[i]);
+ if ( com[i].state == serial_parsed )
+ {
+ if ( com[i].driver->init_postirq )
+ com[i].driver->init_postirq(&com[i]);
+ com[i].state = serial_initialized;
+ }
+ post_irq = 1;
}

void __init serial_endboot(void)
@@ -475,7 +488,7 @@ void serial_suspend(void)
{
int i;
for ( i = 0; i < ARRAY_SIZE(com); i++ )
- if ( com[i].driver && com[i].driver->suspend )
+ if ( com[i].state == serial_initialized && com[i].driver->suspend )
com[i].driver->suspend(&com[i]);
}

@@ -483,7 +496,7 @@ void serial_resume(void)
{
int i;
for ( i = 0; i < ARRAY_SIZE(com); i++ )
- if ( com[i].driver && com[i].driver->resume )
+ if ( com[i].state == serial_initialized && com[i].driver->resume )
com[i].driver->resume(&com[i]);
}

--- a/xen/include/xen/serial.h
+++ b/xen/include/xen/serial.h
@@ -25,10 +25,17 @@ extern unsigned int serial_txbufsz;

struct uart_driver;

+enum serial_port_state {
+ serial_unused,
+ serial_parsed,
+ serial_initialized
+};
+
struct serial_port {
/* Uart-driver parameters. */
struct uart_driver *driver;
void *uart;
+ enum serial_port_state state;
/* Number of characters the port can hold for transmit. */
int tx_fifo_size;
/* Transmit data buffer (interrupt-driven uart). */
++++++ 25866-sercon-ns16550-pci-irq.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1347371733 -7200
# Node ID ee12dc357fbecbb0517798f395d14bf1764c6766
# Parent 5fb5b3b70e34ef278d06aff27878b4b8e6d9145f
ns16550: PCI initialization adjustments

Besides single-port serial cards, also accept multi-port ones and such
providing mixed functionality (e.g. also having a parallel port).

Reading PCI_INTERRUPT_PIN before ACPI gets enabled generally produces
an incorrect IRQ (below 16, whereas after enabling ACPI it frequently
would end up at a higher one), so this is useful (almost) only when a
system already boots in ACPI mode.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>

--- a/xen/drivers/char/ns16550.c
+++ b/xen/drivers/char/ns16550.c
@@ -449,7 +449,6 @@ static int __init check_existence(struct
static int
pci_uart_config (struct ns16550 *uart, int skip_amt, int bar_idx)
{
- uint16_t class;
uint32_t bar, len;
int b, d, f;

@@ -460,9 +459,15 @@ pci_uart_config (struct ns16550 *uart, i
{
for ( f = 0; f < 0x8; f++ )
{
- class = pci_conf_read16(0, b, d, f, PCI_CLASS_DEVICE);
- if ( class != 0x700 )
+ switch ( pci_conf_read16(0, b, d, f, PCI_CLASS_DEVICE) )
+ {
+ case 0x0700: /* single port serial */
+ case 0x0702: /* multi port serial */
+ case 0x0780: /* other (e.g serial+parallel) */
+ break;
+ default:
continue;
+ }

bar = pci_conf_read32(0, b, d, f,
PCI_BASE_ADDRESS_0 + bar_idx*4);
@@ -485,7 +490,8 @@ pci_uart_config (struct ns16550 *uart, i
uart->bar = bar;
uart->bar_idx = bar_idx;
uart->io_base = bar & 0xfffe;
- uart->irq = 0;
+ uart->irq = pci_conf_read8(0, b, d, f, PCI_INTERRUPT_PIN) ?
+ pci_conf_read8(0, b, d, f, PCI_INTERRUPT_LINE) : 0;

return 0;
}
++++++ 25867-sercon-ns16550-parse.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1347371805 -7200
# Node ID b22f184e1a3cac03abeed92ec4b74235fd0881f4
# Parent ee12dc357fbecbb0517798f395d14bf1764c6766
ns16550: command line parsing adjustments

Allow intermediate parts of the command line options to be absent
(expressed by two immediately succeeding commas).

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>

--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -199,7 +199,7 @@ If set, override Xen's calculation of th
If set, override Xen's default choice for the platform timer.

### com1,com2
-> `= <baud>[/<clock_hz>][,DPS[,<io-base>[,<irq>[,<port-bdf>[,<bridge-bdf>]]]] | pci | amt ] `
+> `= <baud>[/<clock_hz>][,[DPS][,[<io-base>|pci|amt][,[<irq>][,[<port-bdf>][,[<bridge-bdf>]]]]]]`

Both option `com1` and `com2` follow the same format.

--- a/xen/drivers/char/ns16550.c
+++ b/xen/drivers/char/ns16550.c
@@ -536,26 +536,23 @@ static void __init ns16550_parse_port_co
else if ( (baud = simple_strtoul(conf, &conf, 10)) != 0 )
uart->baud = baud;

- if ( *conf == '/')
+ if ( *conf == '/' )
{
conf++;
uart->clock_hz = simple_strtoul(conf, &conf, 0) << 4;
}

- if ( *conf != ',' )
- goto config_parsed;
- conf++;
-
- uart->data_bits = simple_strtoul(conf, &conf, 10);
+ if ( *conf == ',' && *++conf != ',' )
+ {
+ uart->data_bits = simple_strtoul(conf, &conf, 10);

- uart->parity = parse_parity_char(*conf);
- conf++;
+ uart->parity = parse_parity_char(*conf);

- uart->stop_bits = simple_strtoul(conf, &conf, 10);
+ uart->stop_bits = simple_strtoul(conf + 1, &conf, 10);
+ }

- if ( *conf == ',' )
+ if ( *conf == ',' && *++conf != ',' )
{
- conf++;
if ( strncmp(conf, "pci", 3) == 0 )
{
if ( pci_uart_config(uart, 1/* skip AMT */, uart - ns16550_com) )
@@ -572,24 +569,21 @@ static void __init ns16550_parse_port_co
{
uart->io_base = simple_strtoul(conf, &conf, 0);
}
+ }

- if ( *conf == ',' )
- {
- conf++;
- uart->irq = simple_strtoul(conf, &conf, 10);
- if ( *conf == ',' )
- {
- conf++;
- uart->ps_bdf_enable = 1;
- parse_pci_bdf(&conf, &uart->ps_bdf[0]);
- if ( *conf == ',' )
- {
- conf++;
- uart->pb_bdf_enable = 1;
- parse_pci_bdf(&conf, &uart->pb_bdf[0]);
- }
- }
- }
+ if ( *conf == ',' && *++conf != ',' )
+ uart->irq = simple_strtol(conf, &conf, 10);
+
+ if ( *conf == ',' && *++conf != ',' )
+ {
+ uart->ps_bdf_enable = 1;
+ parse_pci_bdf(&conf, &uart->ps_bdf[0]);
+ }
+
+ if ( *conf == ',' && *++conf != ',' )
+ {
+ uart->pb_bdf_enable = 1;
+ parse_pci_bdf(&conf, &uart->pb_bdf[0]);
}

config_parsed:
++++++ 25874-x86-EFI-chain-cfg.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1347437974 -7200
# Node ID 8c0aa97d529a55de2ab96be1a5a6e9ed6a9c6bf0
# Parent ac8f4afccd6c6786a3fd5691e8b0c9b38c47e994
x86-64/EFI: allow chaining of config files

Namely when making use of the CONFIG_XEN_COMPAT_* options in the legacy
Linux kernels, newer kernels may not be compatible with older
hypervisors, so trying to boot such a combination makes little sense.
Booting older kernels on newer hypervisors, however, has to always
work.

With the way xen.efi looks for its configuration file, allowing
individual configuration files to refer only to compatible kernels,
and referring from an older- to a newer-hypervisor one (the kernels
of which will, as said, necessarily be compatible with the older
hypervisor) allows to greatly reduce redundancy at least in
development environments where one frequently wants multiple
hypervisors and kernels to be installed in parallel.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>

--- a/docs/misc/efi.markdown
+++ b/docs/misc/efi.markdown
@@ -75,6 +75,13 @@ Specifies an XSM module to load.

Specifies a CPU microcode blob to load.

+###`chain=<filename>`
+
+Specifies an alternate configuration file to use in case the specified section
+(and in particular its `kernel=` setting) can't be found in the default (or
+specified) configuration file. This is only meaningful in the [global] section
+and really not meant to be used together with the `-cfg=` command line option.
+
Filenames must be specified relative to the location of the EFI binary.

Extra options to be passed to Xen can also be specified on the command line,
--- a/xen/arch/x86/efi/boot.c
+++ b/xen/arch/x86/efi/boot.c
@@ -797,7 +797,26 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
else
section.s = get_value(&cfg, "global", "default");

- name.s = get_value(&cfg, section.s, "kernel");
+ for ( ; ; )
+ {
+ name.s = get_value(&cfg, section.s, "kernel");
+ if ( name.s )
+ break;
+ name.s = get_value(&cfg, "global", "chain");
+ if ( !name.s )
+ break;
+ efi_bs->FreePages(cfg.addr, PFN_UP(cfg.size));
+ cfg.addr = 0;
+ if ( !read_file(dir_handle, s2w(&name), &cfg) )
+ {
+ PrintStr(L"Chained configuration file '");
+ PrintStr(name.w);
+ efi_bs->FreePool(name.w);
+ blexit(L"'not found\r\n");
+ }
+ pre_parse(&cfg);
+ efi_bs->FreePool(name.w);
+ }
if ( !name.s )
blexit(L"No Dom0 kernel image specified\r\n");
split_value(name.s);
++++++ 25909-xenpm-consistent.patch ++++++
++++ 630 lines (skipped)

++++++ 25912-partial-libxl.patch ++++++
No functional change.

The purpose is to make it easier to backport patches from Xen 4.3's
libxl, as Xen 4.3's libxl has had this done:

libxl: Enable -Wshadow.

It was convenient to invent $(CFLAGS_LIBXL) to do this.

Various renamings to avoid shadowing standard functions:
- index(3)
- listen(2)
- link(2)
- abort(3)
- abs(3)

Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx>

In this patch we do not change the others, and we do not enable
-Wshadow. We're just trying to bring 4.2's libxl textually closer to
4.3's.

Signed-off-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
---
tools/libxl/libxl_event.c | 34 +++++++++++++++++-----------------
1 files changed, 17 insertions(+), 17 deletions(-)

Index: xen-4.2.1-testing/tools/libxl/libxl_event.c
===================================================================
--- xen-4.2.1-testing.orig/tools/libxl/libxl_event.c
+++ xen-4.2.1-testing/tools/libxl/libxl_event.c
@@ -167,15 +167,15 @@ static void time_insert_finite(libxl__gc
}

static int time_register_finite(libxl__gc *gc, libxl__ev_time *ev,
- struct timeval abs)
+ struct timeval absolute)
{
int rc;

- rc = OSEVENT_HOOK(timeout_register, &ev->for_app_reg, abs, ev);
+ rc = OSEVENT_HOOK(timeout_register, &ev->for_app_reg, absolute, ev);
if (rc) return rc;

ev->infinite = 0;
- ev->abs = abs;
+ ev->abs = absolute;
time_insert_finite(gc, ev);

return 0;
@@ -202,16 +202,16 @@ static void time_done_debug(libxl__gc *g

int libxl__ev_time_register_abs(libxl__gc *gc, libxl__ev_time *ev,
libxl__ev_time_callback *func,
- struct timeval abs)
+ struct timeval absolute)
{
int rc;

CTX_LOCK;

DBG("ev_time=%p register abs=%lu.%06lu",
- ev, (unsigned long)abs.tv_sec, (unsigned long)abs.tv_usec);
+ ev, (unsigned long)absolute.tv_sec, (unsigned long)absolute.tv_usec);

- rc = time_register_finite(gc, ev, abs);
+ rc = time_register_finite(gc, ev, absolute);
if (rc) goto out;

ev->func = func;
@@ -228,7 +228,7 @@ int libxl__ev_time_register_rel(libxl__g
libxl__ev_time_callback *func,
int milliseconds /* as for poll(2) */)
{
- struct timeval abs;
+ struct timeval absolute;
int rc;

CTX_LOCK;
@@ -238,10 +238,10 @@ int libxl__ev_time_register_rel(libxl__g
if (milliseconds < 0) {
ev->infinite = 1;
} else {
- rc = time_rel_to_abs(gc, milliseconds, &abs);
+ rc = time_rel_to_abs(gc, milliseconds, &absolute);
if (rc) goto out;

- rc = time_register_finite(gc, ev, abs);
+ rc = time_register_finite(gc, ev, absolute);
if (rc) goto out;
}

@@ -255,26 +255,26 @@ int libxl__ev_time_register_rel(libxl__g
}

int libxl__ev_time_modify_abs(libxl__gc *gc, libxl__ev_time *ev,
- struct timeval abs)
+ struct timeval absolute)
{
int rc;

CTX_LOCK;

DBG("ev_time=%p modify abs==%lu.%06lu",
- ev, (unsigned long)abs.tv_sec, (unsigned long)abs.tv_usec);
+ ev, (unsigned long)absolute.tv_sec, (unsigned long)absolute.tv_usec);

assert(libxl__ev_time_isregistered(ev));

if (ev->infinite) {
- rc = time_register_finite(gc, ev, abs);
+ rc = time_register_finite(gc, ev, absolute);
if (rc) goto out;
} else {
- rc = OSEVENT_HOOK(timeout_modify, &ev->for_app_reg, abs);
+ rc = OSEVENT_HOOK(timeout_modify, &ev->for_app_reg, absolute);
if (rc) goto out;

LIBXL_TAILQ_REMOVE(&CTX->etimes, ev, entry);
- ev->abs = abs;
+ ev->abs = absolute;
time_insert_finite(gc, ev);
}

@@ -288,7 +288,7 @@ int libxl__ev_time_modify_abs(libxl__gc
int libxl__ev_time_modify_rel(libxl__gc *gc, libxl__ev_time *ev,
int milliseconds)
{
- struct timeval abs;
+ struct timeval absolute;
int rc;

CTX_LOCK;
@@ -304,10 +304,10 @@ int libxl__ev_time_modify_rel(libxl__gc
goto out;
}

- rc = time_rel_to_abs(gc, milliseconds, &abs);
+ rc = time_rel_to_abs(gc, milliseconds, &absolute);
if (rc) goto out;

- rc = libxl__ev_time_modify_abs(gc, ev, abs);
+ rc = libxl__ev_time_modify_abs(gc, ev, absolute);
if (rc) goto out;

rc = 0;
++++++ 25920-x86-APICV-enable.patch ++++++
References: FATE#313605

# HG changeset patch
# User Jiongxi Li <jiongxi.li@xxxxxxxxx>
# Date 1347912248 -3600
# Node ID ec60de627945f17ec2ce5c14e1224b59403875f7
# Parent 62de66cec48a1716bb700912da451a26296b8d1e
xen: enable APIC-Register Virtualization

Add APIC register virtualization support
- APIC read doesn't cause VM-Exit
- APIC write becomes trap-like

Signed-off-by: Gang Wei <gang.wei@xxxxxxxxx>
Signed-off-by: Yang Zhang <yang.z.zhang@xxxxxxxxx>
Signed-off-by: Jiongxi Li <jiongxi.li@xxxxxxxxx>

--- a/xen/arch/x86/hvm/vlapic.c
+++ b/xen/arch/x86/hvm/vlapic.c
@@ -823,6 +823,14 @@ static int vlapic_write(struct vcpu *v,
return rc;
}

+int vlapic_apicv_write(struct vcpu *v, unsigned int offset)
+{
+ uint32_t val = vlapic_get_reg(vcpu_vlapic(v), offset);
+
+ vlapic_reg_write(v, offset, val);
+ return 0;
+}
+
int hvm_x2apic_msr_write(struct vcpu *v, unsigned int msr, uint64_t
msr_content)
{
struct vlapic *vlapic = vcpu_vlapic(v);
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -89,6 +89,7 @@ static void __init vmx_display_features(
P(cpu_has_vmx_vnmi, "Virtual NMI");
P(cpu_has_vmx_msr_bitmap, "MSR direct-access bitmap");
P(cpu_has_vmx_unrestricted_guest, "Unrestricted Guest");
+ P(cpu_has_vmx_apic_reg_virt, "APIC Register Virtualization");
#undef P

if ( !printed )
@@ -186,6 +187,14 @@ static int vmx_init_vmcs_config(void)
if ( opt_unrestricted_guest_enabled )
opt |= SECONDARY_EXEC_UNRESTRICTED_GUEST;

+ /*
+ * "APIC Register Virtualization"
+ * can be set only when "use TPR shadow" is set
+ */
+ if ( _vmx_cpu_based_exec_control & CPU_BASED_TPR_SHADOW )
+ opt |= SECONDARY_EXEC_APIC_REGISTER_VIRT;
+
+
_vmx_secondary_exec_control = adjust_vmx_controls(
"Secondary Exec Control", min, opt,
MSR_IA32_VMX_PROCBASED_CTLS2, &mismatch);
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2274,6 +2274,16 @@ static void vmx_idtv_reinject(unsigned l
}
}

+static int vmx_handle_apic_write(void)
+{
+ unsigned long exit_qualification = __vmread(EXIT_QUALIFICATION);
+ unsigned int offset = exit_qualification & 0xfff;
+
+ ASSERT(cpu_has_vmx_apic_reg_virt);
+
+ return vlapic_apicv_write(current, offset);
+}
+
void vmx_vmexit_handler(struct cpu_user_regs *regs)
{
unsigned int exit_reason, idtv_info, intr_info = 0, vector = 0;
@@ -2729,6 +2739,11 @@ void vmx_vmexit_handler(struct cpu_user_
break;
}

+ case EXIT_REASON_APIC_WRITE:
+ if ( vmx_handle_apic_write() )
+ hvm_inject_hw_exception(TRAP_gp_fault, 0);
+ break;
+
case EXIT_REASON_ACCESS_GDTR_OR_IDTR:
case EXIT_REASON_ACCESS_LDTR_OR_TR:
case EXIT_REASON_VMX_PREEMPTION_TIMER_EXPIRED:
--- a/xen/include/asm-x86/hvm/vlapic.h
+++ b/xen/include/asm-x86/hvm/vlapic.h
@@ -103,6 +103,8 @@ void vlapic_EOI_set(struct vlapic *vlapi

int vlapic_ipi(struct vlapic *vlapic, uint32_t icr_low, uint32_t icr_high);

+int vlapic_apicv_write(struct vcpu *v, unsigned int offset);
+
struct vlapic *vlapic_lowest_prio(
struct domain *d, struct vlapic *source,
int short_hand, uint8_t dest, uint8_t dest_mode);
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -182,6 +182,7 @@ extern u32 vmx_vmentry_control;
#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
+#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100
#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
extern u32 vmx_secondary_exec_control;
@@ -230,6 +231,8 @@ extern bool_t cpu_has_vmx_ins_outs_instr
SECONDARY_EXEC_UNRESTRICTED_GUEST)
#define cpu_has_vmx_ple \
(vmx_secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING)
+#define cpu_has_vmx_apic_reg_virt \
+ (vmx_secondary_exec_control & SECONDARY_EXEC_APIC_REGISTER_VIRT)

/* GUEST_INTERRUPTIBILITY_INFO flags. */
#define VMX_INTR_SHADOW_STI 0x00000001
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -129,6 +129,7 @@ void vmx_update_cpu_exec_control(struct
#define EXIT_REASON_INVVPID 53
#define EXIT_REASON_WBINVD 54
#define EXIT_REASON_XSETBV 55
+#define EXIT_REASON_APIC_WRITE 56
#define EXIT_REASON_INVPCID 58

/*
++++++ 25921-x86-APICV-delivery.patch ++++++
References: FATE#313605

# HG changeset patch
# User Jiongxi Li <jiongxi.li@xxxxxxxxx>
# Date 1347912311 -3600
# Node ID 713b8849b11afa05f1dde157a3f5086fa3aaad08
# Parent ec60de627945f17ec2ce5c14e1224b59403875f7
xen: enable Virtual-interrupt delivery

Virtual interrupt delivery relieves Xen of having to inject vAPIC
interrupts manually; injection is fully taken care of by the hardware.
This needs some special awareness in the existing interrupt injection path:
For a pending interrupt from the vLAPIC, instead of direct injection, we may
need to update architecture specific indicators before resuming to guest.
Before returning to guest, RVI should be updated if there are any pending IRRs.
EOI exit bitmap controls whether an EOI write should cause VM-Exit. If
set, a trap-like induced EOI VM-Exit is triggered. The approach here
is to manipulate EOI exit bitmap based on value of TMR. Level
triggered irq requires a hook in vLAPIC EOI write, so that vIOAPIC EOI
is triggered and emulated.

Signed-off-by: Gang Wei <gang.wei@xxxxxxxxx>
Signed-off-by: Yang Zhang <yang.z.zhang@xxxxxxxxx>
Signed-off-by: Jiongxi Li <jiongxi.li@xxxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>

--- a/xen/arch/x86/hvm/vlapic.c
+++ b/xen/arch/x86/hvm/vlapic.c
@@ -145,6 +145,9 @@ int vlapic_set_irq(struct vlapic *vlapic
if ( trig )
vlapic_set_vector(vec, &vlapic->regs->data[APIC_TMR]);

+ if ( hvm_funcs.update_eoi_exit_bitmap )
+ hvm_funcs.update_eoi_exit_bitmap(vlapic_vcpu(vlapic), vec ,trig);
+
/* We may need to wake up target vcpu, besides set pending bit here */
return !vlapic_test_and_set_irr(vec, vlapic);
}
@@ -410,6 +413,14 @@ void vlapic_EOI_set(struct vlapic *vlapi
hvm_dpci_msi_eoi(current->domain, vector);
}

+void vlapic_handle_EOI_induced_exit(struct vlapic *vlapic, int vector)
+{
+ if ( vlapic_test_and_clear_vector(vector, &vlapic->regs->data[APIC_TMR]) )
+ vioapic_update_EOI(vlapic_domain(vlapic), vector);
+
+ hvm_dpci_msi_eoi(current->domain, vector);
+}
+
int vlapic_ipi(
struct vlapic *vlapic, uint32_t icr_low, uint32_t icr_high)
{
@@ -1000,6 +1011,14 @@ void vlapic_adjust_i8259_target(struct d
pt_adjust_global_vcpu_target(v);
}

+int vlapic_virtual_intr_delivery_enabled(void)
+{
+ if ( hvm_funcs.virtual_intr_delivery_enabled )
+ return hvm_funcs.virtual_intr_delivery_enabled();
+ else
+ return 0;
+}
+
int vlapic_has_pending_irq(struct vcpu *v)
{
struct vlapic *vlapic = vcpu_vlapic(v);
@@ -1012,6 +1031,9 @@ int vlapic_has_pending_irq(struct vcpu *
if ( irr == -1 )
return -1;

+ if ( vlapic_virtual_intr_delivery_enabled() )
+ return irr;
+
isr = vlapic_find_highest_isr(vlapic);
isr = (isr != -1) ? isr : 0;
if ( (isr & 0xf0) >= (irr & 0xf0) )
@@ -1024,6 +1046,9 @@ int vlapic_ack_pending_irq(struct vcpu *
{
struct vlapic *vlapic = vcpu_vlapic(v);

+ if ( vlapic_virtual_intr_delivery_enabled() )
+ return 1;
+
vlapic_set_vector(vector, &vlapic->regs->data[APIC_ISR]);
vlapic_clear_irr(vector, vlapic);

--- a/xen/arch/x86/hvm/vmx/intr.c
+++ b/xen/arch/x86/hvm/vmx/intr.c
@@ -206,6 +206,7 @@ void vmx_intr_assist(void)
struct vcpu *v = current;
unsigned int tpr_threshold = 0;
enum hvm_intblk intblk;
+ int pt_vector = -1;

/* Block event injection when single step with MTF. */
if ( unlikely(v->arch.hvm_vcpu.single_step) )
@@ -216,7 +217,7 @@ void vmx_intr_assist(void)
}

/* Crank the handle on interrupt state. */
- pt_update_irq(v);
+ pt_vector = pt_update_irq(v);

do {
intack = hvm_vcpu_has_pending_irq(v);
@@ -227,16 +228,34 @@ void vmx_intr_assist(void)
goto out;

intblk = hvm_interrupt_blocked(v, intack);
- if ( intblk == hvm_intblk_tpr )
+ if ( cpu_has_vmx_virtual_intr_delivery )
+ {
+ /* Set "Interrupt-window exiting" for ExtINT */
+ if ( (intblk != hvm_intblk_none) &&
+ ( (intack.source == hvm_intsrc_pic) ||
+ ( intack.source == hvm_intsrc_vector) ) )
+ {
+ enable_intr_window(v, intack);
+ goto out;
+ }
+
+ if ( __vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK )
+ {
+ if ( (intack.source == hvm_intsrc_pic) ||
+ (intack.source == hvm_intsrc_nmi) ||
+ (intack.source == hvm_intsrc_mce) )
+ enable_intr_window(v, intack);
+
+ goto out;
+ }
+ } else if ( intblk == hvm_intblk_tpr )
{
ASSERT(vlapic_enabled(vcpu_vlapic(v)));
ASSERT(intack.source == hvm_intsrc_lapic);
tpr_threshold = intack.vector >> 4;
goto out;
- }
-
- if ( (intblk != hvm_intblk_none) ||
- (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK) )
+ } else if ( (intblk != hvm_intblk_none) ||
+ (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK) )
{
enable_intr_window(v, intack);
goto out;
@@ -253,6 +272,44 @@ void vmx_intr_assist(void)
{
hvm_inject_hw_exception(TRAP_machine_check, HVM_DELIVER_NO_ERROR_CODE);
}
+ else if ( cpu_has_vmx_virtual_intr_delivery &&
+ intack.source != hvm_intsrc_pic &&
+ intack.source != hvm_intsrc_vector )
+ {
+ unsigned long status = __vmread(GUEST_INTR_STATUS);
+
+ /*
+ * Set eoi_exit_bitmap for periodic timer interrup to cause EOI-induced
VM
+ * exit, then pending periodic time interrups have the chance to be
injected
+ * for compensation
+ */
+ if (pt_vector != -1)
+ vmx_set_eoi_exit_bitmap(v, pt_vector);
+
+ /* we need update the RVI field */
+ status &= ~(unsigned long)0x0FF;
+ status |= (unsigned long)0x0FF &
+ intack.vector;
+ __vmwrite(GUEST_INTR_STATUS, status);
+ if (v->arch.hvm_vmx.eoi_exitmap_changed) {
+#ifdef __i386__
+#define UPDATE_EOI_EXITMAP(v, e) { \
+ if (test_and_clear_bit(e, &v->arch.hvm_vmx.eoi_exitmap_changed)) {
\
+ __vmwrite(EOI_EXIT_BITMAP##e,
v->arch.hvm_vmx.eoi_exit_bitmap[e]); \
+ __vmwrite(EOI_EXIT_BITMAP##e##_HIGH,
v->arch.hvm_vmx.eoi_exit_bitmap[e] >> 32);}}
+#else
+#define UPDATE_EOI_EXITMAP(v, e) { \
+ if (test_and_clear_bit(e, &v->arch.hvm_vmx.eoi_exitmap_changed)) {
\
+ __vmwrite(EOI_EXIT_BITMAP##e,
v->arch.hvm_vmx.eoi_exit_bitmap[e]);}}
+#endif
+ UPDATE_EOI_EXITMAP(v, 0);
+ UPDATE_EOI_EXITMAP(v, 1);
+ UPDATE_EOI_EXITMAP(v, 2);
+ UPDATE_EOI_EXITMAP(v, 3);
+ }
+
+ pt_intr_post(v, intack);
+ }
else
{
HVMTRACE_2D(INJ_VIRQ, intack.vector, /*fake=*/ 0);
@@ -262,11 +319,16 @@ void vmx_intr_assist(void)

/* Is there another IRQ to queue up behind this one? */
intack = hvm_vcpu_has_pending_irq(v);
- if ( unlikely(intack.source != hvm_intsrc_none) )
- enable_intr_window(v, intack);
+ if ( !cpu_has_vmx_virtual_intr_delivery ||
+ intack.source == hvm_intsrc_pic ||
+ intack.source == hvm_intsrc_vector )
+ {
+ if ( unlikely(intack.source != hvm_intsrc_none) )
+ enable_intr_window(v, intack);
+ }

out:
- if ( cpu_has_vmx_tpr_shadow )
+ if ( !cpu_has_vmx_virtual_intr_delivery && cpu_has_vmx_tpr_shadow )
__vmwrite(TPR_THRESHOLD, tpr_threshold);
}

--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -90,6 +90,7 @@ static void __init vmx_display_features(
P(cpu_has_vmx_msr_bitmap, "MSR direct-access bitmap");
P(cpu_has_vmx_unrestricted_guest, "Unrestricted Guest");
P(cpu_has_vmx_apic_reg_virt, "APIC Register Virtualization");
+ P(cpu_has_vmx_virtual_intr_delivery, "Virtual Interrupt Delivery");
#undef P

if ( !printed )
@@ -188,11 +189,12 @@ static int vmx_init_vmcs_config(void)
opt |= SECONDARY_EXEC_UNRESTRICTED_GUEST;

/*
- * "APIC Register Virtualization"
+ * "APIC Register Virtualization" and "Virtual Interrupt Delivery"
* can be set only when "use TPR shadow" is set
*/
if ( _vmx_cpu_based_exec_control & CPU_BASED_TPR_SHADOW )
- opt |= SECONDARY_EXEC_APIC_REGISTER_VIRT;
+ opt |= SECONDARY_EXEC_APIC_REGISTER_VIRT |
+ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;


_vmx_secondary_exec_control = adjust_vmx_controls(
@@ -787,6 +789,22 @@ static int construct_vmcs(struct vcpu *v
__vmwrite(IO_BITMAP_A, virt_to_maddr((char *)hvm_io_bitmap + 0));
__vmwrite(IO_BITMAP_B, virt_to_maddr((char *)hvm_io_bitmap + PAGE_SIZE));

+ if ( cpu_has_vmx_virtual_intr_delivery )
+ {
+ /* EOI-exit bitmap */
+ v->arch.hvm_vmx.eoi_exit_bitmap[0] = (uint64_t)0;
+ __vmwrite(EOI_EXIT_BITMAP0, v->arch.hvm_vmx.eoi_exit_bitmap[0]);
+ v->arch.hvm_vmx.eoi_exit_bitmap[1] = (uint64_t)0;
+ __vmwrite(EOI_EXIT_BITMAP1, v->arch.hvm_vmx.eoi_exit_bitmap[1]);
+ v->arch.hvm_vmx.eoi_exit_bitmap[2] = (uint64_t)0;
+ __vmwrite(EOI_EXIT_BITMAP2, v->arch.hvm_vmx.eoi_exit_bitmap[2]);
+ v->arch.hvm_vmx.eoi_exit_bitmap[3] = (uint64_t)0;
+ __vmwrite(EOI_EXIT_BITMAP3, v->arch.hvm_vmx.eoi_exit_bitmap[3]);
+
+ /* Initialise Guest Interrupt Status (RVI and SVI) to 0 */
+ __vmwrite(GUEST_INTR_STATUS, 0);
+ }
+
/* Host data selectors. */
__vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS);
__vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS);
@@ -1028,6 +1046,30 @@ int vmx_add_host_load_msr(u32 msr)
return 0;
}

+void vmx_set_eoi_exit_bitmap(struct vcpu *v, u8 vector)
+{
+ int index, offset, changed;
+
+ index = vector >> 6;
+ offset = vector & 63;
+ changed = !test_and_set_bit(offset,
+ (uint64_t *)&v->arch.hvm_vmx.eoi_exit_bitmap[index]);
+ if (changed)
+ set_bit(index, &v->arch.hvm_vmx.eoi_exitmap_changed);
+}
+
+void vmx_clear_eoi_exit_bitmap(struct vcpu *v, u8 vector)
+{
+ int index, offset, changed;
+
+ index = vector >> 6;
+ offset = vector & 63;
+ changed = test_and_clear_bit(offset,
+ (uint64_t *)&v->arch.hvm_vmx.eoi_exit_bitmap[index]);
+ if (changed)
+ set_bit(index, &v->arch.hvm_vmx.eoi_exitmap_changed);
+}
+
int vmx_create_vmcs(struct vcpu *v)
{
struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx;
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1502,6 +1502,22 @@ static void vmx_set_info_guest(struct vc
vmx_vmcs_exit(v);
}

+static void vmx_update_eoi_exit_bitmap(struct vcpu *v, u8 vector, u8 trig)
+{
+ if ( cpu_has_vmx_virtual_intr_delivery )
+ {
+ if (trig)
+ vmx_set_eoi_exit_bitmap(v, vector);
+ else
+ vmx_clear_eoi_exit_bitmap(v, vector);
+ }
+}
+
+static int vmx_virtual_intr_delivery_enabled(void)
+{
+ return cpu_has_vmx_virtual_intr_delivery;
+}
+
static struct hvm_function_table __read_mostly vmx_function_table = {
.name = "VMX",
.cpu_up_prepare = vmx_cpu_up_prepare,
@@ -1548,7 +1564,9 @@ static struct hvm_function_table __read_
.nhvm_vmcx_guest_intercepts_trap = nvmx_intercepts_exception,
.nhvm_vcpu_vmexit_trap = nvmx_vmexit_trap,
.nhvm_intr_blocked = nvmx_intr_blocked,
- .nhvm_domain_relinquish_resources = nvmx_domain_relinquish_resources
+ .nhvm_domain_relinquish_resources = nvmx_domain_relinquish_resources,
+ .update_eoi_exit_bitmap = vmx_update_eoi_exit_bitmap,
+ .virtual_intr_delivery_enabled = vmx_virtual_intr_delivery_enabled
};

struct hvm_function_table * __init start_vmx(void)
@@ -2284,6 +2302,17 @@ static int vmx_handle_apic_write(void)
return vlapic_apicv_write(current, offset);
}

+/*
+ * When "Virtual Interrupt Delivery" is enabled, this function is used
+ * to handle EOI-induced VM exit
+ */
+void vmx_handle_EOI_induced_exit(struct vlapic *vlapic, int vector)
+{
+ ASSERT(cpu_has_vmx_virtual_intr_delivery);
+
+ vlapic_handle_EOI_induced_exit(vlapic, vector);
+}
+
void vmx_vmexit_handler(struct cpu_user_regs *regs)
{
unsigned int exit_reason, idtv_info, intr_info = 0, vector = 0;
@@ -2677,6 +2706,16 @@ void vmx_vmexit_handler(struct cpu_user_
hvm_inject_hw_exception(TRAP_gp_fault, 0);
break;

+ case EXIT_REASON_EOI_INDUCED:
+ {
+ int vector;
+ exit_qualification = __vmread(EXIT_QUALIFICATION);
+ vector = exit_qualification & 0xff;
+
+ vmx_handle_EOI_induced_exit(vcpu_vlapic(current), vector);
+ break;
+ }
+
case EXIT_REASON_IO_INSTRUCTION:
exit_qualification = __vmread(EXIT_QUALIFICATION);
if ( exit_qualification & 0x10 )
--- a/xen/arch/x86/hvm/vpt.c
+++ b/xen/arch/x86/hvm/vpt.c
@@ -212,7 +212,7 @@ static void pt_timer_fn(void *data)
pt_unlock(pt);
}

-void pt_update_irq(struct vcpu *v)
+int pt_update_irq(struct vcpu *v)
{
struct list_head *head = &v->arch.hvm_vcpu.tm_list;
struct periodic_time *pt, *temp, *earliest_pt = NULL;
@@ -245,7 +245,7 @@ void pt_update_irq(struct vcpu *v)
if ( earliest_pt == NULL )
{
spin_unlock(&v->arch.hvm_vcpu.tm_lock);
- return;
+ return -1;
}

earliest_pt->irq_issued = 1;
@@ -263,6 +263,17 @@ void pt_update_irq(struct vcpu *v)
hvm_isa_irq_deassert(v->domain, irq);
hvm_isa_irq_assert(v->domain, irq);
}
+
+ /*
+ * If periodic timer interrut is handled by lapic, its vector in
+ * IRR is returned and used to set eoi_exit_bitmap for virtual
+ * interrupt delivery case. Otherwise return -1 to do nothing.
+ */
+ if ( vlapic_accept_pic_intr(v) &&
+ (&v->domain->arch.hvm_domain)->vpic[0].int_output )
+ return -1;
+ else
+ return pt_irq_vector(earliest_pt, hvm_intsrc_lapic);
}

static struct periodic_time *is_pt_irq(
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -180,6 +180,10 @@ struct hvm_function_table {

enum hvm_intblk (*nhvm_intr_blocked)(struct vcpu *v);
void (*nhvm_domain_relinquish_resources)(struct domain *d);
+
+ /* Virtual interrupt delivery */
+ void (*update_eoi_exit_bitmap)(struct vcpu *v, u8 vector, u8 trig);
+ int (*virtual_intr_delivery_enabled)(void);
};

extern struct hvm_function_table hvm_funcs;
--- a/xen/include/asm-x86/hvm/vlapic.h
+++ b/xen/include/asm-x86/hvm/vlapic.h
@@ -100,6 +100,7 @@ int vlapic_accept_pic_intr(struct vcpu *
void vlapic_adjust_i8259_target(struct domain *d);

void vlapic_EOI_set(struct vlapic *vlapic);
+void vlapic_handle_EOI_induced_exit(struct vlapic *vlapic, int vector);

int vlapic_ipi(struct vlapic *vlapic, uint32_t icr_low, uint32_t icr_high);

--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -110,6 +110,9 @@ struct arch_vmx_struct {
unsigned int host_msr_count;
struct vmx_msr_entry *host_msr_area;

+ uint32_t eoi_exitmap_changed;
+ uint64_t eoi_exit_bitmap[4];
+
unsigned long host_cr0;

/* Is the guest in real mode? */
@@ -183,6 +186,7 @@ extern u32 vmx_vmentry_control;
#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100
+#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200
#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
extern u32 vmx_secondary_exec_control;
@@ -233,6 +237,8 @@ extern bool_t cpu_has_vmx_ins_outs_instr
(vmx_secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING)
#define cpu_has_vmx_apic_reg_virt \
(vmx_secondary_exec_control & SECONDARY_EXEC_APIC_REGISTER_VIRT)
+#define cpu_has_vmx_virtual_intr_delivery \
+ (vmx_secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)

/* GUEST_INTERRUPTIBILITY_INFO flags. */
#define VMX_INTR_SHADOW_STI 0x00000001
@@ -251,6 +257,7 @@ enum vmcs_field {
GUEST_GS_SELECTOR = 0x0000080a,
GUEST_LDTR_SELECTOR = 0x0000080c,
GUEST_TR_SELECTOR = 0x0000080e,
+ GUEST_INTR_STATUS = 0x00000810,
HOST_ES_SELECTOR = 0x00000c00,
HOST_CS_SELECTOR = 0x00000c02,
HOST_SS_SELECTOR = 0x00000c04,
@@ -278,6 +285,14 @@ enum vmcs_field {
APIC_ACCESS_ADDR_HIGH = 0x00002015,
EPT_POINTER = 0x0000201a,
EPT_POINTER_HIGH = 0x0000201b,
+ EOI_EXIT_BITMAP0 = 0x0000201c,
+ EOI_EXIT_BITMAP0_HIGH = 0x0000201d,
+ EOI_EXIT_BITMAP1 = 0x0000201e,
+ EOI_EXIT_BITMAP1_HIGH = 0x0000201f,
+ EOI_EXIT_BITMAP2 = 0x00002020,
+ EOI_EXIT_BITMAP2_HIGH = 0x00002021,
+ EOI_EXIT_BITMAP3 = 0x00002022,
+ EOI_EXIT_BITMAP3_HIGH = 0x00002023,
GUEST_PHYSICAL_ADDRESS = 0x00002400,
GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
VMCS_LINK_POINTER = 0x00002800,
@@ -398,6 +413,8 @@ int vmx_write_guest_msr(u32 msr, u64 val
int vmx_add_guest_msr(u32 msr);
int vmx_add_host_load_msr(u32 msr);
void vmx_vmcs_switch(struct vmcs_struct *from, struct vmcs_struct *to);
+void vmx_set_eoi_exit_bitmap(struct vcpu *v, u8 vector);
+void vmx_clear_eoi_exit_bitmap(struct vcpu *v, u8 vector);

#endif /* ASM_X86_HVM_VMX_VMCS_H__ */

--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -119,6 +119,7 @@ void vmx_update_cpu_exec_control(struct
#define EXIT_REASON_MCE_DURING_VMENTRY 41
#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
#define EXIT_REASON_APIC_ACCESS 44
+#define EXIT_REASON_EOI_INDUCED 45
#define EXIT_REASON_ACCESS_GDTR_OR_IDTR 46
#define EXIT_REASON_ACCESS_LDTR_OR_TR 47
#define EXIT_REASON_EPT_VIOLATION 48
--- a/xen/include/asm-x86/hvm/vpt.h
+++ b/xen/include/asm-x86/hvm/vpt.h
@@ -141,7 +141,7 @@ struct pl_time { /* platform time */

void pt_save_timer(struct vcpu *v);
void pt_restore_timer(struct vcpu *v);
-void pt_update_irq(struct vcpu *v);
+int pt_update_irq(struct vcpu *v);
void pt_intr_post(struct vcpu *v, struct hvm_intack intack);
void pt_migrate(struct vcpu *v);

++++++ 25922-x86-APICV-x2APIC.patch ++++++
References: FATE#313605

# HG changeset patch
# User Jiongxi Li <jiongxi.li@xxxxxxxxx>
# Date 1347912362 -3600
# Node ID c2578dd96b8318e108fff0f340411135dedaa47d
# Parent 713b8849b11afa05f1dde157a3f5086fa3aaad08
xen: add virtual x2apic support for apicv

Basically, to benefit from apicv, we need to clear the MSR bitmap for
the corresponding x2apic MSRs:
0x800 - 0x8ff: no read intercept for apicv register virtualization
TPR,EOI,SELF-IPI: no write intercept for virtual interrupt
delivery

Signed-off-by: Jiongxi Li <jiongxi.li@xxxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>

--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -658,7 +658,7 @@ static void vmx_set_host_env(struct vcpu
(unsigned long)&get_cpu_info()->guest_cpu_user_regs.error_code);
}

-void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr)
+void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr, int type)
{
unsigned long *msr_bitmap = v->arch.hvm_vmx.msr_bitmap;

@@ -673,14 +673,18 @@ void vmx_disable_intercept_for_msr(struc
*/
if ( msr <= 0x1fff )
{
- __clear_bit(msr, msr_bitmap + 0x000/BYTES_PER_LONG); /* read-low */
- __clear_bit(msr, msr_bitmap + 0x800/BYTES_PER_LONG); /* write-low */
+ if (type & MSR_TYPE_R)
+ __clear_bit(msr, msr_bitmap + 0x000/BYTES_PER_LONG); /* read-low */
+ if (type & MSR_TYPE_W)
+ __clear_bit(msr, msr_bitmap + 0x800/BYTES_PER_LONG); /* write-low
*/
}
else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
{
msr &= 0x1fff;
- __clear_bit(msr, msr_bitmap + 0x400/BYTES_PER_LONG); /* read-high */
- __clear_bit(msr, msr_bitmap + 0xc00/BYTES_PER_LONG); /* write-high */
+ if (type & MSR_TYPE_R)
+ __clear_bit(msr, msr_bitmap + 0x400/BYTES_PER_LONG); /* read-high
*/
+ if (type & MSR_TYPE_W)
+ __clear_bit(msr, msr_bitmap + 0xc00/BYTES_PER_LONG); /* write-high
*/
}
}

@@ -776,13 +780,25 @@ static int construct_vmcs(struct vcpu *v
v->arch.hvm_vmx.msr_bitmap = msr_bitmap;
__vmwrite(MSR_BITMAP, virt_to_maddr(msr_bitmap));

- vmx_disable_intercept_for_msr(v, MSR_FS_BASE);
- vmx_disable_intercept_for_msr(v, MSR_GS_BASE);
- vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS);
- vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP);
- vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP);
+ vmx_disable_intercept_for_msr(v, MSR_FS_BASE, MSR_TYPE_R | MSR_TYPE_W);
+ vmx_disable_intercept_for_msr(v, MSR_GS_BASE, MSR_TYPE_R | MSR_TYPE_W);
+ vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS, MSR_TYPE_R |
MSR_TYPE_W);
+ vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP, MSR_TYPE_R |
MSR_TYPE_W);
+ vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP, MSR_TYPE_R |
MSR_TYPE_W);
if ( cpu_has_vmx_pat && paging_mode_hap(d) )
- vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT);
+ vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT, MSR_TYPE_R |
MSR_TYPE_W);
+ if ( cpu_has_vmx_apic_reg_virt )
+ {
+ int msr;
+ for (msr = MSR_IA32_APICBASE_MSR; msr <= MSR_IA32_APICBASE_MSR +
0xff; msr++)
+ vmx_disable_intercept_for_msr(v, msr, MSR_TYPE_R);
+ }
+ if ( cpu_has_vmx_virtual_intr_delivery )
+ {
+ vmx_disable_intercept_for_msr(v, MSR_IA32_APICTPR_MSR, MSR_TYPE_W);
+ vmx_disable_intercept_for_msr(v, MSR_IA32_APICEOI_MSR, MSR_TYPE_W);
+ vmx_disable_intercept_for_msr(v, MSR_IA32_APICSELF_MSR,
MSR_TYPE_W);
+ }
}

/* I/O access bitmap. */
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2036,7 +2036,7 @@ static int vmx_msr_write_intercept(unsig
for ( ; (rc == 0) && lbr->count; lbr++ )
for ( i = 0; (rc == 0) && (i < lbr->count); i++ )
if ( (rc = vmx_add_guest_msr(lbr->base + i)) == 0 )
- vmx_disable_intercept_for_msr(v, lbr->base + i);
+ vmx_disable_intercept_for_msr(v, lbr->base + i,
MSR_TYPE_R | MSR_TYPE_W);
}

if ( (rc < 0) ||
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -407,7 +407,9 @@ enum vmcs_field {

#define VMCS_VPID_WIDTH 16

-void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr);
+#define MSR_TYPE_R 1
+#define MSR_TYPE_W 2
+void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr, int type);
int vmx_read_guest_msr(u32 msr, u64 *val);
int vmx_write_guest_msr(u32 msr, u64 val);
int vmx_add_guest_msr(u32 msr);
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -291,6 +291,9 @@
#define MSR_IA32_APICBASE_ENABLE (1<<11)
#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
#define MSR_IA32_APICBASE_MSR 0x800
+#define MSR_IA32_APICTPR_MSR 0x808
+#define MSR_IA32_APICEOI_MSR 0x80b
+#define MSR_IA32_APICSELF_MSR 0x83f

#define MSR_IA32_UCODE_WRITE 0x00000079
#define MSR_IA32_UCODE_REV 0x0000008b
++++++ 25952-x86-MMIO-remap-permissions.patch ++++++
# HG changeset patch
# User Daniel De Graaf <dgdegra@xxxxxxxxxxxxx>
# Date 1348653367 -7200
# Node ID 8278d7d8fa485996f51134c5265fceaf239adf6a
# Parent b83f414ccf7a6e4e077a10bc422cf3f6c7d30566
x86: check remote MMIO remap permissions

When a domain is mapping pages from a different pg_owner domain, the
iomem_access checks are currently only applied to the pg_owner domain,
potentially allowing a domain with a more restrictive iomem_access
policy to have the pages mapped into its page tables. To catch this,
also check the owner of the page tables. The current domain does not
need to be checked because the ability to manipulate a domain's page
tables implies full access to the target domain, so checking that
domain's permission is sufficient.

Signed-off-by: Daniel De Graaf <dgdegra@xxxxxxxxxxxxx>
Committed-by: Jan Beulich <jbeulich@xxxxxxxx>

Index: xen-4.2.0-testing/xen/arch/x86/mm.c
===================================================================
--- xen-4.2.0-testing.orig/xen/arch/x86/mm.c
+++ xen-4.2.0-testing/xen/arch/x86/mm.c
@@ -883,6 +883,19 @@ get_page_from_l1e(
return -EINVAL;
}

+ if ( pg_owner != l1e_owner &&
+ !iomem_access_permitted(l1e_owner, mfn, mfn) )
+ {
+ if ( mfn != (PADDR_MASK >> PAGE_SHIFT) ) /* INVALID_MFN? */
+ {
+ MEM_LOG("Dom%u attempted to map I/O space %08lx in dom%u to
dom%u",
+ curr->domain->domain_id, mfn, pg_owner->domain_id,
+ l1e_owner->domain_id);
+ return -EPERM;
+ }
+ return -EINVAL;
+ }
+
if ( !(l1f & _PAGE_RW) ||
!rangeset_contains_singleton(mmio_ro_ranges, mfn) )
return 0;
++++++ 25957-x86-TSC-adjust-HVM.patch ++++++
References: FATE#313633

# HG changeset patch
# User Liu, Jinsong <jinsong.liu@xxxxxxxxx>
# Date 1348654362 -7200
# Node ID c47ef9592fb39325e33f8406b4bd736cc84482e5
# Parent 5d63c633a60b9a1d695594f9c17cf933240bec81
x86: Implement TSC adjust feature for HVM guest

IA32_TSC_ADJUST MSR is maintained separately for each logical
processor. A logical processor maintains and uses the IA32_TSC_ADJUST
MSR as follows:
1). On RESET, the value of the IA32_TSC_ADJUST MSR is 0;
2). If an execution of WRMSR to the IA32_TIME_STAMP_COUNTER MSR adds
(or subtracts) value X from the TSC, the logical processor also
adds (or subtracts) value X from the IA32_TSC_ADJUST MSR;
3). If an execution of WRMSR to the IA32_TSC_ADJUST MSR adds (or
subtracts) value X from that MSR, the logical processor also adds
(or subtracts) value X from the TSC.

This patch provides TSC adjust support for HVM guests; with it, the
guest OS would be happy when syncing the TSC.

Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
Committed-by: Jan Beulich <jbeulich@xxxxxxxx>

Index: xen-4.2.0-testing/xen/arch/x86/hvm/hvm.c
===================================================================
--- xen-4.2.0-testing.orig/xen/arch/x86/hvm/hvm.c
+++ xen-4.2.0-testing/xen/arch/x86/hvm/hvm.c
@@ -244,6 +244,7 @@ int hvm_set_guest_pat(struct vcpu *v, u6
void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc)
{
uint64_t tsc;
+ uint64_t delta_tsc;

if ( v->domain->arch.vtsc )
{
@@ -255,10 +256,22 @@ void hvm_set_guest_tsc(struct vcpu *v, u
rdtscll(tsc);
}

- v->arch.hvm_vcpu.cache_tsc_offset = guest_tsc - tsc;
+ delta_tsc = guest_tsc - tsc;
+ v->arch.hvm_vcpu.msr_tsc_adjust += delta_tsc
+ - v->arch.hvm_vcpu.cache_tsc_offset;
+ v->arch.hvm_vcpu.cache_tsc_offset = delta_tsc;
+
hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
}

+void hvm_set_guest_tsc_adjust(struct vcpu *v, u64 tsc_adjust)
+{
+ v->arch.hvm_vcpu.cache_tsc_offset += tsc_adjust
+ - v->arch.hvm_vcpu.msr_tsc_adjust;
+ hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
+ v->arch.hvm_vcpu.msr_tsc_adjust = tsc_adjust;
+}
+
u64 hvm_get_guest_tsc(struct vcpu *v)
{
uint64_t tsc;
@@ -277,6 +290,11 @@ u64 hvm_get_guest_tsc(struct vcpu *v)
return tsc + v->arch.hvm_vcpu.cache_tsc_offset;
}

+u64 hvm_get_guest_tsc_adjust(struct vcpu *v)
+{
+ return v->arch.hvm_vcpu.msr_tsc_adjust;
+}
+
void hvm_migrate_timers(struct vcpu *v)
{
rtc_migrate_timers(v);
@@ -2798,6 +2816,10 @@ int hvm_msr_read_intercept(unsigned int
*msr_content = hvm_get_guest_tsc(v);
break;

+ case MSR_IA32_TSC_ADJUST:
+ *msr_content = hvm_get_guest_tsc_adjust(v);
+ break;
+
case MSR_TSC_AUX:
*msr_content = hvm_msr_tsc_aux(v);
break;
@@ -2911,6 +2933,10 @@ int hvm_msr_write_intercept(unsigned int
hvm_set_guest_tsc(v, msr_content);
break;

+ case MSR_IA32_TSC_ADJUST:
+ hvm_set_guest_tsc_adjust(v, msr_content);
+ break;
+
case MSR_TSC_AUX:
v->arch.hvm_vcpu.msr_tsc_aux = (uint32_t)msr_content;
if ( cpu_has_rdtscp
@@ -3482,6 +3508,8 @@ void hvm_vcpu_reset_state(struct vcpu *v
v->domain->vcpu[0]->arch.hvm_vcpu.cache_tsc_offset;
hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);

+ v->arch.hvm_vcpu.msr_tsc_adjust = 0;
+
paging_update_paging_modes(v);

v->arch.flags |= TF_kernel_mode;
Index: xen-4.2.0-testing/xen/include/asm-x86/hvm/vcpu.h
===================================================================
--- xen-4.2.0-testing.orig/xen/include/asm-x86/hvm/vcpu.h
+++ xen-4.2.0-testing/xen/include/asm-x86/hvm/vcpu.h
@@ -137,6 +137,7 @@ struct hvm_vcpu {
struct hvm_vcpu_asid n1asid;

u32 msr_tsc_aux;
+ u64 msr_tsc_adjust;

/* VPMU */
struct vpmu_struct vpmu;
Index: xen-4.2.0-testing/xen/include/asm-x86/msr-index.h
===================================================================
--- xen-4.2.0-testing.orig/xen/include/asm-x86/msr-index.h
+++ xen-4.2.0-testing/xen/include/asm-x86/msr-index.h
@@ -284,6 +284,7 @@
#define MSR_IA32_PLATFORM_ID 0x00000017
#define MSR_IA32_EBL_CR_POWERON 0x0000002a
#define MSR_IA32_EBC_FREQUENCY_ID 0x0000002c
+#define MSR_IA32_TSC_ADJUST 0x0000003b

#define MSR_IA32_APICBASE 0x0000001b
#define MSR_IA32_APICBASE_BSP (1<<8)
++++++ 25958-x86-TSC-adjust-sr.patch ++++++
References: FATE#313633

# HG changeset patch
# User Liu, Jinsong <jinsong.liu@xxxxxxxxx>
# Date 1348654418 -7200
# Node ID 56fb977ce6eb4626a02d4a7a34e85009bb8ee3e0
# Parent c47ef9592fb39325e33f8406b4bd736cc84482e5
x86: Save/restore TSC adjust during HVM guest migration

Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
Committed-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/tools/misc/xen-hvmctx.c
+++ b/tools/misc/xen-hvmctx.c
@@ -390,6 +390,13 @@ static void dump_vmce_vcpu(void)
printf(" VMCE_VCPU: caps %" PRIx64 "\n", p.caps);
}

+static void dump_tsc_adjust(void)
+{
+ HVM_SAVE_TYPE(TSC_ADJUST) p;
+ READ(p);
+ printf(" TSC_ADJUST: tsc_adjust %" PRIx64 "\n", p.tsc_adjust);
+}
+
int main(int argc, char **argv)
{
int entry, domid;
@@ -457,6 +464,7 @@ int main(int argc, char **argv)
case HVM_SAVE_CODE(VIRIDIAN_DOMAIN): dump_viridian_domain(); break;
case HVM_SAVE_CODE(VIRIDIAN_VCPU): dump_viridian_vcpu(); break;
case HVM_SAVE_CODE(VMCE_VCPU): dump_vmce_vcpu(); break;
+ case HVM_SAVE_CODE(TSC_ADJUST): dump_tsc_adjust(); break;
case HVM_SAVE_CODE(END): break;
default:
printf(" ** Don't understand type %u: skipping\n",
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -610,6 +610,46 @@ void hvm_domain_destroy(struct domain *d
hvm_destroy_cacheattr_region_list(d);
}

+static int hvm_save_tsc_adjust(struct domain *d, hvm_domain_context_t *h)
+{
+ struct vcpu *v;
+ struct hvm_tsc_adjust ctxt;
+ int err = 0;
+
+ for_each_vcpu ( d, v )
+ {
+ ctxt.tsc_adjust = v->arch.hvm_vcpu.msr_tsc_adjust;
+ err = hvm_save_entry(TSC_ADJUST, v->vcpu_id, h, &ctxt);
+ if ( err )
+ break;
+ }
+
+ return err;
+}
+
+static int hvm_load_tsc_adjust(struct domain *d, hvm_domain_context_t *h)
+{
+ unsigned int vcpuid = hvm_load_instance(h);
+ struct vcpu *v;
+ struct hvm_tsc_adjust ctxt;
+
+ if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL )
+ {
+ dprintk(XENLOG_G_ERR, "HVM restore: dom%d has no vcpu%u\n",
+ d->domain_id, vcpuid);
+ return -EINVAL;
+ }
+
+ if ( hvm_load_entry(TSC_ADJUST, h, &ctxt) != 0 )
+ return -EINVAL;
+
+ v->arch.hvm_vcpu.msr_tsc_adjust = ctxt.tsc_adjust;
+ return 0;
+}
+
+HVM_REGISTER_SAVE_RESTORE(TSC_ADJUST, hvm_save_tsc_adjust,
+ hvm_load_tsc_adjust, 1, HVMSR_PER_VCPU);
+
static int hvm_save_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
{
struct vcpu *v;
--- a/xen/include/public/arch-x86/hvm/save.h
+++ b/xen/include/public/arch-x86/hvm/save.h
@@ -581,9 +581,15 @@ struct hvm_vmce_vcpu {

DECLARE_HVM_SAVE_TYPE(VMCE_VCPU, 18, struct hvm_vmce_vcpu);

+struct hvm_tsc_adjust {
+ uint64_t tsc_adjust;
+};
+
+DECLARE_HVM_SAVE_TYPE(TSC_ADJUST, 19, struct hvm_tsc_adjust);
+
/*
* Largest type-code in use
*/
-#define HVM_SAVE_CODE_MAX 18
+#define HVM_SAVE_CODE_MAX 19

#endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */
++++++ 25959-x86-TSC-adjust-expose.patch ++++++
References: FATE#313633

# HG changeset patch
# User Liu, Jinsong <jinsong.liu@xxxxxxxxx>
# Date 1348654470 -7200
# Node ID 3aa66543a51ba77cb73e8c874e2416d065426a22
# Parent 56fb977ce6eb4626a02d4a7a34e85009bb8ee3e0
x86: Expose TSC adjust to HVM guest

Intel's latest SDM (17.13.3) introduces a new MSR: CPUID.7.0.EBX[1]=1
indicates that the TSC_ADJUST MSR 0x3b is supported.

This patch exposes it to HVM guests.

Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
Committed-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/tools/libxc/xc_cpufeature.h
+++ b/tools/libxc/xc_cpufeature.h
@@ -128,6 +128,7 @@

/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx) */
#define X86_FEATURE_FSGSBASE 0 /* {RD,WR}{FS,GS}BASE instructions */
+#define X86_FEATURE_TSC_ADJUST 1 /* Tsc thread offset */
#define X86_FEATURE_BMI1 3 /* 1st group bit manipulation extensions */
#define X86_FEATURE_HLE 4 /* Hardware Lock Elision */
#define X86_FEATURE_AVX2 5 /* AVX2 instructions */
--- a/tools/libxc/xc_cpuid_x86.c
+++ b/tools/libxc/xc_cpuid_x86.c
@@ -362,7 +362,8 @@ static void xc_cpuid_hvm_policy(

case 0x00000007: /* Intel-defined CPU features */
if ( input[1] == 0 ) {
- regs[1] &= (bitmaskof(X86_FEATURE_BMI1) |
+ regs[1] &= (bitmaskof(X86_FEATURE_TSC_ADJUST) |
+ bitmaskof(X86_FEATURE_BMI1) |
bitmaskof(X86_FEATURE_HLE) |
bitmaskof(X86_FEATURE_AVX2) |
bitmaskof(X86_FEATURE_SMEP) |
++++++ 25975-x86-IvyBridge.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1349172840 -7200
# Node ID 87bf99fad7a9f018530d13213f57610621838085
# Parent 5fbdbf585f5f2ee9a3e3c75a8a9f9f2cc6eda65c
x86/Intel: add further support for Ivy Bridge CPU models

And some initial Haswell ones at once.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: "Nakajima, Jun" <jun.nakajima@xxxxxxxxx>

--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -105,11 +105,15 @@ static void do_get_hw_residencies(void *

switch ( c->x86_model )
{
- /* Ivy bridge */
- case 0x3A:
/* Sandy bridge */
case 0x2A:
case 0x2D:
+ /* Ivy bridge */
+ case 0x3A:
+ case 0x3E:
+ /* Haswell */
+ case 0x3C:
+ case 0x45:
GET_PC2_RES(hw_res->pc2);
GET_CC7_RES(hw_res->cc7);
/* fall through */
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1820,7 +1820,9 @@ static const struct lbr_info *last_branc
/* Sandy Bridge */
case 42: case 45:
/* Ivy Bridge */
- case 58:
+ case 58: case 62:
+ /* Haswell */
+ case 60: case 69:
return nh_lbr;
break;
/* Atom */
--- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
+++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
@@ -747,6 +747,7 @@ int vmx_vpmu_initialise(struct vcpu *v,
case 46:
case 47:
case 58:
+ case 62:
ret = core2_vpmu_initialise(v, vpmu_flags);
if ( !ret )
vpmu->arch_vpmu_ops = &core2_vpmu_ops;
++++++ 26062-ACPI-ERST-move-data.patch ++++++
# HG changeset patch
# User Huang Ying <ying.huang@xxxxxxxxx>
# Date 1350475926 -7200
# Node ID ec8a091efcce717584b00ce76e3cec40a6247ebc
# Parent 4b4c0c7a6031820ab521fdd6764cb0df157f44bf
ACPI/APEI: fix ERST MOVE_DATA instruction implementation

The src_base and dst_base fields in apei_exec_context are physical
addresses, so they should be ioremapped before being used in the ERST
MOVE_DATA instruction.

Reported-by: Javier Martinez Canillas <martinez.javier@xxxxxxxxx>
Reported-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Huang Ying <ying.huang@xxxxxxxxx>

Replace use of ioremap() by __acpi_map_table()/set_fixmap(). Fix error
handling.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
Committed-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/drivers/acpi/apei/erst.c
+++ b/xen/drivers/acpi/apei/erst.c
@@ -247,15 +247,64 @@ static int erst_exec_move_data(struct ap
{
int rc;
u64 offset;
+#ifdef CONFIG_X86
+ enum fixed_addresses idx;
+#endif
+ void *src, *dst;
+
+ /* ioremap does not work in interrupt context */
+ if (in_irq()) {
+ printk(KERN_WARNING
+ "MOVE_DATA cannot be used in interrupt context\n");
+ return -EBUSY;
+ }

rc = __apei_exec_read_register(entry, &offset);
if (rc)
return rc;
- memmove((void *)(unsigned long)(ctx->dst_base + offset),
- (void *)(unsigned long)(ctx->src_base + offset),
- ctx->var2);

- return 0;
+#ifdef CONFIG_X86
+ switch (ctx->var2) {
+ case 0:
+ return 0;
+ case 1 ... PAGE_SIZE:
+ break;
+ default:
+ printk(KERN_WARNING
+ "MOVE_DATA cannot be used for %#"PRIx64" bytes of
data\n",
+ ctx->var2);
+ return -EOPNOTSUPP;
+ }
+
+ src = __acpi_map_table(ctx->src_base + offset, ctx->var2);
+#else
+ src = ioremap(ctx->src_base + offset, ctx->var2);
+#endif
+ if (!src)
+ return -ENOMEM;
+
+#ifdef CONFIG_X86
+ BUILD_BUG_ON(FIX_ACPI_PAGES < 4);
+ idx = virt_to_fix((unsigned long)src + 2 * PAGE_SIZE);
+ offset += ctx->dst_base;
+ dst = (void *)fix_to_virt(idx) + (offset & ~PAGE_MASK);
+ set_fixmap(idx, offset);
+ if (PFN_DOWN(offset) != PFN_DOWN(offset + ctx->var2 - 1)) {
+ idx = virt_to_fix((unsigned long)dst + PAGE_SIZE);
+ set_fixmap(idx, offset + PAGE_SIZE);
+ }
+#else
+ dst = ioremap(ctx->dst_base + offset, ctx->var2);
+#endif
+ if (dst) {
+ memmove(dst, src, ctx->var2);
+ iounmap(dst);
+ } else
+ rc = -ENOMEM;
+
+ iounmap(src);
+
+ return rc;
}

static struct apei_exec_ins_type erst_ins_type[] = {
++++++ 26077-stubdom_fix_compile_errors_in_grub.patch ++++++
changeset: 26077:33348baecf37
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu Oct 18 09:34:59 2012 +0100
files: stubdom/grub.patches/70compiler_warnings.diff
description:
stubdom: fix compile errors in grub

Building xen.rpm in SLES11 started to fail due to these compiler
warnings:

[ 1436s] ../grub-upstream/netboot/fsys_tftp.c:213: warning: operation on
'block' may be undefined
[ 1437s] ../grub-upstream/netboot/main.c:444: warning: operation on 'block' may
be undefined

[ 1234s] E: xen sequence-point ../grub-upstream/netboot/fsys_tftp.c:213
[ 1234s] E: xen sequence-point ../grub-upstream/netboot/main.c:444

The reason for this is that the assignment is done twice:
tp.u.ack.block = ((uint16_t)( (((uint16_t)((block = prevblock)) &
(uint16_t)0x00ffU) << 8) | (((uint16_t)((block = prevblock)) &
(uint16_t)0xff00U) >> 8)));

Fix this package build error by adding another patch for grub, which
moves the assignment out of the macro usage.

Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Committed-by: Ian Campbell <ian.campbell@xxxxxxxxxx>


diff -r 8dcab28b8081 -r 33348baecf37
stubdom/grub.patches/70compiler_warnings.diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/stubdom/grub.patches/70compiler_warnings.diff Thu Oct 18 09:34:59
2012 +0100
@@ -0,0 +1,45 @@
+[ 1436s] ../grub-upstream/netboot/fsys_tftp.c:213: warning: operation on
'block' may be undefined
+[ 1437s] ../grub-upstream/netboot/main.c:444: warning: operation on 'block'
may be undefined
+
+[ 1234s] E: xen sequence-point ../grub-upstream/netboot/fsys_tftp.c:213
+[ 1234s] E: xen sequence-point ../grub-upstream/netboot/main.c:444
+
+---
+ netboot/fsys_tftp.c | 5 ++++-
+ netboot/main.c | 5 ++++-
+ 2 files changed, 8 insertions(+), 2 deletions(-)
+
+Index: grub-0.97/netboot/fsys_tftp.c
+===================================================================
+--- grub-0.97.orig/netboot/fsys_tftp.c
++++ grub-0.97/netboot/fsys_tftp.c
+@@ -209,8 +209,11 @@ buf_fill (int abort)
+ break;
+
+ if ((block || bcounter) && (block != prevblock + (unsigned short) 1))
++ {
++ block = prevblock;
+ /* Block order should be continuous */
+- tp.u.ack.block = htons (block = prevblock);
++ tp.u.ack.block = htons (block);
++ }
+
+ /* Should be continuous. */
+ tp.opcode = abort ? htons (TFTP_ERROR) : htons (TFTP_ACK);
+Index: grub-0.97/netboot/main.c
+===================================================================
+--- grub-0.97.orig/netboot/main.c
++++ grub-0.97/netboot/main.c
+@@ -440,8 +440,11 @@ tftp (const char *name, int (*fnc) (unsi
+ break;
+
+ if ((block || bcounter) && (block != prevblock + 1))
++ {
++ block = prevblock;
+ /* Block order should be continuous */
+- tp.u.ack.block = htons (block = prevblock);
++ tp.u.ack.block = htons (block);
++ }
+
+ /* Should be continuous. */
+ tp.opcode = htons (TFTP_ACK);
++++++ 26078-hotplug-Linux_remove_hotplug_support_rely_on_udev_instead.patch
++++++
changeset: 26078:019ca95dfa34
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu Oct 18 09:35:00 2012 +0100
files: Makefile README install.sh tools/hotplug/Linux/Makefile
tools/hotplug/Linux/xen-backend.agent
description:
hotplug/Linux: remove hotplug support, rely on udev instead

Hotplug was replaced by udev several years ago. Remove the
hotplug related files and install udev unconditionally.

This makes it possible to remove udev from rpm BuildRequires which
reduces the buildtime dependency chain. For openSuSE:Factory it was
done just now:
http://lists.opensuse.org/opensuse-buildservice/2012-10/msg00085.html

The patch by itself will have no practical impact unless someone
attempts to build and run a Xen dom0 on a really old base system. e.g.
circa SLES9/2007 or earlier

Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Committed-by: Ian Campbell <ian.campbell@xxxxxxxxxx>


diff -r 33348baecf37 -r 019ca95dfa34 Makefile
--- a/Makefile Thu Oct 18 09:34:59 2012 +0100
+++ b/Makefile Thu Oct 18 09:35:00 2012 +0100
@@ -223,7 +223,6 @@ uninstall:
$(MAKE) -C xen uninstall
rm -rf $(D)$(CONFIG_DIR)/init.d/xendomains $(D)$(CONFIG_DIR)/init.d/xend
rm -rf $(D)$(CONFIG_DIR)/init.d/xencommons
$(D)$(CONFIG_DIR)/init.d/xen-watchdog
- rm -rf $(D)$(CONFIG_DIR)/hotplug/xen-backend.agent
rm -f $(D)$(CONFIG_DIR)/udev/rules.d/xen-backend.rules
rm -f $(D)$(CONFIG_DIR)/udev/rules.d/xend.rules
rm -f $(D)$(SYSCONFIG_DIR)/xendomains
diff -r 33348baecf37 -r 019ca95dfa34 README
--- a/README Thu Oct 18 09:34:59 2012 +0100
+++ b/README Thu Oct 18 09:35:00 2012 +0100
@@ -54,7 +54,7 @@ provided by your OS distributor:
* pkg-config
* bridge-utils package (/sbin/brctl)
* iproute package (/sbin/ip)
- * hotplug or udev
+ * udev
* GNU bison and GNU flex
* GNU gettext
* 16-bit x86 assembler, loader and compiler (dev86 rpm or bin86 & bcc debs)
@@ -120,9 +120,9 @@ 4. To rebuild an existing tree without m

make install and make dist differ in that make install does the
right things for your local machine (installing the appropriate
- version of hotplug or udev scripts, for example), but make dist
- includes all versions of those scripts, so that you can copy the dist
- directory to another machine and install from that distribution.
+ version of udev scripts, for example), but make dist includes all
+ versions of those scripts, so that you can copy the dist directory
+ to another machine and install from that distribution.

Python Runtime Libraries
========================
diff -r 33348baecf37 -r 019ca95dfa34 install.sh
--- a/install.sh Thu Oct 18 09:34:59 2012 +0100
+++ b/install.sh Thu Oct 18 09:35:00 2012 +0100
@@ -27,20 +27,6 @@ echo "Installing Xen from '$src' to '$ds
echo "Installing Xen from '$src' to '$dst'..."
(cd $src; tar -cf - * ) | tar -C "$tmp" -xf -

-[ -x "$(which udevinfo)" ] && \
- UDEV_VERSION=$(udevinfo -V | sed -e 's/^[^0-9]*
\([0-9]\{1,\}\)[^0-9]\{0,\}/\1/')
-
-[ -z "$UDEV_VERSION" -a -x /sbin/udevadm ] && \
- UDEV_VERSION=$(/sbin/udevadm info -V | awk '{print $NF}')
-
-if [ -n "$UDEV_VERSION" ] && [ $UDEV_VERSION -ge 059 ]; then
- echo " - installing for udev-based system"
- rm -rf "$tmp/etc/hotplug"
-else
- echo " - installing for hotplug-based system"
- rm -rf "$tmp/etc/udev"
-fi
-
echo " - modifying permissions"
chmod -R a+rX "$tmp"

diff -r 33348baecf37 -r 019ca95dfa34 tools/hotplug/Linux/Makefile
--- a/tools/hotplug/Linux/Makefile Thu Oct 18 09:34:59 2012 +0100
+++ b/tools/hotplug/Linux/Makefile Thu Oct 18 09:35:00 2012 +0100
@@ -27,31 +27,8 @@ XEN_SCRIPT_DATA += block-common.sh vtpm-
XEN_SCRIPT_DATA += block-common.sh vtpm-common.sh vtpm-hotplug-common.sh
XEN_SCRIPT_DATA += vtpm-migration.sh vtpm-impl

-XEN_HOTPLUG_DIR = $(CONFIG_DIR)/hotplug
-XEN_HOTPLUG_SCRIPTS = xen-backend.agent
-
-UDEVVER = 0
-ifeq ($(shell [ -x /sbin/udevadm ] && echo 1),1)
-UDEVVER = $(shell /sbin/udevadm info -V | sed -e 's/^[^0-9]*
\([0-9]\{1,\}\)[^0-9]\{0,\}/\1/' )
-endif
-ifeq ($(shell [ -x /usr/bin/udevinfo ] && echo 1),1)
-UDEVVER = $(shell /usr/bin/udevinfo -V | sed -e 's/^[^0-9]*
\([0-9]\{1,\}\)[^0-9]\{0,\}/\1/' )
-endif
-
UDEV_RULES_DIR = $(CONFIG_DIR)/udev
UDEV_RULES = xen-backend.rules xend.rules
-
-DI = $(if $(DISTDIR),$(shell readlink -f $(DISTDIR)),)
-DE = $(if $(DESTDIR),$(shell readlink -f $(DESTDIR)),)
-ifeq ($(findstring $(DI),$(DE)),$(DI))
-HOTPLUGS=install-hotplug install-udev
-else
-ifeq ($(shell [ $(UDEVVER) -ge 059 ] && echo 1),1)
-HOTPLUGS=install-udev
-else
-HOTPLUGS=install-hotplug
-endif
-endif

.PHONY: all
all:
@@ -60,7 +37,7 @@ build:
build:

.PHONY: install
-install: all install-initd install-scripts $(HOTPLUGS)
+install: all install-initd install-scripts install-udev

# See docs/misc/distro_mapping.txt for INITD_DIR location
.PHONY: install-initd
@@ -87,15 +64,6 @@ install-scripts:
$(INSTALL_DATA) $$i $(DESTDIR)$(XEN_SCRIPT_DIR); \
done

-.PHONY: install-hotplug
-install-hotplug:
- [ -d $(DESTDIR)$(XEN_HOTPLUG_DIR) ] || \
- $(INSTALL_DIR) $(DESTDIR)$(XEN_HOTPLUG_DIR)
- set -e; for i in $(XEN_HOTPLUG_SCRIPTS); \
- do \
- $(INSTALL_PROG) $$i $(DESTDIR)$(XEN_HOTPLUG_DIR); \
- done
-
.PHONY: install-udev
install-udev:
[ -d $(DESTDIR)$(UDEV_RULES_DIR) ] || \
diff -r 33348baecf37 -r 019ca95dfa34 tools/hotplug/Linux/xen-backend.agent
--- a/tools/hotplug/Linux/xen-backend.agent Thu Oct 18 09:34:59 2012 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-#! /bin/bash
-
-PATH=/etc/xen/scripts:$PATH
-
-. /etc/xen/scripts/locking.sh
-
-claim_lock xenbus_hotplug_global
-
-case "$XENBUS_TYPE" in
- tap)
- /etc/xen/scripts/blktap "$ACTION"
- ;;
- vbd)
- /etc/xen/scripts/block "$ACTION"
- ;;
- vtpm)
- /etc/xen/scripts/vtpm "$ACTION"
- ;;
- vif)
- [ -n "$script" ] && $script "$ACTION"
- ;;
- vscsi)
- /etc/xen/scripts/vscsi "$ACTION"
- ;;
-esac
-
-case "$ACTION" in
- add)
- ;;
- remove)
- /etc/xen/scripts/xen-hotplug-cleanup
- ;;
- online)
- ;;
- offline)
- ;;
-esac
-
-release_lock xenbus_hotplug_global
++++++ 26079-hotplug-Linux_close_lockfd_after_lock_attempt.patch ++++++
changeset: 26079:b3b03536789a
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu Oct 18 09:35:01 2012 +0100
files: tools/hotplug/Linux/locking.sh
description:
hotplug/Linux: close lockfd after lock attempt

When an HVM guest is shut down, some of the 'remove' events cannot claim
the lock for some reason. Instead they try to grab the lock in a busy
loop, until udev reaps the xen-hotplug-cleanup helper.
After analyzing the resulting logfile it's not obvious what the cause is.
The only explanation is that bash (?) gets confused if the same lockfd
is opened again and again. Closing it in each iteration seems to fix the
issue.

This was observed with sles11sp2 (bash 3.2) and 4.2 xend.

Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Ian Campbell <Ian.campbell@xxxxxxxxxx>
[ ijc -- added the comment ]
Committed-by: Ian Campbell <ian.campbell@xxxxxxxxxx>


diff -r 019ca95dfa34 -r b3b03536789a tools/hotplug/Linux/locking.sh
--- a/tools/hotplug/Linux/locking.sh Thu Oct 18 09:35:00 2012 +0100
+++ b/tools/hotplug/Linux/locking.sh Thu Oct 18 09:35:01 2012 +0100
@@ -59,6 +59,9 @@ claim_lock()
print "y\n" if $fd_inum eq $file_inum;
' "$_lockfile" )
if [ x$rightfile = xy ]; then break; fi
+ # Some versions of bash appear to be buggy if the same
+ # $_lockfile is opened repeatedly. Close the current fd here.
+ eval "exec $_lockfd<&-"
done
}

++++++ 26081-stubdom_fix_rpmlint_warning_spurious-executable-perm.patch ++++++
changeset: 26081:02064298ebcb
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu Oct 18 09:35:03 2012 +0100
files: stubdom/Makefile
description:
stubdom: fix rpmlint warning spurious-executable-perm

[ 1758s] xen-tools.x86_64: E: spurious-executable-perm (Badness: 50)
/usr/lib/xen/boot/xenstore-stubdom.gz
[ 1758s] The file is installed with executable permissions, but was identified
as one
[ 1758s] that probably should not be executable. Verify if the executable bits
are
[ 1758s] desired, and remove if not. NOTE: example scripts should be packaged
under
[ 1758s] %docdir/examples, which will avoid this warning.

Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Committed-by: Ian Campbell <ian.campbell@xxxxxxxxxx>


diff -r 25b2f53d2583 -r 02064298ebcb stubdom/Makefile
--- a/stubdom/Makefile Thu Oct 18 09:35:02 2012 +0100
+++ b/stubdom/Makefile Thu Oct 18 09:35:03 2012 +0100
@@ -396,7 +396,7 @@ install-grub: pv-grub

install-xenstore: xenstore-stubdom
$(INSTALL_DIR) "$(DESTDIR)/usr/lib/xen/boot"
- $(INSTALL_PROG) mini-os-$(XEN_TARGET_ARCH)-xenstore/mini-os.gz
"$(DESTDIR)/usr/lib/xen/boot/xenstore-stubdom.gz"
+ $(INSTALL_DATA) mini-os-$(XEN_TARGET_ARCH)-xenstore/mini-os.gz
"$(DESTDIR)/usr/lib/xen/boot/xenstore-stubdom.gz"

#######
# clean
++++++ 26082-blktap2-libvhd_fix_rpmlint_warning_spurious-executable-perm.patch
++++++
changeset: 26082:8cf26ace9ca0
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu Oct 18 09:35:03 2012 +0100
files: tools/blktap2/vhd/lib/Makefile
description:
blktap2/libvhd: fix rpmlint warning spurious-executable-perm

[ 1758s] xen-devel.x86_64: E: spurious-executable-perm (Badness: 50)
/usr/lib64/libvhd.a
[ 1758s] The file is installed with executable permissions, but was identified
as one
[ 1758s] that probably should not be executable. Verify if the executable bits
are
[ 1758s] desired, and remove if not. NOTE: example scripts should be packaged
under
[ 1758s] %docdir/examples, which will avoid this warning.

Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Committed-by: Ian Campbell <ian.campbell@xxxxxxxxxx>


diff -r 02064298ebcb -r 8cf26ace9ca0 tools/blktap2/vhd/lib/Makefile
--- a/tools/blktap2/vhd/lib/Makefile Thu Oct 18 09:35:03 2012 +0100
+++ b/tools/blktap2/vhd/lib/Makefile Thu Oct 18 09:35:03 2012 +0100
@@ -68,7 +68,7 @@ libvhd.so.$(LIBVHD-MAJOR).$(LIBVHD-MINOR

install: all
$(INSTALL_DIR) -p $(DESTDIR)$(INST-DIR)
- $(INSTALL_PROG) libvhd.a $(DESTDIR)$(INST-DIR)
+ $(INSTALL_DATA) libvhd.a $(DESTDIR)$(INST-DIR)
$(INSTALL_PROG) libvhd.so.$(LIBVHD-MAJOR).$(LIBVHD-MINOR)
$(DESTDIR)$(INST-DIR)
ln -sf libvhd.so.$(LIBVHD-MAJOR).$(LIBVHD-MINOR)
$(DESTDIR)$(INST-DIR)/libvhd.so.$(LIBVHD-MAJOR)
ln -sf libvhd.so.$(LIBVHD-MAJOR) $(DESTDIR)$(INST-DIR)/libvhd.so
++++++ 26083-blktap_fix_rpmlint_warning_spurious-executable-perm.patch ++++++
changeset: 26083:3fbeb019d522
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu Oct 18 09:35:04 2012 +0100
files: tools/blktap/lib/Makefile
description:
blktap: fix rpmlint warning spurious-executable-perm

[ 1758s] xen-devel.x86_64: E: spurious-executable-perm (Badness: 50)
/usr/lib64/libblktap.a
[ 1758s] The file is installed with executable permissions, but was identified
as one
[ 1758s] that probably should not be executable. Verify if the executable bits
are
[ 1758s] desired, and remove if not. NOTE: example scripts should be packaged
under
[ 1758s] %docdir/examples, which will avoid this warning.

Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Committed-by: Ian Campbell <ian.campbell@xxxxxxxxxx>


diff -r 8cf26ace9ca0 -r 3fbeb019d522 tools/blktap/lib/Makefile
--- a/tools/blktap/lib/Makefile Thu Oct 18 09:35:03 2012 +0100
+++ b/tools/blktap/lib/Makefile Thu Oct 18 09:35:04 2012 +0100
@@ -23,23 +23,25 @@ OBJS_PIC = $(SRCS:.c=.opic)
OBJS_PIC = $(SRCS:.c=.opic)
IBINS :=

-LIB = libblktap.a libblktap.so.$(MAJOR).$(MINOR)
+LIB = libblktap.a
+LIB_SO = libblktap.so.$(MAJOR).$(MINOR)

.PHONY: all
-all: $(LIB)
+all: $(LIB) $(LIB_SO)

.PHONY: install
install: all
$(INSTALL_DIR) $(DESTDIR)$(LIBDIR)
$(INSTALL_DIR) $(DESTDIR)$(INCLUDEDIR)
- $(INSTALL_PROG) $(LIB) $(DESTDIR)$(LIBDIR)
+ $(INSTALL_PROG) $(LIB_SO) $(DESTDIR)$(LIBDIR)
+ $(INSTALL_DATA) $(LIB) $(DESTDIR)$(LIBDIR)
ln -sf libblktap.so.$(MAJOR).$(MINOR)
$(DESTDIR)$(LIBDIR)/libblktap.so.$(MAJOR)
ln -sf libblktap.so.$(MAJOR) $(DESTDIR)$(LIBDIR)/libblktap.so
$(INSTALL_DATA) blktaplib.h $(DESTDIR)$(INCLUDEDIR)

.PHONY: clean
clean:
- rm -rf *.a *.so* *.o *.opic *.rpm $(LIB) *~ $(DEPS) xen TAGS
+ rm -rf *.a *.so* *.o *.opic *.rpm $(LIB) $(LIB_SO) *~ $(DEPS) xen TAGS

libblktap.so.$(MAJOR).$(MINOR): $(OBJS_PIC)
$(CC) $(LDFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,$(SONAME) $(SHLIB_LDFLAGS) \
++++++ 26084-hotplug_install_hotplugpath.sh_as_data_file.patch ++++++
changeset: 26084:fe9a0eb9aaaa
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu Oct 18 09:35:05 2012 +0100
files: tools/hotplug/common/Makefile
description:
hotplug: install hotplugpath.sh as data file

rpmlint complains about a script helper which is only sourced:

[ 1875s] xen-tools.i586: W: script-without-shebang
/etc/xen/scripts/hotplugpath.sh
[ 1875s] This text file has executable bits set or is located in a path
dedicated for
[ 1875s] executables, but lacks a shebang and cannot thus be executed. If the
file is
[ 1875s] meant to be an executable script, add the shebang, otherwise remove the
[ 1875s] executable bits or move the file elsewhere.

Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Committed-by: Ian Campbell <ian.campbell@xxxxxxxxxx>


diff -r 3fbeb019d522 -r fe9a0eb9aaaa tools/hotplug/common/Makefile
--- a/tools/hotplug/common/Makefile Thu Oct 18 09:35:04 2012 +0100
+++ b/tools/hotplug/common/Makefile Thu Oct 18 09:35:05 2012 +0100
@@ -6,8 +6,8 @@ HOTPLUGPATH="hotplugpath.sh"
# OS-independent hotplug scripts go in this directory

# Xen scripts to go there.
-XEN_SCRIPTS = $(HOTPLUGPATH)
-XEN_SCRIPT_DATA =
+XEN_SCRIPTS =
+XEN_SCRIPT_DATA = $(HOTPLUGPATH)

genpath-target = $(call buildmakevars2file,$(HOTPLUGPATH))
$(eval $(genpath-target))
++++++ 26085-stubdom_install_stubdompath.sh_as_data_file.patch ++++++
changeset: 26085:e32f4301f384
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu Oct 18 09:35:06 2012 +0100
files: stubdom/Makefile
description:
stubdom: install stubdompath.sh as data file

rpmlint complains about a script helper which is only sourced:

[ 1875s] xen-tools.i586: W: script-without-shebang
/usr/lib/xen/bin/stubdompath.sh
[ 1875s] This text file has executable bits set or is located in a path
dedicated for
[ 1875s] executables, but lacks a shebang and cannot thus be executed. If the
file is
[ 1875s] meant to be an executable script, add the shebang, otherwise remove the
[ 1875s] executable bits or move the file elsewhere.

Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Committed-by: Ian Campbell <ian.campbell@xxxxxxxxxx>


diff -r fe9a0eb9aaaa -r e32f4301f384 stubdom/Makefile
--- a/stubdom/Makefile Thu Oct 18 09:35:05 2012 +0100
+++ b/stubdom/Makefile Thu Oct 18 09:35:06 2012 +0100
@@ -386,7 +386,8 @@ install-readme:

install-ioemu: ioemu-stubdom
$(INSTALL_DIR) "$(DESTDIR)$(LIBEXEC)"
- $(INSTALL_PROG) stubdompath.sh stubdom-dm "$(DESTDIR)$(LIBEXEC)"
+ $(INSTALL_PROG) stubdom-dm "$(DESTDIR)$(LIBEXEC)"
+ $(INSTALL_DATA) stubdompath.sh "$(DESTDIR)$(LIBEXEC)"
$(INSTALL_DIR) "$(DESTDIR)$(XENFIRMWAREDIR)"
$(INSTALL_DATA) mini-os-$(XEN_TARGET_ARCH)-ioemu/mini-os.gz
"$(DESTDIR)$(XENFIRMWAREDIR)/ioemu-stubdom.gz"

++++++ 26086-hotplug-Linux_correct_sysconfig_tag_in_xendomains.patch ++++++
changeset: 26086:ba6b1db89ec8
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu Oct 18 09:35:07 2012 +0100
files: tools/hotplug/Linux/init.d/sysconfig.xendomains
description:
hotplug/Linux: correct sysconfig tag in xendomains

Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Committed-by: Ian Campbell <ian.campbell@xxxxxxxxxx>


diff -r e32f4301f384 -r ba6b1db89ec8
tools/hotplug/Linux/init.d/sysconfig.xendomains
--- a/tools/hotplug/Linux/init.d/sysconfig.xendomains Thu Oct 18 09:35:06
2012 +0100
+++ b/tools/hotplug/Linux/init.d/sysconfig.xendomains Thu Oct 18 09:35:07
2012 +0100
@@ -1,4 +1,4 @@
-## Path: System/xen
+## Path: System/Virtualization
## Description: xen domain start/stop on boot
## Type: string
## Default:
++++++ 26087-hotplug-Linux_install_sysconfig_files_as_data_files.patch ++++++
changeset: 26087:6239ace16749
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu Oct 18 09:35:07 2012 +0100
files: tools/hotplug/Linux/Makefile
description:
hotplug/Linux: install sysconfig files as data files

rpmlint complains about wrong permissions of config files:

[ 455s] xen-tools.i586: W: script-without-shebang
/var/adm/fillup-templates/sysconfig.xendomains
[ 455s] xen-tools.i586: W: script-without-shebang
/var/adm/fillup-templates/sysconfig.xencommons
[ 455s] This text file has executable bits set or is located in a path
dedicated for
[ 455s] executables, but lacks a shebang and cannot thus be executed. If the
file is
[ 455s] meant to be an executable script, add the shebang, otherwise remove the
[ 455s] executable bits or move the file elsewhere.

Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Committed-by: Ian Campbell <ian.campbell@xxxxxxxxxx>


diff -r ba6b1db89ec8 -r 6239ace16749 tools/hotplug/Linux/Makefile
--- a/tools/hotplug/Linux/Makefile Thu Oct 18 09:35:07 2012 +0100
+++ b/tools/hotplug/Linux/Makefile Thu Oct 18 09:35:07 2012 +0100
@@ -46,9 +46,9 @@ install-initd:
[ -d $(DESTDIR)$(SYSCONFIG_DIR) ] || $(INSTALL_DIR)
$(DESTDIR)$(SYSCONFIG_DIR)
$(INSTALL_PROG) $(XEND_INITD) $(DESTDIR)$(INITD_DIR)
$(INSTALL_PROG) $(XENDOMAINS_INITD) $(DESTDIR)$(INITD_DIR)
- $(INSTALL_PROG) $(XENDOMAINS_SYSCONFIG)
$(DESTDIR)$(SYSCONFIG_DIR)/xendomains
+ $(INSTALL_DATA) $(XENDOMAINS_SYSCONFIG)
$(DESTDIR)$(SYSCONFIG_DIR)/xendomains
$(INSTALL_PROG) $(XENCOMMONS_INITD) $(DESTDIR)$(INITD_DIR)
- $(INSTALL_PROG) $(XENCOMMONS_SYSCONFIG)
$(DESTDIR)$(SYSCONFIG_DIR)/xencommons
+ $(INSTALL_DATA) $(XENCOMMONS_SYSCONFIG)
$(DESTDIR)$(SYSCONFIG_DIR)/xencommons
$(INSTALL_PROG) init.d/xen-watchdog $(DESTDIR)$(INITD_DIR)

.PHONY: install-scripts
++++++ 26114-pygrub-list-entries.patch ++++++
# HG changeset patch
# User Charles Arnold <carnold@xxxxxxxx>
# Date 1351249508 -3600
# Node ID 6f9e46917eb8771914041b98f714e8f485fca5ef
# Parent 03af0abd2b72dfab3f2e50dd502108de8603f741
pygrub: Add option to list grub entries

The argument to "--entry" allows 2 syntaxes, either directly the entry
number in menu.lst, or the whole string behind the "title" key word.
This poses the following issue:

From Dom0 there is no way to guess the number and/or the complete
title string because this string contains the kernel version, which
will change with a kernel update.

This patch adds [-l|--list-entries] as an argument to pygrub.

Signed-off-by: Charles Arnold <carnold@xxxxxxxx>
Acked-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>

diff -r 03af0abd2b72 -r 6f9e46917eb8 tools/pygrub/src/pygrub
--- a/tools/pygrub/src/pygrub Fri Oct 26 12:03:12 2012 +0100
+++ b/tools/pygrub/src/pygrub Fri Oct 26 12:05:08 2012 +0100
@@ -595,7 +595,17 @@ def run_grub(file, entry, fs, cfg_args):
sel = g.run()

g = Grub(file, fs)
- if interactive:
+
+ if list_entries:
+ for i in range(len(g.cf.images)):
+ img = g.cf.images[i]
+ print "title: %s" % img.title
+ print " root: %s" % img.root
+ print " kernel: %s" % img.kernel[1]
+ print " args: %s" % img.args
+ print " initrd: %s" % img.initrd[1]
+
+ if interactive and not list_entries:
curses.wrapper(run_main)
else:
sel = g.cf.default
@@ -702,7 +712,7 @@ if __name__ == "__main__":
sel = None

def usage():
- print >> sys.stderr, "Usage: %s [-q|--quiet] [-i|--interactive]
[-n|--not-really] [--output=] [--kernel=] [--ramdisk=] [--args=] [--entry=]
[--output-directory=] [--output-format=sxp|simple|simple0] <image>"
%(sys.argv[0],)
+ print >> sys.stderr, "Usage: %s [-q|--quiet] [-i|--interactive]
[-l|--list-entries] [-n|--not-really] [--output=] [--kernel=] [--ramdisk=]
[--args=] [--entry=] [--output-directory=] [--output-format=sxp|simple|simple0]
<image>" %(sys.argv[0],)

def copy_from_image(fs, file_to_read, file_type, output_directory,
not_really):
@@ -736,8 +746,8 @@ if __name__ == "__main__":
dataoff += len(data)

try:
- opts, args = getopt.gnu_getopt(sys.argv[1:], 'qinh::',
- ["quiet", "interactive", "not-really",
"help",
+ opts, args = getopt.gnu_getopt(sys.argv[1:], 'qilnh::',
+ ["quiet", "interactive", "list-entries",
"not-really", "help",
"output=", "output-format=",
"output-directory=",
"entry=", "kernel=",
"ramdisk=", "args=", "isconfig", "debug"])
@@ -753,6 +763,7 @@ if __name__ == "__main__":
output = None
entry = None
interactive = True
+ list_entries = False
isconfig = False
debug = False
not_really = False
@@ -771,6 +782,8 @@ if __name__ == "__main__":
interactive = False
elif o in ("-i", "--interactive"):
interactive = True
+ elif o in ("-l", "--list-entries"):
+ list_entries = True
elif o in ("-n", "--not-really"):
not_really = True
elif o in ("-h", "--help"):
@@ -855,6 +868,9 @@ if __name__ == "__main__":
fs = None
continue

+ if list_entries:
+ sys.exit(0)
+
# Did looping through partitions find us a kernel?
if not fs:
raise RuntimeError, "Unable to find partition containing kernel"
++++++ 26129-ACPI-BGRT-invalidate.patch ++++++
++++ 643 lines (skipped)

++++++ 26133-IOMMU-defer-BM-disable.patch ++++++
References: bnc#787169

# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1352709367 -3600
# Node ID fdb69dd527cd01a46f87efb380050559dcf12d37
# Parent 286ef4ced2164f4e9bf52fd0c52248182e69a6e6
IOMMU: don't immediately disable bus mastering on faults

Instead, give the owning domain at least a small opportunity of fixing
things up, and allow for rare faults to not bring down the device at
all.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Acked-by: Dario Faggioli <dario.faggioli@xxxxxxxxxx>

--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -564,7 +564,7 @@ static hw_irq_controller iommu_msi_type

static void parse_event_log_entry(struct amd_iommu *iommu, u32 entry[])
{
- u16 domain_id, device_id, bdf, cword;
+ u16 domain_id, device_id, bdf;
u32 code;
u64 *addr;
int count = 0;
@@ -615,18 +615,10 @@ static void parse_event_log_entry(struct
"fault address = 0x%"PRIx64"\n",
event_str[code-1], domain_id, device_id, *addr);

- /* Tell the device to stop DMAing; we can't rely on the guest to
- * control it for us. */
for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
if ( get_dma_requestor_id(iommu->seg, bdf) == device_id )
- {
- cword = pci_conf_read16(iommu->seg, PCI_BUS(bdf),
- PCI_SLOT(bdf), PCI_FUNC(bdf),
- PCI_COMMAND);
- pci_conf_write16(iommu->seg, PCI_BUS(bdf), PCI_SLOT(bdf),
- PCI_FUNC(bdf), PCI_COMMAND,
- cword & ~PCI_COMMAND_MASTER);
- }
+ pci_check_disable_device(iommu->seg, PCI_BUS(bdf),
+ PCI_DEVFN2(bdf));
}
else
{
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -214,6 +214,7 @@ static int device_assigned(u16 seg, u8 b
static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
{
struct hvm_iommu *hd = domain_hvm_iommu(d);
+ struct pci_dev *pdev;
int rc = 0;

if ( !iommu_enabled || !hd->platform_ops )
@@ -227,6 +228,10 @@ static int assign_device(struct domain *
return -EXDEV;

spin_lock(&pcidevs_lock);
+ pdev = pci_get_pdev(seg, bus, devfn);
+ if ( pdev )
+ pdev->fault.count = 0;
+
if ( (rc = hd->platform_ops->assign_device(d, seg, bus, devfn)) )
goto done;

@@ -378,6 +383,8 @@ int deassign_device(struct domain *d, u1
return ret;
}

+ pdev->fault.count = 0;
+
if ( !has_arch_pdevs(d) && need_iommu(d) )
{
d->need_iommu = 0;
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -637,6 +637,36 @@ int __init pci_device_detect(u16 seg, u8
return 1;
}

+void pci_check_disable_device(u16 seg, u8 bus, u8 devfn)
+{
+ struct pci_dev *pdev;
+ s_time_t now = NOW();
+ u16 cword;
+
+ spin_lock(&pcidevs_lock);
+ pdev = pci_get_pdev(seg, bus, devfn);
+ if ( pdev )
+ {
+ if ( now < pdev->fault.time ||
+ now - pdev->fault.time > MILLISECS(10) )
+ pdev->fault.count >>= 1;
+ pdev->fault.time = now;
+ if ( ++pdev->fault.count < PT_FAULT_THRESHOLD )
+ pdev = NULL;
+ }
+ spin_unlock(&pcidevs_lock);
+
+ if ( !pdev )
+ return;
+
+ /* Tell the device to stop DMAing; we can't rely on the guest to
+ * control it for us. */
+ cword = pci_conf_read16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ PCI_COMMAND);
+ pci_conf_write16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
+}
+
/*
* scan pci devices to add all existed PCI devices to alldevs_list,
* and setup pci hierarchy in array bus2bridge.
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -936,7 +936,7 @@ static void __do_iommu_page_fault(struct
while (1)
{
u8 fault_reason;
- u16 source_id, cword;
+ u16 source_id;
u32 data;
u64 guest_addr;
int type;
@@ -969,14 +969,8 @@ static void __do_iommu_page_fault(struct
iommu_page_fault_do_one(iommu, type, fault_reason,
source_id, guest_addr);

- /* Tell the device to stop DMAing; we can't rely on the guest to
- * control it for us. */
- cword = pci_conf_read16(iommu->intel->drhd->segment,
- PCI_BUS(source_id), PCI_SLOT(source_id),
- PCI_FUNC(source_id), PCI_COMMAND);
- pci_conf_write16(iommu->intel->drhd->segment, PCI_BUS(source_id),
- PCI_SLOT(source_id), PCI_FUNC(source_id),
- PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
+ pci_check_disable_device(iommu->intel->drhd->segment,
+ PCI_BUS(source_id), PCI_DEVFN2(source_id));

fault_index++;
if ( fault_index > cap_num_fault_regs(iommu->cap) )
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -64,6 +64,11 @@ struct pci_dev {
const u8 devfn;
struct pci_dev_info info;
struct arch_pci_dev arch;
+ struct {
+ s_time_t time;
+ unsigned int count;
+#define PT_FAULT_THRESHOLD 10
+ } fault;
u64 vf_rlen[6];
};

@@ -106,6 +111,7 @@ void arch_pci_ro_device(int seg, int bdf
struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);
struct pci_dev *pci_get_pdev_by_domain(
struct domain *, int seg, int bus, int devfn);
+void pci_check_disable_device(u16 seg, u8 bus, u8 devfn);

uint8_t pci_conf_read8(
unsigned int seg, unsigned int bus, unsigned int dev, unsigned int func,
++++++ 26183-x86-HPET-masking.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1353575003 -3600
# Node ID c139ca92edca2fab8ec95deb7fd9e4246c3fe28d
# Parent af6b72a224e99a4a516fbc2eecc06ada569304e8
x86/HPET: fix FSB interrupt masking

HPET_TN_FSB is not really suitable for masking interrupts - it merely
switches between the two delivery methods. The right way of masking is
through the HPET_TN_ENABLE bit (which really is an interrupt enable,
not a counter enable or some such). This is even more so with certain
chip sets not even allowing HPET_TN_FSB to be cleared on some of the
channels.

Further, all the setup of the channel should happen before actually
enabling the interrupt, which requires splitting legacy and FSB logic.

Finally this also fixes an S3 resume problem (HPET_TN_FSB did not get
set in hpet_broadcast_resume(), and hpet_msi_unmask() doesn't get
called from the general resume code either afaict).

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>

--- a/xen/arch/x86/hpet.c
+++ b/xen/arch/x86/hpet.c
@@ -236,7 +236,7 @@ static void hpet_msi_unmask(struct irq_d
struct hpet_event_channel *ch = desc->action->dev_id;

cfg = hpet_read32(HPET_Tn_CFG(ch->idx));
- cfg |= HPET_TN_FSB;
+ cfg |= HPET_TN_ENABLE;
hpet_write32(cfg, HPET_Tn_CFG(ch->idx));
}

@@ -246,7 +246,7 @@ static void hpet_msi_mask(struct irq_des
struct hpet_event_channel *ch = desc->action->dev_id;

cfg = hpet_read32(HPET_Tn_CFG(ch->idx));
- cfg &= ~HPET_TN_FSB;
+ cfg &= ~HPET_TN_ENABLE;
hpet_write32(cfg, HPET_Tn_CFG(ch->idx));
}

@@ -319,8 +319,14 @@ static void __hpet_setup_msi_irq(struct
static int __init hpet_setup_msi_irq(unsigned int irq, struct
hpet_event_channel *ch)
{
int ret;
+ u32 cfg = hpet_read32(HPET_Tn_CFG(ch->idx));
irq_desc_t *desc = irq_to_desc(irq);

+ /* set HPET Tn as oneshot */
+ cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC);
+ cfg |= HPET_TN_FSB | HPET_TN_32BIT;
+ hpet_write32(cfg, HPET_Tn_CFG(ch->idx));
+
desc->handler = &hpet_msi_type;
ret = request_irq(irq, hpet_interrupt_handler, 0, "HPET", ch);
if ( ret < 0 )
@@ -541,11 +547,14 @@ void __init hpet_broadcast_init(void)

for ( i = 0; i < n; i++ )
{
- /* set HPET Tn as oneshot */
- cfg = hpet_read32(HPET_Tn_CFG(hpet_events[i].idx));
- cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC);
- cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
- hpet_write32(cfg, HPET_Tn_CFG(hpet_events[i].idx));
+ if ( i == 0 && (cfg & HPET_CFG_LEGACY) )
+ {
+ /* set HPET T0 as oneshot */
+ cfg = hpet_read32(HPET_Tn_CFG(0));
+ cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC);
+ cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
+ hpet_write32(cfg, HPET_Tn_CFG(0));
+ }

/*
* The period is a femto seconds value. We need to calculate the scaled
@@ -602,6 +611,8 @@ void hpet_broadcast_resume(void)
cfg = hpet_read32(HPET_Tn_CFG(hpet_events[i].idx));
cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC);
cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
+ if ( !(hpet_events[i].flags & HPET_EVT_LEGACY) )
+ cfg |= HPET_TN_FSB;
hpet_write32(cfg, HPET_Tn_CFG(hpet_events[i].idx));

hpet_events[i].next_event = STIME_MAX;
++++++ 26189-xenstore-chmod.patch ++++++
# HG changeset patch
# Parent 8b93ac0c93f3fb8a140b4688ba71841ac927d4e3
xenstore-chmod: handle arbitrary number of perms rather than MAX_PERMS constant

Constant MAX_PERMS 16 is too small in some situations, e.g. if
there are more than 16 domU(s) on one hypervisor (which is easy to
achieve) and one wants to do xenstore-chmod PATH for all domU(s). So,
remove the MAX_PERMS limitation and allow an arbitrary number of perms.

Signed-off-by: Chunyan Liu <cyliu@xxxxxxxx>
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>

diff -r 8b93ac0c93f3 tools/xenstore/xenstore_client.c
--- a/tools/xenstore/xenstore_client.c Tue Nov 13 11:19:17 2012 +0000
+++ b/tools/xenstore/xenstore_client.c Mon Nov 26 11:33:38 2012 +0800
@@ -25,7 +25,6 @@
#define PATH_SEP '/'
#define MAX_PATH_LEN 256

-#define MAX_PERMS 16

enum mode {
MODE_unknown,
@@ -407,44 +406,41 @@ perform(enum mode mode, int optind, int
output("%s\n", list[i]);
}
free(list);
- optind++;
- break;
- }
- case MODE_ls: {
- do_ls(xsh, argv[optind], 0, prefix);
- optind++;
- break;
+ optind++;
+ break;
+ }
+ case MODE_ls: {
+ do_ls(xsh, argv[optind], 0, prefix);
+ optind++;
+ break;
}
case MODE_chmod: {
- struct xs_permissions perms[MAX_PERMS];
- int nperms = 0;
/* save path pointer: */
char *path = argv[optind++];
- for (; argv[optind]; optind++, nperms++)
+ int nperms = argc - optind;
+ struct xs_permissions perms[nperms];
+ int i;
+ for (i = 0; argv[optind]; optind++, i++)
{
- if (MAX_PERMS <= nperms)
- errx(1, "Too many permissions specified. "
- "Maximum per invocation is %d.", MAX_PERMS);
-
- perms[nperms].id = atoi(argv[optind]+1);
+ perms[i].id = atoi(argv[optind]+1);

switch (argv[optind][0])
{
case 'n':
- perms[nperms].perms = XS_PERM_NONE;
+ perms[i].perms = XS_PERM_NONE;
break;
case 'r':
- perms[nperms].perms = XS_PERM_READ;
+ perms[i].perms = XS_PERM_READ;
break;
case 'w':
- perms[nperms].perms = XS_PERM_WRITE;
+ perms[i].perms = XS_PERM_WRITE;
break;
case 'b':
- perms[nperms].perms = XS_PERM_READ | XS_PERM_WRITE;
+ perms[i].perms = XS_PERM_READ | XS_PERM_WRITE;
break;
default:
errx(1, "Invalid permission specification: '%c'",
- argv[optind][0]);
+ argv[optind][0]);
}
}

++++++ 26200-IOMMU-debug-verbose.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1354118456 -3600
# Node ID 836697b197462f89a4d296da9482d1719dcc0836
# Parent 1fce7522daa6bab9fce93b95adf592193c904097
IOMMU: imply "verbose" from "debug"

I think that generally enabling debugging code without also enabling
verbose output is rather pointless; if someone really wants this, they
can always pass e.g. "iommu=debug,no-verbose".

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>

--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -91,7 +91,11 @@ static void __init parse_iommu_param(cha
else if ( !strcmp(s, "intremap") )
iommu_intremap = val;
else if ( !strcmp(s, "debug") )
+ {
iommu_debug = val;
+ if ( val )
+ iommu_verbose = 1;
+ }
else if ( !strcmp(s, "amd-iommu-perdev-intremap") )
amd_iommu_perdev_intremap = val;
else if ( !strcmp(s, "dom0-passthrough") )
++++++ 26235-IOMMU-ATS-max-queue-depth.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1354697534 -3600
# Node ID 670b07e8d7382229639af0d1df30071e6c1ebb19
# Parent bc624b00d6d601f00a53c2f7502a82dcef60f882
IOMMU/ATS: fix maximum queue depth calculation

The capabilities register field is a 5-bit value, and the 5 bits all
being zero actually means 32 entries.

Under the assumption that amd_iommu_flush_iotlb() really just tried
to correct for the miscalculation above when adding 32 to the value,
that adjustment is also being removed.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Xiantao Zhang <xiantao.zhang@xxxxxxxxx>
Acked-by: Wei Huang <wei.huang2@xxxxxxx>

--- a/xen/drivers/passthrough/amd/iommu_cmd.c
+++ b/xen/drivers/passthrough/amd/iommu_cmd.c
@@ -321,7 +321,7 @@ void amd_iommu_flush_iotlb(struct pci_de

req_id = get_dma_requestor_id(iommu->seg, bdf);
queueid = req_id;
- maxpend = (ats_pdev->ats_queue_depth + 32) & 0xff;
+ maxpend = ats_pdev->ats_queue_depth & 0xff;

/* send INVALIDATE_IOTLB_PAGES command */
spin_lock_irqsave(&iommu->lock, flags);
--- a/xen/drivers/passthrough/ats.h
+++ b/xen/drivers/passthrough/ats.h
@@ -30,7 +30,7 @@ struct pci_ats_dev {

#define ATS_REG_CAP 4
#define ATS_REG_CTL 6
-#define ATS_QUEUE_DEPTH_MASK 0xF
+#define ATS_QUEUE_DEPTH_MASK 0x1f
#define ATS_ENABLE (1<<15)

extern struct list_head ats_devices;
--- a/xen/drivers/passthrough/x86/ats.c
+++ b/xen/drivers/passthrough/x86/ats.c
@@ -93,7 +93,8 @@ int enable_ats_device(int seg, int bus,
pdev->devfn = devfn;
value = pci_conf_read16(seg, bus, PCI_SLOT(devfn),
PCI_FUNC(devfn), pos + ATS_REG_CAP);
- pdev->ats_queue_depth = value & ATS_QUEUE_DEPTH_MASK;
+ pdev->ats_queue_depth = value & ATS_QUEUE_DEPTH_MASK ?:
+ ATS_QUEUE_DEPTH_MASK + 1;
list_add(&pdev->list, &ats_devices);
}

++++++ 26252-VMX-nested-rflags.patch ++++++
# HG changeset patch
# User Dongxiao Xu <dongxiao.xu@xxxxxxxxx>
# Date 1354812866 0
# Node ID 312f0713dfc98635fd9ed4b42481581489faa28f
# Parent bfd8e96fa3f157630f9698401a1f040ca1776c8e
nested vmx: fix rflags status in virtual vmexit

As stated in SDM, all bits (except for those 1-reserved) in rflags
would be set to 0 in VM exit. Therefore we need to follow this logic
in virtual_vmexit.

Signed-off-by: Xiantao Zhang <xiantao.zhang@xxxxxxxxx>
Signed-off-by: Dongxiao Xu <dongxiao.xu@xxxxxxxxx>
Acked-by: Jan Beulich <jbeulich@xxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>

--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -990,7 +990,8 @@ static void virtual_vmexit(struct cpu_us

regs->eip = __get_vvmcs(nvcpu->nv_vvmcx, HOST_RIP);
regs->esp = __get_vvmcs(nvcpu->nv_vvmcx, HOST_RSP);
- regs->eflags = __vmread(GUEST_RFLAGS);
+ /* VM exit clears all bits except bit 1 */
+ regs->eflags = 0x2;

/* updating host cr0 to sync TS bit */
__vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
++++++ 26253-VMX-nested-rdtsc.patch ++++++
# HG changeset patch
# User Dongxiao Xu <dongxiao.xu@xxxxxxxxx>
# Date 1354812981 0
# Node ID a09150b57ace2fa786dcaefa958f0b197b1b6d4c
# Parent 312f0713dfc98635fd9ed4b42481581489faa28f
nested vmx: fix handling of RDTSC

If L0 is to handle the TSC access, then we need to update guest EIP by
calling update_guest_eip().

Signed-off-by: Dongxiao Xu <dongxiao.xu@xxxxxxxxx>
Acked-by: Jan Beulich <jbeulich@xxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>

--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1613,7 +1613,7 @@ static int get_instruction_length(void)
return len;
}

-static void update_guest_eip(void)
+void update_guest_eip(void)
{
struct cpu_user_regs *regs = guest_cpu_user_regs();
unsigned long x;
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1558,6 +1558,7 @@ int nvmx_n2_vmexit_handler(struct cpu_us
tsc += __get_vvmcs(nvcpu->nv_vvmcx, TSC_OFFSET);
regs->eax = (uint32_t)tsc;
regs->edx = (uint32_t)(tsc >> 32);
+ update_guest_eip();

return 1;
}
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -396,6 +396,8 @@ void ept_p2m_init(struct p2m_domain *p2m
void ept_walk_table(struct domain *d, unsigned long gfn);
void setup_ept_dump(void);

+void update_guest_eip(void);
+
/* EPT violation qualifications definitions */
#define _EPT_READ_VIOLATION 0
#define EPT_READ_VIOLATION (1UL<<_EPT_READ_VIOLATION)
++++++ 26254-VMX-nested-dr.patch ++++++
# HG changeset patch
# User Dongxiao Xu <dongxiao.xu@xxxxxxxxx>
# Date 1354813009 0
# Node ID e6eb1e52da7cfcb1a7697b35b4d842f35107d1ed
# Parent a09150b57ace2fa786dcaefa958f0b197b1b6d4c
nested vmx: fix DR access VM exit

For DR registers, we use a lazy restore mechanism on access.
Therefore, when receiving such a VM exit, L0 should be responsible for
switching to the right DR values, and then inject it into the L1 hypervisor.

Signed-off-by: Dongxiao Xu <dongxiao.xu@xxxxxxxxx>
Acked-by: Jan Beulich <jbeulich@xxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>

--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1585,7 +1585,8 @@ int nvmx_n2_vmexit_handler(struct cpu_us
break;
case EXIT_REASON_DR_ACCESS:
ctrl = __n2_exec_control(v);
- if ( ctrl & CPU_BASED_MOV_DR_EXITING )
+ if ( (ctrl & CPU_BASED_MOV_DR_EXITING) &&
+ v->arch.hvm_vcpu.flag_dr_dirty )
nvcpu->nv_vmexit_pending = 1;
break;
case EXIT_REASON_INVLPG:
++++++ 26255-VMX-nested-ia32e-mode.patch ++++++
# HG changeset patch
# User Dongxiao Xu <dongxiao.xu@xxxxxxxxx>
# Date 1354813046 0
# Node ID 1ed1507fa0407f1da715d04fe1b510e81ca4fb31
# Parent e6eb1e52da7cfcb1a7697b35b4d842f35107d1ed
nested vmx: enable IA32E mode while do VM entry

Some VMMs may check the platform capability to judge whether long
mode guests are supported. Therefore we need to expose this bit to
the guest VMM.

Xen on Xen works fine with the current solution because Xen doesn't
check this capability but directly sets it in the VMCS if the guest
supports long mode.

Signed-off-by: Dongxiao Xu <dongxiao.xu@xxxxxxxxx>
Acked-by: Jan Beulich <jbeulich@xxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>

--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1351,7 +1351,7 @@ int nvmx_msr_read_intercept(unsigned int
case MSR_IA32_VMX_ENTRY_CTLS:
/* bit 0-8, and 12 must be 1 (refer G5 of SDM) */
data = 0x11ff;
- data = (data << 32) | data;
+ data = ((data | VM_ENTRY_IA32E_MODE) << 32) | data;
break;

case IA32_FEATURE_CONTROL_MSR:
++++++ 26258-VMX-nested-intr-delivery.patch ++++++
# HG changeset patch
# User Dongxiao Xu <dongxiao.xu@xxxxxxxxx>
# Date 1354813139 0
# Node ID 90831c29bfde6aac013b7e5ec98934a4953c31c9
# Parent 25dd352265ca23750f1a1a983124b36f518c4384
nested vmx: fix interrupt delivery to L2 guest

While delivering an interrupt into an L2 guest, the L0 hypervisor needs to
check whether the L1 hypervisor wants to own the interrupt; if not, it
directly injects the interrupt into the L2 guest.

Signed-off-by: Xiantao Zhang <xiantao.zhang@xxxxxxxxx>
Signed-off-by: Dongxiao Xu <dongxiao.xu@xxxxxxxxx>
Acked-by: Jan Beulich <jbeulich@xxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>

--- a/xen/arch/x86/hvm/vmx/intr.c
+++ b/xen/arch/x86/hvm/vmx/intr.c
@@ -163,7 +163,7 @@ enum hvm_intblk nvmx_intr_blocked(struct

static int nvmx_intr_intercept(struct vcpu *v, struct hvm_intack intack)
{
- u32 exit_ctrl;
+ u32 ctrl;

if ( nvmx_intr_blocked(v) != hvm_intblk_none )
{
@@ -176,11 +176,14 @@ static int nvmx_intr_intercept(struct vc
if ( intack.source == hvm_intsrc_pic ||
intack.source == hvm_intsrc_lapic )
{
+ ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx,
PIN_BASED_VM_EXEC_CONTROL);
+ if ( !(ctrl & PIN_BASED_EXT_INTR_MASK) )
+ return 0;
+
vmx_inject_extint(intack.vector);

- exit_ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx,
- VM_EXIT_CONTROLS);
- if ( exit_ctrl & VM_EXIT_ACK_INTR_ON_EXIT )
+ ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, VM_EXIT_CONTROLS);
+ if ( ctrl & VM_EXIT_ACK_INTR_ON_EXIT )
{
/* for now, duplicate the ack path in vmx_intr_assist */
hvm_vcpu_ack_pending_irq(v, intack);
++++++ 26262-x86-EFI-secure-shim.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1354884272 -3600
# Node ID b62bd62b26836fafe19cf41fec194bcf33e2ead6
# Parent cb542e58da25211843eb79998ea8568ebe9c8056
x86/EFI: add code interfacing with the secure boot shim

... to validate the kernel image (which is required to be in PE
format, as is e.g. the case for the Linux bzImage when built with
CONFIG_EFI_STUB).

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>

--- a/xen/arch/x86/efi/boot.c
+++ b/xen/arch/x86/efi/boot.c
@@ -24,6 +24,18 @@
#include <asm/msr.h>
#include <asm/processor.h>

+#define SHIM_LOCK_PROTOCOL_GUID \
+ { 0x605dab50, 0xe046, 0x4300, {0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b,
0x23} }
+
+typedef EFI_STATUS
+(/* _not_ EFIAPI */ *EFI_SHIM_LOCK_VERIFY) (
+ IN VOID *Buffer,
+ IN UINT32 Size);
+
+typedef struct {
+ EFI_SHIM_LOCK_VERIFY Verify;
+} EFI_SHIM_LOCK_PROTOCOL;
+
extern char start[];
extern u32 cpuid_ext_features;

@@ -628,12 +640,14 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
static EFI_GUID __initdata gop_guid = EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID;
static EFI_GUID __initdata bio_guid = BLOCK_IO_PROTOCOL;
static EFI_GUID __initdata devp_guid = DEVICE_PATH_PROTOCOL;
+ static EFI_GUID __initdata shim_lock_guid = SHIM_LOCK_PROTOCOL_GUID;
EFI_LOADED_IMAGE *loaded_image;
EFI_STATUS status;
unsigned int i, argc;
CHAR16 **argv, *file_name, *cfg_file_name = NULL;
UINTN cols, rows, depth, size, map_key, info_size, gop_mode = ~0;
EFI_HANDLE *handles = NULL;
+ EFI_SHIM_LOCK_PROTOCOL *shim_lock;
EFI_GRAPHICS_OUTPUT_PROTOCOL *gop = NULL;
EFI_GRAPHICS_OUTPUT_MODE_INFORMATION *mode_info;
EFI_FILE_HANDLE dir_handle;
@@ -823,6 +837,11 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
read_file(dir_handle, s2w(&name), &kernel);
efi_bs->FreePool(name.w);

+ if ( !EFI_ERROR(efi_bs->LocateProtocol(&shim_lock_guid, NULL,
+ (void **)&shim_lock)) &&
+ shim_lock->Verify(kernel.ptr, kernel.size) != EFI_SUCCESS )
+ blexit(L"Dom0 kernel image could not be verified\r\n");
+
name.s = get_value(&cfg, section.s, "ramdisk");
if ( name.s )
{
++++++ 26266-sched-ratelimit-check.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1355134467 -3600
# Node ID 8d209624ea83b272e1ebd713a928c38d4782f4f1
# Parent f96a0cda12160f497981a37f6922a1ed7db9a462
scheduler: fix rate limit range checking

For one, neither of the two checks allowed for the documented value
of zero (disabling the functionality altogether).

Second, the range checking of the command line parameter was done by
the credit scheduler's initialization code, despite it being a generic
scheduler option.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>

--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -846,8 +846,9 @@ csched_sys_cntl(const struct scheduler *
case XEN_SYSCTL_SCHEDOP_putinfo:
if (params->tslice_ms > XEN_SYSCTL_CSCHED_TSLICE_MAX
|| params->tslice_ms < XEN_SYSCTL_CSCHED_TSLICE_MIN
- || params->ratelimit_us > XEN_SYSCTL_SCHED_RATELIMIT_MAX
- || params->ratelimit_us < XEN_SYSCTL_SCHED_RATELIMIT_MIN
+ || (params->ratelimit_us
+ && (params->ratelimit_us > XEN_SYSCTL_SCHED_RATELIMIT_MAX
+ || params->ratelimit_us < XEN_SYSCTL_SCHED_RATELIMIT_MIN))
|| MICROSECS(params->ratelimit_us) > MILLISECS(params->tslice_ms) )
goto out;
prv->tslice_ms = params->tslice_ms;
@@ -1607,17 +1608,6 @@ csched_init(struct scheduler *ops)
sched_credit_tslice_ms = CSCHED_DEFAULT_TSLICE_MS;
}

- if ( sched_ratelimit_us > XEN_SYSCTL_SCHED_RATELIMIT_MAX
- || sched_ratelimit_us < XEN_SYSCTL_SCHED_RATELIMIT_MIN )
- {
- printk("WARNING: sched_ratelimit_us outside of valid range [%d,%d].\n"
- " Resetting to default %u\n",
- XEN_SYSCTL_SCHED_RATELIMIT_MIN,
- XEN_SYSCTL_SCHED_RATELIMIT_MAX,
- SCHED_DEFAULT_RATELIMIT_US);
- sched_ratelimit_us = SCHED_DEFAULT_RATELIMIT_US;
- }
-
prv->tslice_ms = sched_credit_tslice_ms;
prv->ticks_per_tslice = CSCHED_TICKS_PER_TSLICE;
if ( prv->tslice_ms < prv->ticks_per_tslice )
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -1322,6 +1322,18 @@ void __init scheduler_init(void)
if ( SCHED_OP(&ops, init) )
panic("scheduler returned error on init\n");

+ if ( sched_ratelimit_us &&
+ (sched_ratelimit_us > XEN_SYSCTL_SCHED_RATELIMIT_MAX
+ || sched_ratelimit_us < XEN_SYSCTL_SCHED_RATELIMIT_MIN) )
+ {
+ printk("WARNING: sched_ratelimit_us outside of valid range [%d,%d].\n"
+ " Resetting to default %u\n",
+ XEN_SYSCTL_SCHED_RATELIMIT_MIN,
+ XEN_SYSCTL_SCHED_RATELIMIT_MAX,
+ SCHED_DEFAULT_RATELIMIT_US);
+ sched_ratelimit_us = SCHED_DEFAULT_RATELIMIT_US;
+ }
+
idle_domain = domain_create(DOMID_IDLE, 0, 0);
BUG_ON(IS_ERR(idle_domain));
idle_domain->vcpu = idle_vcpu;
++++++ 26287-sched-credit-pick-idle.patch ++++++
# HG changeset patch
# User Dario Faggioli <dario.faggioli@xxxxxxxxxx>
# Date 1355854218 0
# Node ID 127c2c47d440eb7f3248ab5561909e326af7e328
# Parent d5c0389bf26c89969ebce71927f34f6b923af949
xen: sched_credit: improve picking up the idle CPU for a VCPU

In _csched_cpu_pick() we try to select the best possible CPU for
running a VCPU, considering the characteristics of the underlying
hardware (i.e., how many threads, cores, sockets, and how busy they
are). What we want is "the idle execution vehicle with the most
idling neighbours in its grouping".

In order to achieve it, we select a CPU from the VCPU's affinity,
giving preference to its current processor if possible, as the basis
for the comparison with all the other CPUs. Problem is, to discount
the VCPU itself when computing this "idleness" (in an attempt to be
fair wrt its current processor), we arbitrarily and unconditionally
consider that selected CPU as idle, even when it is not the case,
for instance:
1. If the CPU is not the one where the VCPU is running (perhaps due
to the affinity being changed);
2. The CPU is where the VCPU is running, but it has other VCPUs in
its runq, so it won't go idle even if the VCPU in question goes.

This is exemplified in the trace below:

] 3.466115364 x|------|------| d10v1 22005(2:2:5) 3 [ a 1 8 ]
... ... ...
3.466122856 x|------|------| d10v1 runstate_change d10v1 running->offline
3.466123046 x|------|------| d?v? runstate_change d32767v0 runnable->running
... ... ...
] 3.466126887 x|------|------| d32767v0 28004(2:8:4) 3 [ a 1 8 ]

22005(...) line (the first line) means _csched_cpu_pick() was called
on VCPU 1 of domain 10, while it is running on CPU 0, and it chose
CPU 8, which is busy ('|'), even if there are plenty of idle
CPUs. That is because, as a consequence of changing the VCPU affinity,
CPU 8 was chosen as the basis for the comparison, and therefore
considered idle (its bit gets unconditionally set in the bitmask
representing the idle CPUs). 28004(...) line means the VCPU is woken
up and queued on CPU 8's runq, where it waits for a context switch or
a migration, in order to be able to execute.

This change fixes things by only considering the "guessed" CPU idle if
the VCPU in question is both running there and is its only runnable
VCPU.

Signed-off-by: Dario Faggioli <dario.faggioli@xxxxxxxxxx>
Acked-by: George Dunlap <george.dunlap@xxxxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>

--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -72,6 +72,9 @@
#define CSCHED_VCPU(_vcpu) ((struct csched_vcpu *) (_vcpu)->sched_priv)
#define CSCHED_DOM(_dom) ((struct csched_dom *) (_dom)->sched_priv)
#define RUNQ(_cpu) (&(CSCHED_PCPU(_cpu)->runq))
+/* Is the first element of _cpu's runq its idle vcpu? */
+#define IS_RUNQ_IDLE(_cpu) (list_empty(RUNQ(_cpu)) || \
+ is_idle_vcpu(__runq_elem(RUNQ(_cpu)->next)->vcpu))


/*
@@ -487,9 +490,14 @@ _csched_cpu_pick(const struct scheduler
* distinct cores first and guarantees we don't do something stupid
* like run two VCPUs on co-hyperthreads while there are idle cores
* or sockets.
+ *
+ * Notice that, when computing the "idleness" of cpu, we may want to
+ * discount vc. That is, iff vc is the currently running and the only
+ * runnable vcpu on cpu, we add cpu to the idlers.
*/
cpumask_and(&idlers, &cpu_online_map, CSCHED_PRIV(ops)->idlers);
- cpumask_set_cpu(cpu, &idlers);
+ if ( vc->processor == cpu && IS_RUNQ_IDLE(cpu) )
+ cpumask_set_cpu(cpu, &idlers);
cpumask_and(&cpus, &cpus, &idlers);
cpumask_clear_cpu(cpu, &cpus);

++++++ 26294-x86-AMD-Fam15-way-access-filter.patch ++++++
# HG changeset patch
# User Andre Przywara <osp@xxxxxxxxx>
# Date 1355913729 -3600
# Node ID 5fb0b8b838dab0b331abfa675fd2b2214ac90760
# Parent b04de677de31f26ba4b8f2f382ca4dfffcff9a79
x86, amd: Disable way access filter on Piledriver CPUs

The Way Access Filter in recent AMD CPUs may hurt the performance of
some workloads, caused by aliasing issues in the L1 cache.
This patch disables it on the affected CPUs.

The issue is similar to the one from last year:
http://lkml.indiana.edu/hypermail/linux/kernel/1107.3/00041.html
This new patch does not replace the old one, we just need another
quirk for newer CPUs.

The performance penalty without the patch depends on the
circumstances, but is a bit less than last year's 3%.

The workloads affected would be those that access code from the same
physical page under different virtual addresses, so different
processes using the same libraries with ASLR or multiple instances of
PIE-binaries. The code needs to be accessed simultaneously from both
cores of the same compute unit.

More details can be found here:
http://developer.amd.com/Assets/SharedL1InstructionCacheonAMD15hCPU.pdf

CPUs affected are anything with the core known as Piledriver.
That includes the new parts of the AMD A-Series (aka Trinity) and the
just released new CPUs of the FX-Series (aka Vishera).
The model numbering is a bit odd here: FX CPUs have model 2,
A-Series has model 10h, with possible extensions to 1Fh. Hence the
range of model ids.

Signed-off-by: Andre Przywara <osp@xxxxxxxxx>

Add and use MSR_AMD64_IC_CFG. Update the value whenever it is found to
not have all bits set, rather than just when it's zero.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
Committed-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -493,6 +493,14 @@ static void __devinit init_amd(struct cp
}
}

+ /*
+ * The way access filter has a performance penalty on some workloads.
+ * Disable it on the affected CPUs.
+ */
+ if (c->x86 == 0x15 && c->x86_model >= 0x02 && c->x86_model < 0x20 &&
+ !rdmsr_safe(MSR_AMD64_IC_CFG, value) && (value & 0x1e) != 0x1e)
+ wrmsr_safe(MSR_AMD64_IC_CFG, value | 0x1e);
+
amd_get_topology(c);

/* Pointless to use MWAIT on Family10 as it does not deep sleep. */
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -206,6 +206,7 @@

/* AMD64 MSRs */
#define MSR_AMD64_NB_CFG 0xc001001f
+#define MSR_AMD64_IC_CFG 0xc0011021
#define MSR_AMD64_DC_CFG 0xc0011022
#define AMD64_NB_CFG_CF8_EXT_ENABLE_BIT 46

++++++ 26320-IOMMU-domctl-assign-seg.patch ++++++
# HG changeset patch
# User Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
# Date 1357290407 -3600
# Node ID 8fd5635f451b073ddc99e928c975e8a7743d1321
# Parent c4114a042410d3bdec3a77c30b2e85366d7fbe1d
passthrough/domctl: use correct struct in union

This appears to be a copy paste error from c/s 23861:ec7c81fbe0de.

It is safe, functionally speaking, as both the xen_domctl_assign_device
and xen_domctl_get_device_group structures start with a 'uint32_t
machine_sbdf'. We should however use the correct union structure.

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Committed-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -592,7 +592,7 @@ int iommu_do_domctl(
if ( ret )
break;

- seg = domctl->u.get_device_group.machine_sbdf >> 16;
+ seg = domctl->u.assign_device.machine_sbdf >> 16;
bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff;
devfn = domctl->u.assign_device.machine_sbdf & 0xff;

@@ -621,7 +621,7 @@ int iommu_do_domctl(
if ( ret )
goto assign_device_out;

- seg = domctl->u.get_device_group.machine_sbdf >> 16;
+ seg = domctl->u.assign_device.machine_sbdf >> 16;
bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff;
devfn = domctl->u.assign_device.machine_sbdf & 0xff;

@@ -649,7 +649,7 @@ int iommu_do_domctl(
if ( ret )
goto deassign_device_out;

- seg = domctl->u.get_device_group.machine_sbdf >> 16;
+ seg = domctl->u.assign_device.machine_sbdf >> 16;
bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff;
devfn = domctl->u.assign_device.machine_sbdf & 0xff;

++++++ 26324-IOMMU-assign-params.patch ++++++
References: bnc#787169

# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1357559364 -3600
# Node ID 62dd78a4e3fc9d190840549f13b4d613f2d19c41
# Parent 64b36dde26bc3c4fc80312cc9eeb0e511f0cf94b
IOMMU: adjust (re)assign operation parameters

... to use a (struct pci_dev *, devfn) pair.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: "Zhang, Xiantao" <xiantao.zhang@xxxxxxxxx>

--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -328,34 +328,31 @@ void amd_iommu_disable_domain_device(str
disable_ats_device(iommu->seg, bus, devfn);
}

-static int reassign_device( struct domain *source, struct domain *target,
- u16 seg, u8 bus, u8 devfn)
+static int reassign_device(struct domain *source, struct domain *target,
+ u8 devfn, struct pci_dev *pdev)
{
- struct pci_dev *pdev;
struct amd_iommu *iommu;
int bdf;
struct hvm_iommu *t = domain_hvm_iommu(target);

- ASSERT(spin_is_locked(&pcidevs_lock));
- pdev = pci_get_pdev_by_domain(source, seg, bus, devfn);
- if ( !pdev )
- return -ENODEV;
-
- bdf = PCI_BDF2(bus, devfn);
- iommu = find_iommu_for_device(seg, bdf);
+ bdf = PCI_BDF2(pdev->bus, pdev->devfn);
+ iommu = find_iommu_for_device(pdev->seg, bdf);
if ( !iommu )
{
AMD_IOMMU_DEBUG("Fail to find iommu."
" %04x:%02x:%x02.%x cannot be assigned to dom%d\n",
- seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
target->domain_id);
return -ENODEV;
}

amd_iommu_disable_domain_device(source, iommu, bdf);

- list_move(&pdev->domain_list, &target->arch.pdev_list);
- pdev->domain = target;
+ if ( devfn == pdev->devfn )
+ {
+ list_move(&pdev->domain_list, &target->arch.pdev_list);
+ pdev->domain = target;
+ }

/* IO page tables might be destroyed after pci-detach the last device
* In this case, we have to re-allocate root table for next pci-attach.*/
@@ -364,17 +361,18 @@ static int reassign_device( struct domai

amd_iommu_setup_domain_device(target, iommu, bdf);
AMD_IOMMU_DEBUG("Re-assign %04x:%02x:%02x.%u from dom%d to dom%d\n",
- seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
source->domain_id, target->domain_id);

return 0;
}

-static int amd_iommu_assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
+static int amd_iommu_assign_device(struct domain *d, u8 devfn,
+ struct pci_dev *pdev)
{
- struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(seg);
- int bdf = (bus << 8) | devfn;
- int req_id = get_dma_requestor_id(seg, bdf);
+ struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(pdev->seg);
+ int bdf = PCI_BDF2(pdev->bus, devfn);
+ int req_id = get_dma_requestor_id(pdev->seg, bdf);

if ( ivrs_mappings[req_id].unity_map_enable )
{
@@ -386,7 +384,7 @@ static int amd_iommu_assign_device(struc
ivrs_mappings[req_id].read_permission);
}

- return reassign_device(dom0, d, seg, bus, devfn);
+ return reassign_device(dom0, d, devfn, pdev);
}

static void deallocate_next_page_table(struct page_info* pg, int level)
@@ -451,12 +449,6 @@ static void amd_iommu_domain_destroy(str
amd_iommu_flush_all_pages(d);
}

-static int amd_iommu_return_device(
- struct domain *s, struct domain *t, u16 seg, u8 bus, u8 devfn)
-{
- return reassign_device(s, t, seg, bus, devfn);
-}
-
static int amd_iommu_add_device(struct pci_dev *pdev)
{
struct amd_iommu *iommu;
@@ -596,7 +588,7 @@ const struct iommu_ops amd_iommu_ops = {
.teardown = amd_iommu_domain_destroy,
.map_page = amd_iommu_map_page,
.unmap_page = amd_iommu_unmap_page,
- .reassign_device = amd_iommu_return_device,
+ .reassign_device = reassign_device,
.get_device_group_id = amd_iommu_group_id,
.update_ire_from_apic = amd_iommu_ioapic_update_ire,
.update_ire_from_msi = amd_iommu_msi_msg_update_ire,
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -232,11 +232,16 @@ static int assign_device(struct domain *
return -EXDEV;

spin_lock(&pcidevs_lock);
- pdev = pci_get_pdev(seg, bus, devfn);
- if ( pdev )
- pdev->fault.count = 0;
+ pdev = pci_get_pdev_by_domain(dom0, seg, bus, devfn);
+ if ( !pdev )
+ {
+ rc = pci_get_pdev(seg, bus, devfn) ? -EBUSY : -ENODEV;
+ goto done;
+ }
+
+ pdev->fault.count = 0;

- if ( (rc = hd->platform_ops->assign_device(d, seg, bus, devfn)) )
+ if ( (rc = hd->platform_ops->assign_device(d, devfn, pdev)) )
goto done;

if ( has_arch_pdevs(d) && !need_iommu(d) )
@@ -367,18 +372,11 @@ int deassign_device(struct domain *d, u1
return -EINVAL;

ASSERT(spin_is_locked(&pcidevs_lock));
- pdev = pci_get_pdev(seg, bus, devfn);
+ pdev = pci_get_pdev_by_domain(d, seg, bus, devfn);
if ( !pdev )
return -ENODEV;

- if ( pdev->domain != d )
- {
- dprintk(XENLOG_ERR VTDPREFIX,
- "d%d: deassign a device not owned\n", d->domain_id);
- return -EINVAL;
- }
-
- ret = hd->platform_ops->reassign_device(d, dom0, seg, bus, devfn);
+ ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev);
if ( ret )
{
dprintk(XENLOG_ERR VTDPREFIX,
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1689,17 +1689,10 @@ out:
static int reassign_device_ownership(
struct domain *source,
struct domain *target,
- u16 seg, u8 bus, u8 devfn)
+ u8 devfn, struct pci_dev *pdev)
{
- struct pci_dev *pdev;
int ret;

- ASSERT(spin_is_locked(&pcidevs_lock));
- pdev = pci_get_pdev_by_domain(source, seg, bus, devfn);
-
- if (!pdev)
- return -ENODEV;
-
/*
* Devices assigned to untrusted domains (here assumed to be any domU)
* can attempt to send arbitrary LAPIC/MSI messages. We are unprotected
@@ -1708,16 +1701,19 @@ static int reassign_device_ownership(
if ( (target != dom0) && !iommu_intremap )
untrusted_msi = 1;

- ret = domain_context_unmap(source, seg, bus, devfn);
+ ret = domain_context_unmap(source, pdev->seg, pdev->bus, devfn);
if ( ret )
return ret;

- ret = domain_context_mapping(target, seg, bus, devfn);
+ ret = domain_context_mapping(target, pdev->seg, pdev->bus, devfn);
if ( ret )
return ret;

- list_move(&pdev->domain_list, &target->arch.pdev_list);
- pdev->domain = target;
+ if ( devfn == pdev->devfn )
+ {
+ list_move(&pdev->domain_list, &target->arch.pdev_list);
+ pdev->domain = target;
+ }

return ret;
}
@@ -2207,36 +2203,26 @@ int __init intel_vtd_setup(void)
}

static int intel_iommu_assign_device(
- struct domain *d, u16 seg, u8 bus, u8 devfn)
+ struct domain *d, u8 devfn, struct pci_dev *pdev)
{
struct acpi_rmrr_unit *rmrr;
int ret = 0, i;
- struct pci_dev *pdev;
- u16 bdf;
+ u16 bdf, seg;
+ u8 bus;

if ( list_empty(&acpi_drhd_units) )
return -ENODEV;

- ASSERT(spin_is_locked(&pcidevs_lock));
- pdev = pci_get_pdev(seg, bus, devfn);
- if (!pdev)
- return -ENODEV;
-
- if (pdev->domain != dom0)
- {
- dprintk(XENLOG_ERR VTDPREFIX,
- "IOMMU: assign a assigned device\n");
- return -EBUSY;
- }
-
- ret = reassign_device_ownership(dom0, d, seg, bus, devfn);
+ ret = reassign_device_ownership(dom0, d, devfn, pdev);
if ( ret )
goto done;

/* FIXME: Because USB RMRR conflicts with guest bios region,
* ignore USB RMRR temporarily.
*/
- if ( is_usb_device(seg, bus, devfn) )
+ seg = pdev->seg;
+ bus = pdev->bus;
+ if ( is_usb_device(seg, bus, pdev->devfn) )
{
ret = 0;
goto done;
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -123,13 +123,13 @@ struct iommu_ops {
int (*add_device)(struct pci_dev *pdev);
int (*enable_device)(struct pci_dev *pdev);
int (*remove_device)(struct pci_dev *pdev);
- int (*assign_device)(struct domain *d, u16 seg, u8 bus, u8 devfn);
+ int (*assign_device)(struct domain *, u8 devfn, struct pci_dev *);
void (*teardown)(struct domain *d);
int (*map_page)(struct domain *d, unsigned long gfn, unsigned long mfn,
unsigned int flags);
int (*unmap_page)(struct domain *d, unsigned long gfn);
int (*reassign_device)(struct domain *s, struct domain *t,
- u16 seg, u8 bus, u8 devfn);
+ u8 devfn, struct pci_dev *);
int (*get_device_group_id)(u16 seg, u8 bus, u8 devfn);
void (*update_ire_from_apic)(unsigned int apic, unsigned int reg, unsigned
int value);
void (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg
*msg);
++++++ 26325-IOMMU-add-remove-params.patch ++++++
References: bnc#787169

# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1357559482 -3600
# Node ID 75cc4943b1ff509c4074800a23ff51d773233b8a
# Parent 62dd78a4e3fc9d190840549f13b4d613f2d19c41
IOMMU: adjust add/remove operation parameters

... to use a (struct pci_dev *, devfn) pair.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: "Zhang, Xiantao" <xiantao.zhang@xxxxxxxxx>

--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -83,14 +83,14 @@ static void disable_translation(u32 *dte
}

static void amd_iommu_setup_domain_device(
- struct domain *domain, struct amd_iommu *iommu, int bdf)
+ struct domain *domain, struct amd_iommu *iommu,
+ u8 devfn, struct pci_dev *pdev)
{
void *dte;
unsigned long flags;
int req_id, valid = 1;
int dte_i = 0;
- u8 bus = PCI_BUS(bdf);
- u8 devfn = PCI_DEVFN2(bdf);
+ u8 bus = pdev->bus;

struct hvm_iommu *hd = domain_hvm_iommu(domain);

@@ -103,7 +103,7 @@ static void amd_iommu_setup_domain_devic
dte_i = 1;

/* get device-table entry */
- req_id = get_dma_requestor_id(iommu->seg, bdf);
+ req_id = get_dma_requestor_id(iommu->seg, PCI_BDF2(bus, devfn));
dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);

spin_lock_irqsave(&iommu->lock, flags);
@@ -115,7 +115,7 @@ static void amd_iommu_setup_domain_devic
(u32 *)dte, page_to_maddr(hd->root_table), hd->domain_id,
hd->paging_mode, valid);

- if ( pci_ats_device(iommu->seg, bus, devfn) &&
+ if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
iommu_dte_set_iotlb((u32 *)dte, dte_i);

@@ -132,32 +132,31 @@ static void amd_iommu_setup_domain_devic

ASSERT(spin_is_locked(&pcidevs_lock));

- if ( pci_ats_device(iommu->seg, bus, devfn) &&
- !pci_ats_enabled(iommu->seg, bus, devfn) )
+ if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
+ !pci_ats_enabled(iommu->seg, bus, pdev->devfn) )
{
- struct pci_dev *pdev;
+ if ( devfn == pdev->devfn )
+ enable_ats_device(iommu->seg, bus, devfn);

- enable_ats_device(iommu->seg, bus, devfn);
-
- ASSERT(spin_is_locked(&pcidevs_lock));
- pdev = pci_get_pdev(iommu->seg, bus, devfn);
-
- ASSERT( pdev != NULL );
amd_iommu_flush_iotlb(pdev, INV_IOMMU_ALL_PAGES_ADDRESS, 0);
}
}

-static void __init amd_iommu_setup_dom0_device(struct pci_dev *pdev)
+static int __init amd_iommu_setup_dom0_device(u8 devfn, struct pci_dev *pdev)
{
int bdf = PCI_BDF2(pdev->bus, pdev->devfn);
struct amd_iommu *iommu = find_iommu_for_device(pdev->seg, bdf);

- if ( likely(iommu != NULL) )
- amd_iommu_setup_domain_device(pdev->domain, iommu, bdf);
- else
+ if ( unlikely(!iommu) )
+ {
AMD_IOMMU_DEBUG("No iommu for device %04x:%02x:%02x.%u\n",
pdev->seg, pdev->bus,
- PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+ PCI_SLOT(devfn), PCI_FUNC(devfn));
+ return -ENODEV;
+ }
+
+ amd_iommu_setup_domain_device(pdev->domain, iommu, devfn, pdev);
+ return 0;
}

int __init amd_iov_detect(void)
@@ -291,16 +290,16 @@ static void __init amd_iommu_dom0_init(s
}

void amd_iommu_disable_domain_device(struct domain *domain,
- struct amd_iommu *iommu, int bdf)
+ struct amd_iommu *iommu,
+ u8 devfn, struct pci_dev *pdev)
{
void *dte;
unsigned long flags;
int req_id;
- u8 bus = PCI_BUS(bdf);
- u8 devfn = PCI_DEVFN2(bdf);
+ u8 bus = pdev->bus;

BUG_ON ( iommu->dev_table.buffer == NULL );
- req_id = get_dma_requestor_id(iommu->seg, bdf);
+ req_id = get_dma_requestor_id(iommu->seg, PCI_BDF2(bus, devfn));
dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);

spin_lock_irqsave(&iommu->lock, flags);
@@ -308,7 +307,7 @@ void amd_iommu_disable_domain_device(str
{
disable_translation((u32 *)dte);

- if ( pci_ats_device(iommu->seg, bus, devfn) &&
+ if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
iommu_dte_set_iotlb((u32 *)dte, 0);

@@ -323,7 +322,8 @@ void amd_iommu_disable_domain_device(str

ASSERT(spin_is_locked(&pcidevs_lock));

- if ( pci_ats_device(iommu->seg, bus, devfn) &&
+ if ( devfn == pdev->devfn &&
+ pci_ats_device(iommu->seg, bus, devfn) &&
pci_ats_enabled(iommu->seg, bus, devfn) )
disable_ats_device(iommu->seg, bus, devfn);
}
@@ -346,7 +346,7 @@ static int reassign_device(struct domain
return -ENODEV;
}

- amd_iommu_disable_domain_device(source, iommu, bdf);
+ amd_iommu_disable_domain_device(source, iommu, devfn, pdev);

if ( devfn == pdev->devfn )
{
@@ -359,7 +359,7 @@ static int reassign_device(struct domain
if ( t->root_table == NULL )
allocate_domain_resources(t);

- amd_iommu_setup_domain_device(target, iommu, bdf);
+ amd_iommu_setup_domain_device(target, iommu, devfn, pdev);
AMD_IOMMU_DEBUG("Re-assign %04x:%02x:%02x.%u from dom%d to dom%d\n",
pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
source->domain_id, target->domain_id);
@@ -449,7 +449,7 @@ static void amd_iommu_domain_destroy(str
amd_iommu_flush_all_pages(d);
}

-static int amd_iommu_add_device(struct pci_dev *pdev)
+static int amd_iommu_add_device(u8 devfn, struct pci_dev *pdev)
{
struct amd_iommu *iommu;
u16 bdf;
@@ -462,16 +462,16 @@ static int amd_iommu_add_device(struct p
{
AMD_IOMMU_DEBUG("Fail to find iommu."
" %04x:%02x:%02x.%u cannot be assigned to dom%d\n",
- pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
- PCI_FUNC(pdev->devfn), pdev->domain->domain_id);
+ pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ pdev->domain->domain_id);
return -ENODEV;
}

- amd_iommu_setup_domain_device(pdev->domain, iommu, bdf);
+ amd_iommu_setup_domain_device(pdev->domain, iommu, devfn, pdev);
return 0;
}

-static int amd_iommu_remove_device(struct pci_dev *pdev)
+static int amd_iommu_remove_device(u8 devfn, struct pci_dev *pdev)
{
struct amd_iommu *iommu;
u16 bdf;
@@ -484,12 +484,12 @@ static int amd_iommu_remove_device(struc
{
AMD_IOMMU_DEBUG("Fail to find iommu."
" %04x:%02x:%02x.%u cannot be removed from dom%d\n",
- pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
- PCI_FUNC(pdev->devfn), pdev->domain->domain_id);
+ pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ pdev->domain->domain_id);
return -ENODEV;
}

- amd_iommu_disable_domain_device(pdev->domain, iommu, bdf);
+ amd_iommu_disable_domain_device(pdev->domain, iommu, devfn, pdev);
return 0;
}

--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -167,7 +167,7 @@ int iommu_add_device(struct pci_dev *pde
if ( !iommu_enabled || !hd->platform_ops )
return 0;

- return hd->platform_ops->add_device(pdev);
+ return hd->platform_ops->add_device(pdev->devfn, pdev);
}

int iommu_enable_device(struct pci_dev *pdev)
@@ -197,7 +197,7 @@ int iommu_remove_device(struct pci_dev *
if ( !iommu_enabled || !hd->platform_ops )
return 0;

- return hd->platform_ops->remove_device(pdev);
+ return hd->platform_ops->remove_device(pdev->devfn, pdev);
}

/*
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -715,7 +715,7 @@ int __init scan_pci_devices(void)

struct setup_dom0 {
struct domain *d;
- void (*handler)(struct pci_dev *);
+ int (*handler)(u8 devfn, struct pci_dev *);
};

static int __init _setup_dom0_pci_devices(struct pci_seg *pseg, void *arg)
@@ -734,7 +734,7 @@ static int __init _setup_dom0_pci_device

pdev->domain = ctxt->d;
list_add(&pdev->domain_list, &ctxt->d->arch.pdev_list);
- ctxt->handler(pdev);
+ ctxt->handler(devfn, pdev);
}
}

@@ -742,7 +742,7 @@ static int __init _setup_dom0_pci_device
}

void __init setup_dom0_pci_devices(
- struct domain *d, void (*handler)(struct pci_dev *))
+ struct domain *d, int (*handler)(u8 devfn, struct pci_dev *))
{
struct setup_dom0 ctxt = { .d = d, .handler = handler };

--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -52,7 +52,7 @@ int nr_iommus;

static struct tasklet vtd_fault_tasklet;

-static void setup_dom0_device(struct pci_dev *);
+static int setup_dom0_device(u8 devfn, struct pci_dev *);
static void setup_dom0_rmrr(struct domain *d);

static int domain_iommu_domid(struct domain *d,
@@ -1904,7 +1904,7 @@ static int rmrr_identity_mapping(struct
return 0;
}

-static int intel_iommu_add_device(struct pci_dev *pdev)
+static int intel_iommu_add_device(u8 devfn, struct pci_dev *pdev)
{
struct acpi_rmrr_unit *rmrr;
u16 bdf;
@@ -1915,8 +1915,7 @@ static int intel_iommu_add_device(struct
if ( !pdev->domain )
return -EINVAL;

- ret = domain_context_mapping(pdev->domain, pdev->seg, pdev->bus,
- pdev->devfn);
+ ret = domain_context_mapping(pdev->domain, pdev->seg, pdev->bus, devfn);
if ( ret )
{
dprintk(XENLOG_ERR VTDPREFIX, "d%d: context mapping failed\n",
@@ -1928,7 +1927,7 @@ static int intel_iommu_add_device(struct
{
if ( rmrr->segment == pdev->seg &&
PCI_BUS(bdf) == pdev->bus &&
- PCI_DEVFN2(bdf) == pdev->devfn )
+ PCI_DEVFN2(bdf) == devfn )
{
ret = rmrr_identity_mapping(pdev->domain, rmrr);
if ( ret )
@@ -1953,7 +1952,7 @@ static int intel_iommu_enable_device(str
return ret >= 0 ? 0 : ret;
}

-static int intel_iommu_remove_device(struct pci_dev *pdev)
+static int intel_iommu_remove_device(u8 devfn, struct pci_dev *pdev)
{
struct acpi_rmrr_unit *rmrr;
u16 bdf;
@@ -1971,19 +1970,22 @@ static int intel_iommu_remove_device(str
{
if ( rmrr->segment == pdev->seg &&
PCI_BUS(bdf) == pdev->bus &&
- PCI_DEVFN2(bdf) == pdev->devfn )
+ PCI_DEVFN2(bdf) == devfn )
return 0;
}
}

- return domain_context_unmap(pdev->domain, pdev->seg, pdev->bus,
- pdev->devfn);
+ return domain_context_unmap(pdev->domain, pdev->seg, pdev->bus, devfn);
}

-static void __init setup_dom0_device(struct pci_dev *pdev)
+static int __init setup_dom0_device(u8 devfn, struct pci_dev *pdev)
{
- domain_context_mapping(pdev->domain, pdev->seg, pdev->bus, pdev->devfn);
- pci_vtd_quirk(pdev);
+ int err;
+
+ err = domain_context_mapping(pdev->domain, pdev->seg, pdev->bus, devfn);
+ if ( !err && devfn == pdev->devfn )
+ pci_vtd_quirk(pdev);
+ return err;
}

void clear_fault_bits(struct iommu *iommu)
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -120,9 +120,9 @@ bool_t pt_irq_need_timer(uint32_t flags)
struct iommu_ops {
int (*init)(struct domain *d);
void (*dom0_init)(struct domain *d);
- int (*add_device)(struct pci_dev *pdev);
+ int (*add_device)(u8 devfn, struct pci_dev *);
int (*enable_device)(struct pci_dev *pdev);
- int (*remove_device)(struct pci_dev *pdev);
+ int (*remove_device)(u8 devfn, struct pci_dev *);
int (*assign_device)(struct domain *, u8 devfn, struct pci_dev *);
void (*teardown)(struct domain *d);
int (*map_page)(struct domain *d, unsigned long gfn, unsigned long mfn,
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -100,7 +100,8 @@ struct pci_dev *pci_lock_pdev(int seg, i
struct pci_dev *pci_lock_domain_pdev(
struct domain *, int seg, int bus, int devfn);

-void setup_dom0_pci_devices(struct domain *, void (*)(struct pci_dev *));
+void setup_dom0_pci_devices(struct domain *,
+ int (*)(u8 devfn, struct pci_dev *));
void pci_release_devices(struct domain *d);
int pci_add_segment(u16 seg);
const unsigned long *pci_get_ro_map(u16 seg);
++++++ 26326-VT-d-context-map-params.patch ++++++
References: bnc#787169

# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1357559549 -3600
# Node ID afb598bd0f5436bea15b7ef842e8ad5c6adefa1a
# Parent 75cc4943b1ff509c4074800a23ff51d773233b8a
VT-d: adjust context map/unmap parameters

... to use a (struct pci_dev *, devfn) pair.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: "Zhang, Xiantao" <xiantao.zhang@xxxxxxxxx>

--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -95,7 +95,7 @@ void free_pgtable_maddr(u64 maddr);
void *map_vtd_domain_page(u64 maddr);
void unmap_vtd_domain_page(void *va);
int domain_context_mapping_one(struct domain *domain, struct iommu *iommu,
- u8 bus, u8 devfn);
+ u8 bus, u8 devfn, const struct pci_dev *);
int domain_context_unmap_one(struct domain *domain, struct iommu *iommu,
u8 bus, u8 devfn);

--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1308,7 +1308,7 @@ static void __init intel_iommu_dom0_init
int domain_context_mapping_one(
struct domain *domain,
struct iommu *iommu,
- u8 bus, u8 devfn)
+ u8 bus, u8 devfn, const struct pci_dev *pdev)
{
struct hvm_iommu *hd = domain_hvm_iommu(domain);
struct context_entry *context, *context_entries;
@@ -1325,11 +1325,9 @@ int domain_context_mapping_one(
if ( context_present(*context) )
{
int res = 0;
- struct pci_dev *pdev = NULL;

- /* First try to get domain ownership from device structure. If that's
+ /* Try to get domain ownership from device structure. If that's
* not available, try to read it from the context itself. */
- pdev = pci_get_pdev(seg, bus, devfn);
if ( pdev )
{
if ( pdev->domain != domain )
@@ -1448,13 +1446,12 @@ int domain_context_mapping_one(
}

static int domain_context_mapping(
- struct domain *domain, u16 seg, u8 bus, u8 devfn)
+ struct domain *domain, u8 devfn, const struct pci_dev *pdev)
{
struct acpi_drhd_unit *drhd;
int ret = 0;
u32 type;
- u8 secbus;
- struct pci_dev *pdev = pci_get_pdev(seg, bus, devfn);
+ u8 seg = pdev->seg, bus = pdev->bus, secbus;

drhd = acpi_find_matched_drhd_unit(pdev);
if ( !drhd )
@@ -1475,8 +1472,9 @@ static int domain_context_mapping(
dprintk(VTDPREFIX, "d%d:PCIe: map %04x:%02x:%02x.%u\n",
domain->domain_id, seg, bus,
PCI_SLOT(devfn), PCI_FUNC(devfn));
- ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn);
- if ( !ret && ats_device(pdev, drhd) > 0 )
+ ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn,
+ pdev);
+ if ( !ret && devfn == pdev->devfn && ats_device(pdev, drhd) > 0 )
enable_ats_device(seg, bus, devfn);

break;
@@ -1487,14 +1485,16 @@ static int domain_context_mapping(
domain->domain_id, seg, bus,
PCI_SLOT(devfn), PCI_FUNC(devfn));

- ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn);
+ ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn,
+ pdev);
if ( ret )
break;

if ( find_upstream_bridge(seg, &bus, &devfn, &secbus) < 1 )
break;

- ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn);
+ ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn,
+ pci_get_pdev(seg, bus, devfn));

/*
* Devices behind PCIe-to-PCI/PCIx bridge may generate different
@@ -1503,7 +1503,8 @@ static int domain_context_mapping(
*/
if ( !ret && pdev_type(seg, bus, devfn) == DEV_TYPE_PCIe2PCI_BRIDGE &&
(secbus != pdev->bus || pdev->devfn != 0) )
- ret = domain_context_mapping_one(domain, drhd->iommu, secbus, 0);
+ ret = domain_context_mapping_one(domain, drhd->iommu, secbus, 0,
+ pci_get_pdev(seg, secbus, 0));

break;

@@ -1576,18 +1577,15 @@ int domain_context_unmap_one(
}

static int domain_context_unmap(
- struct domain *domain, u16 seg, u8 bus, u8 devfn)
+ struct domain *domain, u8 devfn, const struct pci_dev *pdev)
{
struct acpi_drhd_unit *drhd;
struct iommu *iommu;
int ret = 0;
u32 type;
- u8 tmp_bus, tmp_devfn, secbus;
- struct pci_dev *pdev = pci_get_pdev(seg, bus, devfn);
+ u8 seg = pdev->seg, bus = pdev->bus, tmp_bus, tmp_devfn, secbus;
int found = 0;

- BUG_ON(!pdev);
-
drhd = acpi_find_matched_drhd_unit(pdev);
if ( !drhd )
return -ENODEV;
@@ -1607,7 +1605,7 @@ static int domain_context_unmap(
domain->domain_id, seg, bus,
PCI_SLOT(devfn), PCI_FUNC(devfn));
ret = domain_context_unmap_one(domain, iommu, bus, devfn);
- if ( !ret && ats_device(pdev, drhd) > 0 )
+ if ( !ret && devfn == pdev->devfn && ats_device(pdev, drhd) > 0 )
disable_ats_device(seg, bus, devfn);

break;
@@ -1701,11 +1699,11 @@ static int reassign_device_ownership(
if ( (target != dom0) && !iommu_intremap )
untrusted_msi = 1;

- ret = domain_context_unmap(source, pdev->seg, pdev->bus, devfn);
+ ret = domain_context_unmap(source, devfn, pdev);
if ( ret )
return ret;

- ret = domain_context_mapping(target, pdev->seg, pdev->bus, devfn);
+ ret = domain_context_mapping(target, devfn, pdev);
if ( ret )
return ret;

@@ -1915,7 +1913,7 @@ static int intel_iommu_add_device(u8 dev
if ( !pdev->domain )
return -EINVAL;

- ret = domain_context_mapping(pdev->domain, pdev->seg, pdev->bus, devfn);
+ ret = domain_context_mapping(pdev->domain, devfn, pdev);
if ( ret )
{
dprintk(XENLOG_ERR VTDPREFIX, "d%d: context mapping failed\n",
@@ -1975,14 +1973,14 @@ static int intel_iommu_remove_device(u8
}
}

- return domain_context_unmap(pdev->domain, pdev->seg, pdev->bus, devfn);
+ return domain_context_unmap(pdev->domain, devfn, pdev);
}

static int __init setup_dom0_device(u8 devfn, struct pci_dev *pdev)
{
int err;

- err = domain_context_mapping(pdev->domain, pdev->seg, pdev->bus, devfn);
+ err = domain_context_mapping(pdev->domain, devfn, pdev);
if ( !err && devfn == pdev->devfn )
pci_vtd_quirk(pdev);
return err;
--- a/xen/drivers/passthrough/vtd/quirks.c
+++ b/xen/drivers/passthrough/vtd/quirks.c
@@ -292,7 +292,7 @@ static void map_me_phantom_function(stru
/* map or unmap ME phantom function */
if ( map )
domain_context_mapping_one(domain, drhd->iommu, 0,
- PCI_DEVFN(dev, 7));
+ PCI_DEVFN(dev, 7), NULL);
else
domain_context_unmap_one(domain, drhd->iommu, 0,
PCI_DEVFN(dev, 7));
++++++ 26327-AMD-IOMMU-flush-params.patch ++++++
References: bnc#787169

# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1357559599 -3600
# Node ID 2a2c63f641ee3bda4ad552eb0b3ea479d37590cc
# Parent afb598bd0f5436bea15b7ef842e8ad5c6adefa1a
AMD IOMMU: adjust flush function parameters

... to use a (struct pci_dev *, devfn) pair.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: "Zhang, Xiantao" <xiantao.zhang@xxxxxxxxx>

--- a/xen/drivers/passthrough/amd/iommu_cmd.c
+++ b/xen/drivers/passthrough/amd/iommu_cmd.c
@@ -287,12 +287,12 @@ void invalidate_iommu_all(struct amd_iom
send_iommu_command(iommu, cmd);
}

-void amd_iommu_flush_iotlb(struct pci_dev *pdev,
+void amd_iommu_flush_iotlb(u8 devfn, const struct pci_dev *pdev,
uint64_t gaddr, unsigned int order)
{
unsigned long flags;
struct amd_iommu *iommu;
- unsigned int bdf, req_id, queueid, maxpend;
+ unsigned int req_id, queueid, maxpend;
struct pci_ats_dev *ats_pdev;

if ( !ats_enabled )
@@ -305,8 +305,8 @@ void amd_iommu_flush_iotlb(struct pci_de
if ( !pci_ats_enabled(ats_pdev->seg, ats_pdev->bus, ats_pdev->devfn) )
return;

- bdf = PCI_BDF2(ats_pdev->bus, ats_pdev->devfn);
- iommu = find_iommu_for_device(ats_pdev->seg, bdf);
+ iommu = find_iommu_for_device(ats_pdev->seg,
+ PCI_BDF2(ats_pdev->bus, ats_pdev->devfn));

if ( !iommu )
{
@@ -319,7 +319,7 @@ void amd_iommu_flush_iotlb(struct pci_de
if ( !iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
return;

- req_id = get_dma_requestor_id(iommu->seg, bdf);
+ req_id = get_dma_requestor_id(iommu->seg, PCI_BDF2(ats_pdev->bus, devfn));
queueid = req_id;
maxpend = ats_pdev->ats_queue_depth & 0xff;

@@ -339,7 +339,7 @@ static void amd_iommu_flush_all_iotlbs(s
return;

for_each_pdev( d, pdev )
- amd_iommu_flush_iotlb(pdev, gaddr, order);
+ amd_iommu_flush_iotlb(pdev->devfn, pdev, gaddr, order);
}

/* Flush iommu cache after p2m changes. */
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -138,7 +138,7 @@ static void amd_iommu_setup_domain_devic
if ( devfn == pdev->devfn )
enable_ats_device(iommu->seg, bus, devfn);

- amd_iommu_flush_iotlb(pdev, INV_IOMMU_ALL_PAGES_ADDRESS, 0);
+ amd_iommu_flush_iotlb(devfn, pdev, INV_IOMMU_ALL_PAGES_ADDRESS, 0);
}
}

--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -78,8 +78,8 @@ void iommu_dte_set_guest_cr3(u32 *dte, u
void amd_iommu_flush_all_pages(struct domain *d);
void amd_iommu_flush_pages(struct domain *d, unsigned long gfn,
unsigned int order);
-void amd_iommu_flush_iotlb(struct pci_dev *pdev, uint64_t gaddr,
- unsigned int order);
+void amd_iommu_flush_iotlb(u8 devfn, const struct pci_dev *pdev,
+ uint64_t gaddr, unsigned int order);
void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf);
void amd_iommu_flush_intremap(struct amd_iommu *iommu, uint16_t bdf);
void amd_iommu_flush_all_caches(struct amd_iommu *iommu);
++++++ 26328-IOMMU-pdev-type.patch ++++++
References: bnc#787169

# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1357559679 -3600
# Node ID 11fa145c880ee814aaf56a7f47f47ee3e5560c7c
# Parent 2a2c63f641ee3bda4ad552eb0b3ea479d37590cc
IOMMU/PCI: consolidate pdev_type() and cache its result for a given device

Add an "unknown" device type as well as one for PCI-to-PCIe bridges
(the latter of which other IOMMU code with or without this patch
doesn't appear to handle properly).

Make sure we don't mistake a device whose config space we can't access
for a legacy PCI device (after all we in fact don't know how to deal
with such a device, and hence shouldn't try to).

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: "Zhang, Xiantao" <xiantao.zhang@xxxxxxxxx>

--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -144,7 +144,7 @@ static struct pci_dev *alloc_pdev(struct
spin_lock_init(&pdev->msix_table_lock);

/* update bus2bridge */
- switch ( pdev_type(pseg->nr, bus, devfn) )
+ switch ( pdev->type = pdev_type(pseg->nr, bus, devfn) )
{
u8 sec_bus, sub_bus;

@@ -184,7 +184,7 @@ static struct pci_dev *alloc_pdev(struct
static void free_pdev(struct pci_seg *pseg, struct pci_dev *pdev)
{
/* update bus2bridge */
- switch ( pdev_type(pseg->nr, pdev->bus, pdev->devfn) )
+ switch ( pdev->type )
{
u8 dev, func, sec_bus, sub_bus;

@@ -202,6 +202,9 @@ static void free_pdev(struct pci_seg *ps
pseg->bus2bridge[sec_bus] = pseg->bus2bridge[pdev->bus];
spin_unlock(&pseg->bus2bridge_lock);
break;
+
+ default:
+ break;
}

list_del(&pdev->alldevs_list);
@@ -563,20 +566,30 @@ void pci_release_devices(struct domain *

#define PCI_CLASS_BRIDGE_PCI 0x0604

-int pdev_type(u16 seg, u8 bus, u8 devfn)
+enum pdev_type pdev_type(u16 seg, u8 bus, u8 devfn)
{
u16 class_device, creg;
u8 d = PCI_SLOT(devfn), f = PCI_FUNC(devfn);
int pos = pci_find_cap_offset(seg, bus, d, f, PCI_CAP_ID_EXP);

class_device = pci_conf_read16(seg, bus, d, f, PCI_CLASS_DEVICE);
- if ( class_device == PCI_CLASS_BRIDGE_PCI )
+ switch ( class_device )
{
+ case PCI_CLASS_BRIDGE_PCI:
if ( !pos )
return DEV_TYPE_LEGACY_PCI_BRIDGE;
creg = pci_conf_read16(seg, bus, d, f, pos + PCI_EXP_FLAGS);
- return ((creg & PCI_EXP_FLAGS_TYPE) >> 4) == PCI_EXP_TYPE_PCI_BRIDGE ?
- DEV_TYPE_PCIe2PCI_BRIDGE : DEV_TYPE_PCIe_BRIDGE;
+ switch ( (creg & PCI_EXP_FLAGS_TYPE) >> 4 )
+ {
+ case PCI_EXP_TYPE_PCI_BRIDGE:
+ return DEV_TYPE_PCIe2PCI_BRIDGE;
+ case PCI_EXP_TYPE_PCIE_BRIDGE:
+ return DEV_TYPE_PCI2PCIe_BRIDGE;
+ }
+ return DEV_TYPE_PCIe_BRIDGE;
+
+ case 0x0000: case 0xffff:
+ return DEV_TYPE_PCI_UNKNOWN;
}

return pos ? DEV_TYPE_PCIe_ENDPOINT : DEV_TYPE_PCI;
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -426,7 +426,6 @@ void io_apic_write_remap_rte(

static void set_msi_source_id(struct pci_dev *pdev, struct iremap_entry *ire)
{
- int type;
u16 seg;
u8 bus, devfn, secbus;
int ret;
@@ -437,8 +436,7 @@ static void set_msi_source_id(struct pci
seg = pdev->seg;
bus = pdev->bus;
devfn = pdev->devfn;
- type = pdev_type(seg, bus, devfn);
- switch ( type )
+ switch ( pdev->type )
{
case DEV_TYPE_PCIe_BRIDGE:
case DEV_TYPE_PCIe2PCI_BRIDGE:
@@ -470,7 +468,7 @@ static void set_msi_source_id(struct pci
default:
dprintk(XENLOG_WARNING VTDPREFIX,
"d%d: unknown(%u): %04x:%02x:%02x.%u\n",
- pdev->domain->domain_id, type,
+ pdev->domain->domain_id, pdev->type,
seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
break;
}
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1450,7 +1450,6 @@ static int domain_context_mapping(
{
struct acpi_drhd_unit *drhd;
int ret = 0;
- u32 type;
u8 seg = pdev->seg, bus = pdev->bus, secbus;

drhd = acpi_find_matched_drhd_unit(pdev);
@@ -1459,8 +1458,7 @@ static int domain_context_mapping(

ASSERT(spin_is_locked(&pcidevs_lock));

- type = pdev_type(seg, bus, devfn);
- switch ( type )
+ switch ( pdev->type )
{
case DEV_TYPE_PCIe_BRIDGE:
case DEV_TYPE_PCIe2PCI_BRIDGE:
@@ -1510,7 +1508,7 @@ static int domain_context_mapping(

default:
dprintk(XENLOG_ERR VTDPREFIX, "d%d:unknown(%u): %04x:%02x:%02x.%u\n",
- domain->domain_id, type,
+ domain->domain_id, pdev->type,
seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
ret = -EINVAL;
break;
@@ -1582,7 +1580,6 @@ static int domain_context_unmap(
struct acpi_drhd_unit *drhd;
struct iommu *iommu;
int ret = 0;
- u32 type;
u8 seg = pdev->seg, bus = pdev->bus, tmp_bus, tmp_devfn, secbus;
int found = 0;

@@ -1591,8 +1588,7 @@ static int domain_context_unmap(
return -ENODEV;
iommu = drhd->iommu;

- type = pdev_type(seg, bus, devfn);
- switch ( type )
+ switch ( pdev->type )
{
case DEV_TYPE_PCIe_BRIDGE:
case DEV_TYPE_PCIe2PCI_BRIDGE:
@@ -1639,7 +1635,7 @@ static int domain_context_unmap(

default:
dprintk(XENLOG_ERR VTDPREFIX, "d%d:unknown(%u): %04x:%02x:%02x.%u\n",
- domain->domain_id, type,
+ domain->domain_id, pdev->type,
seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
ret = -EINVAL;
goto out;
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -62,6 +62,17 @@ struct pci_dev {
const u16 seg;
const u8 bus;
const u8 devfn;
+
+ enum pdev_type {
+ DEV_TYPE_PCI_UNKNOWN,
+ DEV_TYPE_PCIe_ENDPOINT,
+ DEV_TYPE_PCIe_BRIDGE, // PCIe root port, switch
+ DEV_TYPE_PCIe2PCI_BRIDGE, // PCIe-to-PCI/PCIx bridge
+ DEV_TYPE_PCI2PCIe_BRIDGE, // PCI/PCIx-to-PCIe bridge
+ DEV_TYPE_LEGACY_PCI_BRIDGE, // Legacy PCI bridge
+ DEV_TYPE_PCI,
+ } type;
+
struct pci_dev_info info;
struct arch_pci_dev arch;
struct {
@@ -83,18 +94,10 @@ struct pci_dev {

extern spinlock_t pcidevs_lock;

-enum {
- DEV_TYPE_PCIe_ENDPOINT,
- DEV_TYPE_PCIe_BRIDGE, // PCIe root port, switch
- DEV_TYPE_PCIe2PCI_BRIDGE, // PCIe-to-PCI/PCIx bridge
- DEV_TYPE_LEGACY_PCI_BRIDGE, // Legacy PCI bridge
- DEV_TYPE_PCI,
-};
-
bool_t pci_known_segment(u16 seg);
int pci_device_detect(u16 seg, u8 bus, u8 dev, u8 func);
int scan_pci_devices(void);
-int pdev_type(u16 seg, u8 bus, u8 devfn);
+enum pdev_type pdev_type(u16 seg, u8 bus, u8 devfn);
int find_upstream_bridge(u16 seg, u8 *bus, u8 *devfn, u8 *secbus);
struct pci_dev *pci_lock_pdev(int seg, int bus, int devfn);
struct pci_dev *pci_lock_domain_pdev(
--- a/xen/include/xen/pci_regs.h
+++ b/xen/include/xen/pci_regs.h
@@ -371,6 +371,9 @@
#define PCI_EXP_TYPE_UPSTREAM 0x5 /* Upstream Port */
#define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */
#define PCI_EXP_TYPE_PCI_BRIDGE 0x7 /* PCI/PCI-X Bridge */
+#define PCI_EXP_TYPE_PCIE_BRIDGE 0x8 /* PCI/PCI-X to PCIE Bridge */
+#define PCI_EXP_TYPE_RC_END 0x9 /* Root Complex Integrated Endpoint */
+#define PCI_EXP_TYPE_RC_EC 0xa /* Root Complex Event Collector */
#define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */
#define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */
#define PCI_EXP_DEVCAP 4 /* Device capabilities */
++++++ 26329-IOMMU-phantom-dev.patch ++++++
References: bnc#787169

# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1357559742 -3600
# Node ID c9a01b396cb4eaedef30e9a6ed615115a9f8bfc5
# Parent 11fa145c880ee814aaf56a7f47f47ee3e5560c7c
IOMMU: add phantom function support

Apart from generating device context entries for the base function,
all phantom functions also need context entries to be generated for
them.

In order to distinguish different use cases, a variant of
pci_get_pdev() is being introduced that, even when passed a phantom
function number, would return the underlying actual device.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: "Zhang, Xiantao" <xiantao.zhang@xxxxxxxxx>

--- a/xen/drivers/passthrough/amd/iommu_cmd.c
+++ b/xen/drivers/passthrough/amd/iommu_cmd.c
@@ -339,7 +339,15 @@ static void amd_iommu_flush_all_iotlbs(s
return;

for_each_pdev( d, pdev )
- amd_iommu_flush_iotlb(pdev->devfn, pdev, gaddr, order);
+ {
+ u8 devfn = pdev->devfn;
+
+ do {
+ amd_iommu_flush_iotlb(devfn, pdev, gaddr, order);
+ devfn += pdev->phantom_stride;
+ } while ( devfn != pdev->devfn &&
+ PCI_SLOT(devfn) == PCI_SLOT(pdev->devfn) );
+ }
}

/* Flush iommu cache after p2m changes. */
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -692,7 +692,7 @@ void parse_ppr_log_entry(struct amd_iomm
devfn = PCI_DEVFN2(device_id);

spin_lock(&pcidevs_lock);
- pdev = pci_get_pdev(iommu->seg, bus, devfn);
+ pdev = pci_get_real_pdev(iommu->seg, bus, devfn);
spin_unlock(&pcidevs_lock);

if ( pdev )
--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -612,7 +612,6 @@ static int update_paging_mode(struct dom
for_each_pdev( d, pdev )
{
bdf = (pdev->bus << 8) | pdev->devfn;
- req_id = get_dma_requestor_id(pdev->seg, bdf);
iommu = find_iommu_for_device(pdev->seg, bdf);
if ( !iommu )
{
@@ -621,16 +620,21 @@ static int update_paging_mode(struct dom
}

spin_lock_irqsave(&iommu->lock, flags);
- device_entry = iommu->dev_table.buffer +
- (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
-
- /* valid = 0 only works for dom0 passthrough mode */
- amd_iommu_set_root_page_table((u32 *)device_entry,
- page_to_maddr(hd->root_table),
- hd->domain_id,
- hd->paging_mode, 1);
-
- amd_iommu_flush_device(iommu, req_id);
+ do {
+ req_id = get_dma_requestor_id(pdev->seg, bdf);
+ device_entry = iommu->dev_table.buffer +
+ (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
+
+ /* valid = 0 only works for dom0 passthrough mode */
+ amd_iommu_set_root_page_table((u32 *)device_entry,
+ page_to_maddr(hd->root_table),
+ hd->domain_id,
+ hd->paging_mode, 1);
+
+ amd_iommu_flush_device(iommu, req_id);
+ bdf += pdev->phantom_stride;
+ } while ( PCI_DEVFN2(bdf) != pdev->devfn &&
+ PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
spin_unlock_irqrestore(&iommu->lock, flags);
}

--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -157,6 +157,8 @@ void __init iommu_dom0_init(struct domai
int iommu_add_device(struct pci_dev *pdev)
{
struct hvm_iommu *hd;
+ int rc;
+ u8 devfn;

if ( !pdev->domain )
return -EINVAL;
@@ -167,7 +169,20 @@ int iommu_add_device(struct pci_dev *pde
if ( !iommu_enabled || !hd->platform_ops )
return 0;

- return hd->platform_ops->add_device(pdev->devfn, pdev);
+ rc = hd->platform_ops->add_device(pdev->devfn, pdev);
+ if ( rc || !pdev->phantom_stride )
+ return rc;
+
+ for ( devfn = pdev->devfn ; ; )
+ {
+ devfn += pdev->phantom_stride;
+ if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+ return 0;
+ rc = hd->platform_ops->add_device(devfn, pdev);
+ if ( rc )
+ printk(XENLOG_WARNING "IOMMU: add %04x:%02x:%02x.%u failed (%d)\n",
+ pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc);
+ }
}

int iommu_enable_device(struct pci_dev *pdev)
@@ -190,6 +205,8 @@ int iommu_enable_device(struct pci_dev *
int iommu_remove_device(struct pci_dev *pdev)
{
struct hvm_iommu *hd;
+ u8 devfn;
+
if ( !pdev->domain )
return -EINVAL;

@@ -197,6 +214,22 @@ int iommu_remove_device(struct pci_dev *
if ( !iommu_enabled || !hd->platform_ops )
return 0;

+ for ( devfn = pdev->devfn ; pdev->phantom_stride; )
+ {
+ int rc;
+
+ devfn += pdev->phantom_stride;
+ if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+ break;
+ rc = hd->platform_ops->remove_device(devfn, pdev);
+ if ( !rc )
+ continue;
+
+ printk(XENLOG_ERR "IOMMU: remove %04x:%02x:%02x.%u failed (%d)\n",
+ pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc);
+ return rc;
+ }
+
return hd->platform_ops->remove_device(pdev->devfn, pdev);
}

@@ -244,6 +277,18 @@ static int assign_device(struct domain *
if ( (rc = hd->platform_ops->assign_device(d, devfn, pdev)) )
goto done;

+ for ( ; pdev->phantom_stride; rc = 0 )
+ {
+ devfn += pdev->phantom_stride;
+ if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+ break;
+ rc = hd->platform_ops->assign_device(d, devfn, pdev);
+ if ( rc )
+ printk(XENLOG_G_WARNING "d%d: assign %04x:%02x:%02x.%u failed (%d)\n",
+ d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ rc);
+ }
+
if ( has_arch_pdevs(d) && !need_iommu(d) )
{
d->need_iommu = 1;
@@ -376,6 +421,21 @@ int deassign_device(struct domain *d, u1
if ( !pdev )
return -ENODEV;

+ while ( pdev->phantom_stride )
+ {
+ devfn += pdev->phantom_stride;
+ if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+ break;
+ ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev);
+ if ( !ret )
+ continue;
+
+ printk(XENLOG_G_ERR "d%d: deassign %04x:%02x:%02x.%u failed (%d)\n",
+ d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), ret);
+ return ret;
+ }
+
+ devfn = pdev->devfn;
ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev);
if ( ret )
{
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -146,6 +146,8 @@ static struct pci_dev *alloc_pdev(struct
/* update bus2bridge */
switch ( pdev->type = pdev_type(pseg->nr, bus, devfn) )
{
+ int pos;
+ u16 cap;
u8 sec_bus, sub_bus;

case DEV_TYPE_PCIe_BRIDGE:
@@ -169,6 +171,20 @@ static struct pci_dev *alloc_pdev(struct
break;

case DEV_TYPE_PCIe_ENDPOINT:
+ pos = pci_find_cap_offset(pseg->nr, bus, PCI_SLOT(devfn),
+ PCI_FUNC(devfn), PCI_CAP_ID_EXP);
+ BUG_ON(!pos);
+ cap = pci_conf_read16(pseg->nr, bus, PCI_SLOT(devfn),
+ PCI_FUNC(devfn), pos + PCI_EXP_DEVCAP);
+ if ( cap & PCI_EXP_DEVCAP_PHANTOM )
+ {
+ pdev->phantom_stride = 8 >> MASK_EXTR(cap,
+ PCI_EXP_DEVCAP_PHANTOM);
+ if ( PCI_FUNC(devfn) >= pdev->phantom_stride )
+ pdev->phantom_stride = 0;
+ }
+ break;
+
case DEV_TYPE_PCI:
break;

@@ -266,6 +282,27 @@ struct pci_dev *pci_get_pdev(int seg, in
return NULL;
}

+struct pci_dev *pci_get_real_pdev(int seg, int bus, int devfn)
+{
+ struct pci_dev *pdev;
+ int stride;
+
+ if ( seg < 0 || bus < 0 || devfn < 0 )
+ return NULL;
+
+ for ( pdev = pci_get_pdev(seg, bus, devfn), stride = 4;
+ !pdev && stride; stride >>= 1 )
+ {
+ if ( !(devfn & (8 - stride)) )
+ continue;
+ pdev = pci_get_pdev(seg, bus, devfn & ~(8 - stride));
+ if ( pdev && stride != pdev->phantom_stride )
+ pdev = NULL;
+ }
+
+ return pdev;
+}
+
struct pci_dev *pci_get_pdev_by_domain(
struct domain *d, int seg, int bus, int devfn)
{
@@ -464,8 +501,19 @@ int pci_add_device(u16 seg, u8 bus, u8 d

out:
spin_unlock(&pcidevs_lock);
- printk(XENLOG_DEBUG "PCI add %s %04x:%02x:%02x.%u\n", pdev_type,
- seg, bus, slot, func);
+ if ( !ret )
+ {
+ printk(XENLOG_DEBUG "PCI add %s %04x:%02x:%02x.%u\n", pdev_type,
+ seg, bus, slot, func);
+ while ( pdev->phantom_stride )
+ {
+ func += pdev->phantom_stride;
+ if ( PCI_SLOT(func) )
+ break;
+ printk(XENLOG_DEBUG "PCI phantom %04x:%02x:%02x.%u\n",
+ seg, bus, slot, func);
+ }
+ }
return ret;
}

@@ -657,7 +705,7 @@ void pci_check_disable_device(u16 seg, u
u16 cword;

spin_lock(&pcidevs_lock);
- pdev = pci_get_pdev(seg, bus, devfn);
+ pdev = pci_get_real_pdev(seg, bus, devfn);
if ( pdev )
{
if ( now < pdev->fault.time ||
@@ -674,6 +722,7 @@ void pci_check_disable_device(u16 seg, u

/* Tell the device to stop DMAing; we can't rely on the guest to
* control it for us. */
+ devfn = pdev->devfn;
cword = pci_conf_read16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
PCI_COMMAND);
pci_conf_write16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
@@ -731,6 +780,27 @@ struct setup_dom0 {
int (*handler)(u8 devfn, struct pci_dev *);
};

+static void setup_one_dom0_device(const struct setup_dom0 *ctxt,
+ struct pci_dev *pdev)
+{
+ u8 devfn = pdev->devfn;
+
+ do {
+ int err = ctxt->handler(devfn, pdev);
+
+ if ( err )
+ {
+ printk(XENLOG_ERR "setup %04x:%02x:%02x.%u for d%d failed (%d)\n",
+ pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ ctxt->d->domain_id, err);
+ if ( devfn == pdev->devfn )
+ return;
+ }
+ devfn += pdev->phantom_stride;
+ } while ( devfn != pdev->devfn &&
+ PCI_SLOT(devfn) == PCI_SLOT(pdev->devfn) );
+}
+
static int __init _setup_dom0_pci_devices(struct pci_seg *pseg, void *arg)
{
struct setup_dom0 *ctxt = arg;
@@ -747,7 +817,7 @@ static int __init _setup_dom0_pci_device

pdev->domain = ctxt->d;
list_add(&pdev->domain_list, &ctxt->d->arch.pdev_list);
- ctxt->handler(devfn, pdev);
+ setup_one_dom0_device(ctxt, pdev);
}
}

--- a/xen/include/xen/lib.h
+++ b/xen/include/xen/lib.h
@@ -58,6 +58,9 @@ do {

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]) + __must_be_array(x))

+#define MASK_EXTR(v, m) (((v) & (m)) / ((m) & -(m)))
+#define MASK_INSR(v, m) (((v) * ((m) & -(m))) & (m))
+
#define reserve_bootmem(_p,_l) ((void)0)

struct domain;
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -63,6 +63,8 @@ struct pci_dev {
const u8 bus;
const u8 devfn;

+ u8 phantom_stride;
+
enum pdev_type {
DEV_TYPE_PCI_UNKNOWN,
DEV_TYPE_PCIe_ENDPOINT,
@@ -113,6 +115,7 @@ int pci_remove_device(u16 seg, u8 bus, u
int pci_ro_device(int seg, int bus, int devfn);
void arch_pci_ro_device(int seg, int bdf);
struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);
+struct pci_dev *pci_get_real_pdev(int seg, int bus, int devfn);
struct pci_dev *pci_get_pdev_by_domain(
struct domain *, int seg, int bus, int devfn);
void pci_check_disable_device(u16 seg, u8 bus, u8 devfn);
++++++ 26330-VT-d-phantom-MSI.patch ++++++
References: bnc#787169

# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1357559812 -3600
# Node ID b514b7118958327605e33dd387944832bc8d734a
# Parent c9a01b396cb4eaedef30e9a6ed615115a9f8bfc5
VT-d: relax source qualifier for MSI of phantom functions

With ordinary requests allowed to come from phantom functions, the
remapping tables ought to be set up to allow for MSI triggers to come
from other than the "real" device too.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: "Zhang, Xiantao" <xiantao.zhang@xxxxxxxxx>

--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -438,13 +438,22 @@ static void set_msi_source_id(struct pci
devfn = pdev->devfn;
switch ( pdev->type )
{
+ unsigned int sq;
+
case DEV_TYPE_PCIe_BRIDGE:
case DEV_TYPE_PCIe2PCI_BRIDGE:
case DEV_TYPE_LEGACY_PCI_BRIDGE:
break;

case DEV_TYPE_PCIe_ENDPOINT:
- set_ire_sid(ire, SVT_VERIFY_SID_SQ, SQ_ALL_16, PCI_BDF2(bus, devfn));
+ switch ( pdev->phantom_stride )
+ {
+ case 1: sq = SQ_13_IGNORE_3; break;
+ case 2: sq = SQ_13_IGNORE_2; break;
+ case 4: sq = SQ_13_IGNORE_1; break;
+ default: sq = SQ_ALL_16; break;
+ }
+ set_ire_sid(ire, SVT_VERIFY_SID_SQ, sq, PCI_BDF2(bus, devfn));
break;

case DEV_TYPE_PCI:
++++++ 26331-IOMMU-phantom-dev-quirk.patch ++++++
References: bnc#787169

# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1357559889 -3600
# Node ID 23c4bbc0111dd807561b2c62cbc5798220943a0d
# Parent b514b7118958327605e33dd387944832bc8d734a
IOMMU: add option to specify devices behaving like ones using phantom functions

At least certain Marvell SATA controllers are known to issue bus master
requests with a non-zero function as origin, despite themselves being
single function devices.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: "Zhang, Xiantao" <xiantao.zhang@xxxxxxxxx>

--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -672,6 +672,16 @@ Defaults to booting secondary processors

Default: `on`

+### pci-phantom
+> `=[<seg>:]<bus>:<device>,<stride>`
+
+Mark a group of PCI devices as using phantom functions without actually
+advertising so, so the IOMMU can create translation contexts for them.
+
+All numbers specified must be hexadecimal ones.
+
+This option can be specified more than once (up to 8 times at present).
+
### ple\_gap
`= <integer>`

--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -123,6 +123,49 @@ const unsigned long *pci_get_ro_map(u16
return pseg ? pseg->ro_map : NULL;
}

+static struct phantom_dev {
+ u16 seg;
+ u8 bus, slot, stride;
+} phantom_devs[8];
+static unsigned int nr_phantom_devs;
+
+static void __init parse_phantom_dev(char *str) {
+ const char *s = str;
+ struct phantom_dev phantom;
+
+ if ( !s || !*s || nr_phantom_devs >= ARRAY_SIZE(phantom_devs) )
+ return;
+
+ phantom.seg = simple_strtol(s, &s, 16);
+ if ( *s != ':' )
+ return;
+
+ phantom.bus = simple_strtol(s + 1, &s, 16);
+ if ( *s == ',' )
+ {
+ phantom.slot = phantom.bus;
+ phantom.bus = phantom.seg;
+ phantom.seg = 0;
+ }
+ else if ( *s == ':' )
+ phantom.slot = simple_strtol(s + 1, &s, 16);
+ else
+ return;
+
+ if ( *s != ',' )
+ return;
+ switch ( phantom.stride = simple_strtol(s + 1, &s, 0) )
+ {
+ case 1: case 2: case 4:
+ if ( *s )
+ default:
+ return;
+ }
+
+ phantom_devs[nr_phantom_devs++] = phantom;
+}
+custom_param("pci-phantom", parse_phantom_dev);
+
static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn)
{
struct pci_dev *pdev;
@@ -183,6 +226,20 @@ static struct pci_dev *alloc_pdev(struct
if ( PCI_FUNC(devfn) >= pdev->phantom_stride )
pdev->phantom_stride = 0;
}
+ else
+ {
+ unsigned int i;
+
+ for ( i = 0; i < nr_phantom_devs; ++i )
+ if ( phantom_devs[i].seg == pseg->nr &&
+ phantom_devs[i].bus == bus &&
+ phantom_devs[i].slot == PCI_SLOT(devfn) &&
+ phantom_devs[i].stride > PCI_FUNC(devfn) )
+ {
+ pdev->phantom_stride = phantom_devs[i].stride;
+ break;
+ }
+ }
break;

case DEV_TYPE_PCI:
++++++ 26332-x86-compat-show-guest-stack-mfn.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1357561709 -3600
# Node ID 8e942f2f3b45edc5bb1f7a6e05de288342426f0d
# Parent 23c4bbc0111dd807561b2c62cbc5798220943a0d
x86: compat_show_guest_stack() should not truncate MFN

Re-using "addr" here was a mistake, as it is a 32-bit quantity.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>

--- a/xen/arch/x86/x86_64/compat/traps.c
+++ b/xen/arch/x86/x86_64/compat/traps.c
@@ -20,11 +20,12 @@ void compat_show_guest_stack(struct vcpu
if ( v != current )
{
struct vcpu *vcpu;
+ unsigned long mfn;

ASSERT(guest_kernel_mode(v, regs));
- addr = read_cr3() >> PAGE_SHIFT;
+ mfn = read_cr3() >> PAGE_SHIFT;
for_each_vcpu( v->domain, vcpu )
- if ( pagetable_get_pfn(vcpu->arch.guest_table) == addr )
+ if ( pagetable_get_pfn(vcpu->arch.guest_table) == mfn )
break;
if ( !vcpu )
{
++++++ 26333-x86-get_page_type-assert.patch ++++++
References: CVE-2013-0154 XSA-37 bnc#797031

# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1357564826 -3600
# Node ID e1facbde56ff4e5e85f9a4935abc99eb24367cd0
# Parent 8e942f2f3b45edc5bb1f7a6e05de288342426f0d
x86: fix assertion in get_page_type()

c/s 22998:e9fab50d7b61 (and immediately following ones) made it
possible that __get_page_type() returns other than -EINVAL, in
particular -EBUSY. Consequently, the assertion in get_page_type()
should check for only the return values we absolutely don't expect to
see there.

This is XSA-37 / CVE-2013-0154.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -2603,7 +2603,7 @@ int get_page_type(struct page_info *page
int rc = __get_page_type(page, type, 0);
if ( likely(rc == 0) )
return 1;
- ASSERT(rc == -EINVAL);
+ ASSERT(rc != -EINTR && rc != -EAGAIN);
return 0;
}

++++++ 26340-VT-d-intremap-verify-legacy-bridge.patch ++++++
References: CVE-2012-5634 XSA-33 bnc#794316

# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1357748006 -3600
# Node ID 19fd1237ff0dfa3d97a896d6ed6fbbd33f816a9f
# Parent 56b0d5476c11bfd09986080dfa97923586ef474f
VT-d: fix interrupt remapping source validation for devices behind legacy
bridges

Using SVT_VERIFY_BUS here doesn't make sense; native Linux also
uses SVT_VERIFY_SID_SQ here instead.

This is XSA-33 / CVE-2012-5634.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -469,7 +469,7 @@ static void set_msi_source_id(struct pci
set_ire_sid(ire, SVT_VERIFY_BUS, SQ_ALL_16,
(bus << 8) | pdev->bus);
else if ( pdev_type(seg, bus, devfn) == DEV_TYPE_LEGACY_PCI_BRIDGE )
- set_ire_sid(ire, SVT_VERIFY_BUS, SQ_ALL_16,
+ set_ire_sid(ire, SVT_VERIFY_SID_SQ, SQ_ALL_16,
PCI_BDF2(bus, devfn));
}
break;
++++++ 26341-hvm-firmware-passthrough.patch ++++++
fate#313584: pass bios information to XEN HVM guest

# HG changeset patch
# User Ross Philipson <ross.philipson@xxxxxxxxxx>
# Date 1357838188 0
# Node ID 07bf59a7ce837bd795e2df2f28166cfe41990d3d
# Parent 19fd1237ff0dfa3d97a896d6ed6fbbd33f816a9f
HVM xenstore strings and firmware passthrough header

Add public HVM definitions header for xenstore strings used in
HVMLOADER. In addition this header describes the use of the firmware
passthrough values set using xenstore.

Signed-off-by: Ross Philipson <ross.philipson@xxxxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>

diff -r 19fd1237ff0d -r 07bf59a7ce83 xen/include/public/hvm/hvm_xs_strings.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/public/hvm/hvm_xs_strings.h Thu Jan 10 17:16:28 2013 +0000
@@ -0,0 +1,79 @@
+/******************************************************************************
+ * hvm/hvm_xs_strings.h
+ *
+ * HVM xenstore strings used in HVMLOADER.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_HVM_XS_STRINGS_H__
+#define __XEN_PUBLIC_HVM_HVM_XS_STRINGS_H__
+
+#define HVM_XS_HVMLOADER "hvmloader"
+#define HVM_XS_BIOS "hvmloader/bios"
+#define HVM_XS_GENERATION_ID_ADDRESS "hvmloader/generation-id-address"
+
+/* The following values allow additional ACPI tables to be added to the
+ * virtual ACPI BIOS that hvmloader constructs. The values specify the guest
+ * physical address and length of a block of ACPI tables to add. The format of
+ * the block is simply concatenated raw tables (which specify their own length
+ * in the ACPI header).
+ */
+#define HVM_XS_ACPI_PT_ADDRESS "hvmloader/acpi/address"
+#define HVM_XS_ACPI_PT_LENGTH "hvmloader/acpi/length"
+
+/* Any number of SMBIOS types can be passed through to an HVM guest using
+ * the following xenstore values. The values specify the guest physical
+ * address and length of a block of SMBIOS structures for hvmloader to use.
+ * The block is formatted in the following way:
+ *
+ * <length><struct><length><struct>...
+ *
+ * Each length separator is a 32b integer indicating the length of the next
+ * SMBIOS structure. For DMTF defined types (0 - 121), the passed in struct
+ * will replace the default structure in hvmloader. In addition, any
+ * OEM/vendor types (128 - 255) will all be added.
+ */
+#define HVM_XS_SMBIOS_PT_ADDRESS "hvmloader/smbios/address"
+#define HVM_XS_SMBIOS_PT_LENGTH "hvmloader/smbios/length"
+
+/* Set to 1 to enable SMBIOS default portable battery (type 22) values. */
+#define HVM_XS_SMBIOS_DEFAULT_BATTERY "hvmloader/smbios/default_battery"
+
+/* The following xenstore values are used to override some of the default
+ * string values in the SMBIOS table constructed in hvmloader.
+ */
+#define HVM_XS_BIOS_STRINGS "bios-strings"
+#define HVM_XS_BIOS_VENDOR "bios-strings/bios-vendor"
+#define HVM_XS_BIOS_VERSION "bios-strings/bios-version"
+#define HVM_XS_SYSTEM_MANUFACTURER "bios-strings/system-manufacturer"
+#define HVM_XS_SYSTEM_PRODUCT_NAME "bios-strings/system-product-name"
+#define HVM_XS_SYSTEM_VERSION "bios-strings/system-version"
+#define HVM_XS_SYSTEM_SERIAL_NUMBER "bios-strings/system-serial-number"
+#define HVM_XS_ENCLOSURE_MANUFACTURER "bios-strings/enclosure-manufacturer"
+#define HVM_XS_ENCLOSURE_SERIAL_NUMBER "bios-strings/enclosure-serial-number"
+#define HVM_XS_BATTERY_MANUFACTURER "bios-strings/battery-manufacturer"
+#define HVM_XS_BATTERY_DEVICE_NAME "bios-strings/battery-device-name"
+
+/* 1 to 99 OEM strings can be set in xenstore using values of the form
+ * below. These strings will be loaded into the SMBIOS type 11 structure.
+ */
+#define HVM_XS_OEM_STRINGS "bios-strings/oem-%02d"
+
+#endif /* __XEN_PUBLIC_HVM_HVM_XS_STRINGS_H__ */
++++++ 26342-hvm-firmware-passthrough.patch ++++++
fate#313584: pass bios information to XEN HVM guest

# HG changeset patch
# User Ross Philipson <ross.philipson@xxxxxxxxxx>
# Date 1357838241 0
# Node ID cabf395a6c849cc65e56f1640b18db0c3e0faf5d
# Parent 07bf59a7ce837bd795e2df2f28166cfe41990d3d
HVM firmware passthrough control tools support

Xen control tools support for loading the firmware passthrough blocks
during domain construction. SMBIOS and ACPI blocks are passed in using
the new xc_hvm_build_args structure. Each block is read and loaded
into the new domain address space behind the HVMLOADER image. The base
address for the two blocks is returned as an out parameter to the
caller via the args structure.

Signed-off-by: Ross Philipson <ross.philipson@xxxxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>

diff -r 07bf59a7ce83 -r cabf395a6c84 tools/libxc/xc_hvm_build_arm.c
--- a/tools/libxc/xc_hvm_build_arm.c Thu Jan 10 17:16:28 2013 +0000
+++ b/tools/libxc/xc_hvm_build_arm.c Thu Jan 10 17:17:21 2013 +0000
@@ -22,7 +22,7 @@
#include <xenguest.h>

int xc_hvm_build(xc_interface *xch, uint32_t domid,
- const struct xc_hvm_build_args *hvm_args)
+ struct xc_hvm_build_args *hvm_args)
{
errno = ENOSYS;
return -1;
diff -r 07bf59a7ce83 -r cabf395a6c84 tools/libxc/xc_hvm_build_x86.c
--- a/tools/libxc/xc_hvm_build_x86.c Thu Jan 10 17:16:28 2013 +0000
+++ b/tools/libxc/xc_hvm_build_x86.c Thu Jan 10 17:17:21 2013 +0000
@@ -49,6 +49,40 @@
#define NR_SPECIAL_PAGES 8
#define special_pfn(x) (0xff000u - NR_SPECIAL_PAGES + (x))

+static int modules_init(struct xc_hvm_build_args *args,
+ uint64_t vend, struct elf_binary *elf,
+ uint64_t *mstart_out, uint64_t *mend_out)
+{
+#define MODULE_ALIGN 1UL << 7
+#define MB_ALIGN 1UL << 20
+#define MKALIGN(x, a) (((uint64_t)(x) + (a) - 1) & ~(uint64_t)((a) - 1))
+ uint64_t total_len = 0, offset1 = 0;
+
+ if ( (args->acpi_module.length == 0)&&(args->smbios_module.length == 0) )
+ return 0;
+
+ /* Find the total length for the firmware modules with a reasonable large
+ * alignment size to align each the modules.
+ */
+ total_len = MKALIGN(args->acpi_module.length, MODULE_ALIGN);
+ offset1 = total_len;
+ total_len += MKALIGN(args->smbios_module.length, MODULE_ALIGN);
+
+ /* Want to place the modules 1Mb+change behind the loader image. */
+ *mstart_out = MKALIGN(elf->pend, MB_ALIGN) + (MB_ALIGN);
+ *mend_out = *mstart_out + total_len;
+
+ if ( *mend_out > vend )
+ return -1;
+
+ if ( args->acpi_module.length != 0 )
+ args->acpi_module.guest_addr_out = *mstart_out;
+ if ( args->smbios_module.length != 0 )
+ args->smbios_module.guest_addr_out = *mstart_out + offset1;
+
+ return 0;
+}
+
static void build_hvm_info(void *hvm_info_page, uint64_t mem_size,
uint64_t mmio_start, uint64_t mmio_size)
{
@@ -86,9 +120,8 @@ static void build_hvm_info(void *hvm_inf
hvm_info->checksum = -sum;
}

-static int loadelfimage(
- xc_interface *xch,
- struct elf_binary *elf, uint32_t dom, unsigned long *parray)
+static int loadelfimage(xc_interface *xch, struct elf_binary *elf,
+ uint32_t dom, unsigned long *parray)
{
privcmd_mmap_entry_t *entries = NULL;
unsigned long pfn_start = elf->pstart >> PAGE_SHIFT;
@@ -126,6 +159,66 @@ static int loadelfimage(
return rc;
}

+static int loadmodules(xc_interface *xch,
+ struct xc_hvm_build_args *args,
+ uint64_t mstart, uint64_t mend,
+ uint32_t dom, unsigned long *parray)
+{
+ privcmd_mmap_entry_t *entries = NULL;
+ unsigned long pfn_start;
+ unsigned long pfn_end;
+ size_t pages;
+ uint32_t i;
+ uint8_t *dest;
+ int rc = -1;
+
+ if ( (mstart == 0)||(mend == 0) )
+ return 0;
+
+ pfn_start = (unsigned long)(mstart >> PAGE_SHIFT);
+ pfn_end = (unsigned long)((mend + PAGE_SIZE - 1) >> PAGE_SHIFT);
+ pages = pfn_end - pfn_start;
+
+ /* Map address space for module list. */
+ entries = calloc(pages, sizeof(privcmd_mmap_entry_t));
+ if ( entries == NULL )
+ goto error_out;
+
+ for ( i = 0; i < pages; i++ )
+ entries[i].mfn = parray[(mstart >> PAGE_SHIFT) + i];
+
+ dest = xc_map_foreign_ranges(
+ xch, dom, pages << PAGE_SHIFT, PROT_READ | PROT_WRITE, 1 << PAGE_SHIFT,
+ entries, pages);
+ if ( dest == NULL )
+ goto error_out;
+
+ /* Zero the range so padding is clear between modules */
+ memset(dest, 0, pages << PAGE_SHIFT);
+
+ /* Load modules into range */
+ if ( args->acpi_module.length != 0 )
+ {
+ memcpy(dest,
+ args->acpi_module.data,
+ args->acpi_module.length);
+ }
+ if ( args->smbios_module.length != 0 )
+ {
+ memcpy(dest + (args->smbios_module.guest_addr_out - mstart),
+ args->smbios_module.data,
+ args->smbios_module.length);
+ }
+
+ munmap(dest, pages << PAGE_SHIFT);
+ rc = 0;
+
+ error_out:
+ free(entries);
+
+ return rc;
+}
+
/*
* Check whether there exists mmio hole in the specified memory range.
* Returns 1 if exists, else returns 0.
@@ -140,7 +233,7 @@ static int check_mmio_hole(uint64_t star
}

static int setup_guest(xc_interface *xch,
- uint32_t dom, const struct xc_hvm_build_args *args,
+ uint32_t dom, struct xc_hvm_build_args *args,
char *image, unsigned long image_size)
{
xen_pfn_t *page_array = NULL;
@@ -153,6 +246,7 @@ static int setup_guest(xc_interface *xch
uint32_t *ident_pt;
struct elf_binary elf;
uint64_t v_start, v_end;
+ uint64_t m_start = 0, m_end = 0;
int rc;
xen_capabilities_info_t caps;
unsigned long stat_normal_pages = 0, stat_2mb_pages = 0,
@@ -178,11 +272,19 @@ static int setup_guest(xc_interface *xch
goto error_out;
}

+ if ( modules_init(args, v_end, &elf, &m_start, &m_end) != 0 )
+ {
+ ERROR("Insufficient space to load modules.");
+ goto error_out;
+ }
+
IPRINTF("VIRTUAL MEMORY ARRANGEMENT:\n"
" Loader: %016"PRIx64"->%016"PRIx64"\n"
+ " Modules: %016"PRIx64"->%016"PRIx64"\n"
" TOTAL: %016"PRIx64"->%016"PRIx64"\n"
" ENTRY ADDRESS: %016"PRIx64"\n",
elf.pstart, elf.pend,
+ m_start, m_end,
v_start, v_end,
elf_uval(&elf, elf.ehdr, e_entry));

@@ -337,6 +439,9 @@ static int setup_guest(xc_interface *xch
if ( loadelfimage(xch, &elf, dom, page_array) != 0 )
goto error_out;

+ if ( loadmodules(xch, args, m_start, m_end, dom, page_array) != 0 )
+ goto error_out;
+
if ( (hvm_info_page = xc_map_foreign_range(
xch, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
HVM_INFO_PFN)) == NULL )
@@ -413,7 +518,7 @@ static int setup_guest(xc_interface *xch
* Create a domain for a virtualized Linux, using files/filenames.
*/
int xc_hvm_build(xc_interface *xch, uint32_t domid,
- const struct xc_hvm_build_args *hvm_args)
+ struct xc_hvm_build_args *hvm_args)
{
struct xc_hvm_build_args args = *hvm_args;
void *image;
@@ -441,6 +546,15 @@ int xc_hvm_build(xc_interface *xch, uint

sts = setup_guest(xch, domid, &args, image, image_size);

+ if (!sts)
+ {
+ /* Return module load addresses to caller */
+ hvm_args->acpi_module.guest_addr_out =
+ args.acpi_module.guest_addr_out;
+ hvm_args->smbios_module.guest_addr_out =
+ args.smbios_module.guest_addr_out;
+ }
+
free(image);

return sts;
@@ -461,6 +575,7 @@ int xc_hvm_build_target_mem(xc_interface
{
struct xc_hvm_build_args args = {};

+ memset(&args, 0, sizeof(struct xc_hvm_build_args));
args.mem_size = (uint64_t)memsize << 20;
args.mem_target = (uint64_t)target << 20;
args.image_file_name = image_name;
diff -r 07bf59a7ce83 -r cabf395a6c84 tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h Thu Jan 10 17:16:28 2013 +0000
+++ b/tools/libxc/xenguest.h Thu Jan 10 17:17:21 2013 +0000
@@ -211,11 +211,23 @@ int xc_linux_build_mem(xc_interface *xch
unsigned int console_evtchn,
unsigned long *console_mfn);

+struct xc_hvm_firmware_module {
+ uint8_t *data;
+ uint32_t length;
+ uint64_t guest_addr_out;
+};
+
struct xc_hvm_build_args {
uint64_t mem_size; /* Memory size in bytes. */
uint64_t mem_target; /* Memory target in bytes. */
uint64_t mmio_size; /* Size of the MMIO hole in bytes. */
const char *image_file_name; /* File name of the image to load. */
+
+ /* Extra ACPI tables passed to HVMLOADER */
+ struct xc_hvm_firmware_module acpi_module;
+
+ /* Extra SMBIOS structures passed to HVMLOADER */
+ struct xc_hvm_firmware_module smbios_module;
};

/**
@@ -228,7 +240,7 @@ struct xc_hvm_build_args {
* are optional.
*/
int xc_hvm_build(xc_interface *xch, uint32_t domid,
- const struct xc_hvm_build_args *hvm_args);
+ struct xc_hvm_build_args *hvm_args);

int xc_hvm_build_target_mem(xc_interface *xch,
uint32_t domid,
diff -r 07bf59a7ce83 -r cabf395a6c84 tools/libxc/xg_private.c
--- a/tools/libxc/xg_private.c Thu Jan 10 17:16:28 2013 +0000
+++ b/tools/libxc/xg_private.c Thu Jan 10 17:17:21 2013 +0000
@@ -192,7 +192,7 @@ unsigned long csum_page(void *page)
__attribute__((weak))
int xc_hvm_build(xc_interface *xch,
uint32_t domid,
- const struct xc_hvm_build_args *hvm_args)
+ struct xc_hvm_build_args *hvm_args)
{
errno = ENOSYS;
return -1;
++++++ 26343-hvm-firmware-passthrough.patch ++++++
++++ 645 lines (skipped)

++++++ 26344-hvm-firmware-passthrough.patch ++++++
fate#313584: pass bios information to XEN HVM guest

# HG changeset patch
# User Ross Philipson <ross.philipson@xxxxxxxxxx>
# Date 1357838323 0
# Node ID b9c38bea15b117552ecb51809779c7cfef82dd44
# Parent a7ce196f40444fafbe8f13b2d80e4885d4321806
HVM firmware passthrough ACPI processing

ACPI table passthrough support allowing additional static tables and
SSDTs (AML code) to be loaded. These additional tables are added at
the end of the secondary table list in the RSDT/XSDT tables.

Signed-off-by: Ross Philipson <ross.philipson@xxxxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>

diff -r a7ce196f4044 -r b9c38bea15b1 tools/firmware/hvmloader/acpi/build.c
--- a/tools/firmware/hvmloader/acpi/build.c Thu Jan 10 17:18:10 2013 +0000
+++ b/tools/firmware/hvmloader/acpi/build.c Thu Jan 10 17:18:43 2013 +0000
@@ -23,6 +23,9 @@
#include "ssdt_pm.h"
#include "../config.h"
#include "../util.h"
+#include <xen/hvm/hvm_xs_strings.h>
+
+#define ACPI_MAX_SECONDARY_TABLES 16

#define align16(sz) (((sz) + 15) & ~15)
#define fixed_strcpy(d, s) strncpy((d), (s), sizeof(d))
@@ -198,6 +201,52 @@ static struct acpi_20_waet *construct_wa
return waet;
}

+static int construct_passthrough_tables(unsigned long *table_ptrs,
+ int nr_tables)
+{
+ const char *s;
+ uint8_t *acpi_pt_addr;
+ uint32_t acpi_pt_length;
+ struct acpi_header *header;
+ int nr_added;
+ int nr_max = (ACPI_MAX_SECONDARY_TABLES - nr_tables - 1);
+ uint32_t total = 0;
+ uint8_t *buffer;
+
+ s = xenstore_read(HVM_XS_ACPI_PT_ADDRESS, NULL);
+ if ( s == NULL )
+ return 0;
+
+ acpi_pt_addr = (uint8_t*)(uint32_t)strtoll(s, NULL, 0);
+ if ( acpi_pt_addr == NULL )
+ return 0;
+
+ s = xenstore_read(HVM_XS_ACPI_PT_LENGTH, NULL);
+ if ( s == NULL )
+ return 0;
+
+ acpi_pt_length = (uint32_t)strtoll(s, NULL, 0);
+
+ for ( nr_added = 0; nr_added < nr_max; nr_added++ )
+ {
+ if ( (acpi_pt_length - total) < sizeof(struct acpi_header) )
+ break;
+
+ header = (struct acpi_header*)acpi_pt_addr;
+
+ buffer = mem_alloc(header->length, 16);
+ if ( buffer == NULL )
+ break;
+ memcpy(buffer, header, header->length);
+
+ table_ptrs[nr_tables++] = (unsigned long)buffer;
+ total += header->length;
+ acpi_pt_addr += header->length;
+ }
+
+ return nr_added;
+}
+
static int construct_secondary_tables(unsigned long *table_ptrs,
struct acpi_info *info)
{
@@ -293,6 +342,9 @@ static int construct_secondary_tables(un
}
}

+ /* Load any additional tables passed through. */
+ nr_tables += construct_passthrough_tables(table_ptrs, nr_tables);
+
table_ptrs[nr_tables] = 0;
return nr_tables;
}
@@ -327,7 +379,7 @@ void acpi_build_tables(struct acpi_confi
struct acpi_10_fadt *fadt_10;
struct acpi_20_facs *facs;
unsigned char *dsdt;
- unsigned long secondary_tables[16];
+ unsigned long secondary_tables[ACPI_MAX_SECONDARY_TABLES];
int nr_secondaries, i;
unsigned long vm_gid_addr;

++++++ 26369-libxl-devid.patch ++++++
commit 5420f26507fc5c9853eb1076401a8658d72669da
Author: Jim Fehlig <jfehlig@xxxxxxxx>
Date: Fri Jan 11 12:22:26 2013 +0000

libxl: Set vfb and vkb devid if not done so by the caller

Other devices set a sensible devid if the caller has not done so.
Do the same for vfb and vkb. While at it, factor out the common code
used to determine a sensible devid, so it can be used by other
libxl__device_*_add functions.

Signed-off-by: Jim Fehlig <jfehlig@xxxxxxxx>
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Committed-by: Ian Campbell <ian.campbell@xxxxxxxxxx>

Index: xen-4.2.1-testing/tools/libxl/libxl.c
===================================================================
--- xen-4.2.1-testing.orig/tools/libxl/libxl.c
+++ xen-4.2.1-testing/tools/libxl/libxl.c
@@ -1727,6 +1727,26 @@ out:
return;
}

+/* common function to get next device id */
+static int libxl__device_nextid(libxl__gc *gc, uint32_t domid, char *device)
+{
+ char *dompath, **l;
+ unsigned int nb;
+ int nextid = -1;
+
+ if (!(dompath = libxl__xs_get_dompath(gc, domid)))
+ return nextid;
+
+ l = libxl__xs_directory(gc, XBT_NULL,
+ GCSPRINTF("%s/device/%s", dompath, device), &nb);
+ if (l == NULL || nb == 0)
+ nextid = 0;
+ else
+ nextid = strtoul(l[nb - 1], NULL, 10) + 1;
+
+ return nextid;
+}
+

/******************************************************************************/

int libxl__device_disk_setdefault(libxl__gc *gc, libxl_device_disk *disk)
@@ -2563,8 +2583,7 @@ void libxl__device_nic_add(libxl__egc *e
flexarray_t *front;
flexarray_t *back;
libxl__device *device;
- char *dompath, **l;
- unsigned int nb, rc;
+ unsigned int rc;

rc = libxl__device_nic_setdefault(gc, nic, domid);
if (rc) goto out;
@@ -2581,16 +2600,10 @@ void libxl__device_nic_add(libxl__egc *e
}

if (nic->devid == -1) {
- if (!(dompath = libxl__xs_get_dompath(gc, domid))) {
+ if ((nic->devid = libxl__device_nextid(gc, domid, "vif") < 0)) {
rc = ERROR_FAIL;
goto out_free;
}
- if (!(l = libxl__xs_directory(gc, XBT_NULL,
- libxl__sprintf(gc, "%s/device/vif", dompath), &nb))) {
- nic->devid = 0;
- } else {
- nic->devid = strtoul(l[nb - 1], NULL, 10) + 1;
- }
}

GCNEW(device);
@@ -2977,6 +2990,13 @@ int libxl__device_vkb_add(libxl__gc *gc,
goto out_free;
}

+ if (vkb->devid == -1) {
+ if ((vkb->devid = libxl__device_nextid(gc, domid, "vkb") < 0)) {
+ rc = ERROR_FAIL;
+ goto out_free;
+ }
+ }
+
rc = libxl__device_from_vkb(gc, domid, vkb, &device);
if (rc != 0) goto out_free;

@@ -3078,6 +3098,13 @@ int libxl__device_vfb_add(libxl__gc *gc,
goto out_free;
}

+ if (vfb->devid == -1) {
+ if ((vfb->devid = libxl__device_nextid(gc, domid, "vfb") < 0)) {
+ rc = ERROR_FAIL;
+ goto out_free;
+ }
+ }
+
rc = libxl__device_from_vfb(gc, domid, vfb, &device);
if (rc != 0) goto out_free;

++++++ 26370-libxc-x86-initial-mapping-fit.patch ++++++
# HG changeset patch
# User Ian Campbell <Ian.Campbell@xxxxxxxxxx>
# Date 1357906947 0
# Node ID ba2d73234d73fc0faa027cd9bdfd3ac90642733c
# Parent 84d87ca765be81c215ef3b67d2ed71acfba73553
libxc: x86: ensure that the initial mapping fits into the guest's memory

In particular we need to check that adding 512KB of slack and
rounding up to a 4MB boundary do not overflow the guest's memory
allocation. Otherwise we run off the end of the p2m when building the
guest's initial page tables and populate them with garbage.

Wei noticed this when building tiny (2MB) mini-os domains.

Reported-by: Wei Liu <Wei.Liu2@xxxxxxxxxx>
Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Acked-by: Jan Beulich <jbeulich@xxxxxxxx>
Committed-by: Ian Campbell <ian.campbell@xxxxxxxxxx>

--- a/tools/libxc/xc_dom_core.c
+++ b/tools/libxc/xc_dom_core.c
@@ -871,7 +871,8 @@ int xc_dom_build_image(struct xc_dom_ima
goto err;
if ( dom->arch_hooks->count_pgtables )
{
- dom->arch_hooks->count_pgtables(dom);
+ if ( dom->arch_hooks->count_pgtables(dom) != 0 )
+ goto err;
if ( (dom->pgtables > 0) &&
(xc_dom_alloc_segment(dom, &dom->pgtables_seg, "page tables", 0,
dom->pgtables * page_size) != 0) )
--- a/tools/libxc/xc_dom_x86.c
+++ b/tools/libxc/xc_dom_x86.c
@@ -82,6 +82,7 @@ static int count_pgtables(struct xc_dom_
{
int pages, extra_pages;
xen_vaddr_t try_virt_end;
+ xen_pfn_t try_pfn_end;

extra_pages = dom->alloc_bootstack ? 1 : 0;
extra_pages += dom->extra_pages;
@@ -91,6 +92,17 @@ static int count_pgtables(struct xc_dom_
{
try_virt_end = round_up(dom->virt_alloc_end + pages * PAGE_SIZE_X86,
bits_to_mask(22)); /* 4MB alignment */
+
+ try_pfn_end = (try_virt_end - dom->parms.virt_base) >> PAGE_SHIFT_X86;
+
+ if ( try_pfn_end > dom->total_pages )
+ {
+ xc_dom_panic(dom->xch, XC_OUT_OF_MEMORY,
+ "%s: not enough memory for initial mapping (%#"PRIpfn" > %#"PRIpfn")",
+ __FUNCTION__, try_pfn_end, dom->total_pages);
+ return -ENOMEM;
+ }
+
dom->pg_l4 =
nr_page_tables(dom, dom->parms.virt_base, try_virt_end, l4_bits);
dom->pg_l3 =
++++++ 26372-tools-paths.patch ++++++
# HG changeset patch
# User Bamvor Jian Zhang <bjzhang@xxxxxxxx>
# Date 1357906948 0
# Node ID 2ad5792b4274d76ced39515cbd3f84898b181768
# Parent ba2d73234d73fc0faa027cd9bdfd3ac90642733c
fix wrong path while calling pygrub and libxl-save-helper

In current Xen on x86_64, the default libexec directory is /usr/lib/xen/bin,
while the private bindir is /usr/lib64/xen/bin. But some commands (pygrub,
libxl-save-helper) located in the private bindir are called from the
libexec directory, which leads to the following errors:
1, for pygrub bootloader:

libxl: debug: libxl_bootloader.c:429:bootloader_disk_attached_cb:
/usr/lib/xen/bin/pygrub doesn't exist, falling back to config path

2, for libxl-save-helper:

libxl: cannot execute /usr/lib/xen/bin/libxl-save-helper: No such file or
directory
libxl: error: libxl_utils.c:363:libxl_read_exactly: file/stream truncated
reading ipc msg header from domain 3 save/restore helper stdout pipe
libxl: error: libxl_exec.c:118:libxl_report_child_exitstatus: domain 3
save/restore helper [10222] exited with error status 255

There are two ways to fix the above errors. The first is to make such commands
available in both /usr/lib/xen/bin and /usr/lib64/xen/bin (the latter being a
symbolic link to the former), e.g. qemu-dm. The second is to use the private
bindir instead of the libexec dir, e.g. xenconsole.
For these cases, the latter is suitable.

Signed-off-by: Bamvor Jian Zhang <bjzhang@xxxxxxxx>
Committed-by: Ian Campbell <ian.campbell@xxxxxxxxxx>

Index: xen-4.2.1-testing/tools/libxl/libxl_bootloader.c
===================================================================
--- xen-4.2.1-testing.orig/tools/libxl/libxl_bootloader.c
+++ xen-4.2.1-testing/tools/libxl/libxl_bootloader.c
@@ -419,7 +419,7 @@ static void bootloader_disk_attached_cb(
const char *bltmp;
struct stat st;

- bltmp = libxl__abs_path(gc, bootloader, libxl__libexec_path());
+ bltmp = libxl__abs_path(gc, bootloader, libxl__private_bindir_path());
/* Check to see if the file exists in this location; if not,
* fall back to checking the path */
LOG(DEBUG, "Checking for bootloader in libexec path: %s", bltmp);
Index: xen-4.2.1-testing/tools/libxl/libxl_save_callout.c
===================================================================
--- xen-4.2.1-testing.orig/tools/libxl/libxl_save_callout.c
+++ xen-4.2.1-testing/tools/libxl/libxl_save_callout.c
@@ -172,7 +172,7 @@ static void run_helper(libxl__egc *egc,
shs->stdout_what = GCSPRINTF("domain %"PRIu32" save/restore helper"
" stdout pipe", domid);

- *arg++ = getenv("LIBXL_SAVE_HELPER") ?: LIBEXEC "/" "libxl-save-helper";
+ *arg++ = getenv("LIBXL_SAVE_HELPER") ?: PRIVATE_BINDIR "/" "libxl-save-helper";
*arg++ = mode_arg;
const char **stream_fd_arg = arg++;
for (i=0; i<num_argnums; i++)
++++++ 26395-x86-FPU-context-conditional.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1358341015 -3600
# Node ID b4cbb83f9a1f57b4f26f2d35998cda42b904ea69
# Parent 327b812026fe62a990f1d81041729c42196983ca
x86: consistently mask floating point exceptions

c/s 23142:f5e8d152a565 resulted in v->arch.fpu_ctxt to point into the
save area allocated for xsave/xrstor (when they're available). The way
vcpu_restore_fpu_lazy() works (using fpu_init() for an uninitialized
vCPU only when there's no xsave support) causes this to load whatever
arch_set_info_guest() put there, irrespective of whether the i387 state
was specified to be valid in the respective input structure.

Consequently, with a cleared (all zeroes) incoming FPU context, and with
xsave available, one gets all exceptions unmasked (as opposed to the
legacy case, where FINIT and LDMXCSR get used, masking all exceptions).
This causes e.g. para-virtualized NetWare to crash.

The behavior of arch_set_info_guest() is thus being made more hardware-
like for the FPU portion of it: Considering it to be similar to INIT,
it will leave untouched all floating point state now. An alternative
would be to make the behavior RESET-like, forcing all state to known
values, albeit - taking into account legacy behavior - not to precisely
the values RESET would enforce (which masks only SSE exceptions, but
not x87 ones); that would come closest to mimicking FINIT behavior in
the xsave case. Another option would be to continue copying whatever
was provided, but override (at least) FCW and MXCSR if VGCF_I387_VALID
isn't set.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>

--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -819,7 +819,9 @@ int arch_set_info_guest(

v->arch.vgc_flags = flags;

- memcpy(v->arch.fpu_ctxt, &c.nat->fpu_ctxt, sizeof(c.nat->fpu_ctxt));
+ if ( flags & VGCF_I387_VALID )
+ memcpy(v->arch.fpu_ctxt, &c.nat->fpu_ctxt, sizeof(c.nat->fpu_ctxt));
+
if ( !compat )
{
memcpy(&v->arch.user_regs, &c.nat->user_regs, sizeof(c.nat->user_regs));
++++++ 26404-x86-forward-both-NMI-kinds.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1358427591 -3600
# Node ID 76598d4bf61ef0c575deba539ff99078c80e651e
# Parent 0dee85c061addb7124d77c5f6cfe2ea7bc03b760
x86: handle both NMI kinds if they occur simultaneously

We shouldn't assume PCI SERR excludes IOCHK.

Once at it, also remove the doubly redundant range restriction on
"reason" - the variable already is "unsigned char".

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>

--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -3357,10 +3357,10 @@ void do_nmi(struct cpu_user_regs *regs)
reason = inb(0x61);
if ( reason & 0x80 )
pci_serr_error(regs);
- else if ( reason & 0x40 )
+ if ( reason & 0x40 )
io_check_error(regs);
- else if ( !nmi_watchdog )
- unknown_nmi_error(regs, (unsigned char)(reason&0xff));
+ if ( !(reason & 0xc0) && !nmi_watchdog )
+ unknown_nmi_error(regs, reason);
}
}

++++++ 26418-x86-trampoline-consider-multiboot.patch ++++++
# HG changeset patch
# User Paolo Bonzini <pbonzini@xxxxxxxxxx>
# Date 1358505311 -3600
# Node ID 3b59a6c3e9b0fb5009bdfff97c8493bb9f0bec54
# Parent 025f202f3022c30d1ec3b6ffcb72861c43a32cf7
x86: find a better location for the real-mode trampoline

On some machines, the location at 0x40e does not point to the beginning
of the EBDA. Rather, it points to the beginning of the BIOS-reserved
area of the EBDA, while the option ROMs place their data below that
segment.

For this reason, 0x413 is actually a better source than 0x40e to get
the location of the real-mode trampoline. Xen was already using it
as a second source, and this patch keeps that working. However, just
in case, let's also fetch the information from the multiboot structure,
where the boot loader should have placed it. This way we don't
necessarily trust one of the BIOS or the multiboot loader more than
the other.

Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>

Retain the previous code, thus using the multiboot value only if it's
sane but lower than the BDA computed one. Also use the full 32-bit
mem_lower value and prefer MBI_MEMLIMITS over open coding it (requiring
a slight adjustment to multiboot.h to make its constants actually
usable in assembly code, which previously they were only meant to be).

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
Committed-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/arch/x86/boot/head.S
+++ b/xen/arch/x86/boot/head.S
@@ -88,6 +88,20 @@ __start:
movzwl 0x413,%eax /* use base memory size on failure */
shl $10-4,%eax
1:
+ /*
+ * Compare the value in the BDA with the information from the
+ * multiboot structure (if available) and use the smallest.
+ */
+ testb $MBI_MEMLIMITS,(%ebx)
+ jz 2f /* not available? BDA value will be fine */
+ mov 4(%ebx),%edx
+ cmp $0x100,%edx /* is the multiboot value too small? */
+ jb 2f /* if so, do not use it */
+ shl $10-4,%edx
+ cmp %eax,%edx /* compare with BDA value */
+ cmovb %edx,%eax /* and use the smaller */
+
+2: /* Reserve 64kb for the trampoline */
sub $0x1000,%eax

/* From arch/x86/smpboot.c: start_eip had better be page-aligned! */
--- a/xen/include/xen/multiboot.h
+++ b/xen/include/xen/multiboot.h
@@ -18,6 +18,7 @@
#ifndef __MULTIBOOT_H__
#define __MULTIBOOT_H__

+#include "const.h"

/*
* Multiboot header structure.
@@ -31,17 +32,17 @@
/* The magic number passed by a Multiboot-compliant boot loader. */
#define MULTIBOOT_BOOTLOADER_MAGIC 0x2BADB002

-#define MBI_MEMLIMITS (1u<< 0)
-#define MBI_BOOTDEV (1u<< 1)
-#define MBI_CMDLINE (1u<< 2)
-#define MBI_MODULES (1u<< 3)
-#define MBI_AOUT_SYMS (1u<< 4)
-#define MBI_ELF_SYMS (1u<< 5)
-#define MBI_MEMMAP (1u<< 6)
-#define MBI_DRIVES (1u<< 7)
-#define MBI_BIOSCONFIG (1u<< 8)
-#define MBI_LOADERNAME (1u<< 9)
-#define MBI_APM (1u<<10)
+#define MBI_MEMLIMITS (_AC(1,u) << 0)
+#define MBI_BOOTDEV (_AC(1,u) << 1)
+#define MBI_CMDLINE (_AC(1,u) << 2)
+#define MBI_MODULES (_AC(1,u) << 3)
+#define MBI_AOUT_SYMS (_AC(1,u) << 4)
+#define MBI_ELF_SYMS (_AC(1,u) << 5)
+#define MBI_MEMMAP (_AC(1,u) << 6)
+#define MBI_DRIVES (_AC(1,u) << 7)
+#define MBI_BIOSCONFIG (_AC(1,u) << 8)
+#define MBI_LOADERNAME (_AC(1,u) << 9)
+#define MBI_APM (_AC(1,u) << 10)

#ifndef __ASSEMBLY__

++++++ 26427-x86-AMD-enable-WC+.patch ++++++
# HG changeset patch
# User Boris Ostrovsky <boris.ostrovsky@xxxxxxx>
# Date 1358508058 -3600
# Node ID 8f6dd5dc5d6cdd56050ed917a0c30903bbddcbf0
# Parent eb8e9a23925d7b77c344a4a99679a45f96754a17
x86/AMD: Enable WC+ memory type on family 10 processors

In some cases BIOS may not enable WC+ memory type on family 10 processors,
instead converting what would be WC+ memory to CD type. On guests using
nested pages this could result in performance degradation. This patch
enables WC+.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxx>
Committed-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -534,6 +534,19 @@ static void __devinit init_amd(struct cp
}
#endif

+ if (c->x86 == 0x10) {
+ /*
+ * On family 10h BIOS may not have properly enabled WC+
+ * support, causing it to be converted to CD memtype. This may
+ * result in performance degradation for certain nested-paging
+ * guests. Prevent this conversion by clearing bit 24 in
+ * MSR_F10_BU_CFG2.
+ */
+ rdmsrl(MSR_F10_BU_CFG2, value);
+ value &= ~(1ULL << 24);
+ wrmsrl(MSR_F10_BU_CFG2, value);
+ }
+
/*
* Family 0x12 and above processors have APIC timer
* running in deep C states.
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -215,8 +215,9 @@
#define MSR_F10_MC4_MISC2 0xc0000409
#define MSR_F10_MC4_MISC3 0xc000040A

-/* AMD Family10h MMU control MSRs */
-#define MSR_F10_BU_CFG 0xc0011023
+/* AMD Family10h Bus Unit MSRs */
+#define MSR_F10_BU_CFG 0xc0011023
+#define MSR_F10_BU_CFG2 0xc001102a

/* Other AMD Fam10h MSRs */
#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
++++++ 26428-x86-HVM-RTC-update.patch ++++++
# HG changeset patch
# User Tim Deegan <tim@xxxxxxx>
# Date 1358508717 -3600
# Node ID 9e8c39bdc1fedd5dfc5aa7209cc5f77f813476c7
# Parent 8f6dd5dc5d6cdd56050ed917a0c30903bbddcbf0
x86/hvm: fix RTC setting.

When the guest writes one field of the RTC time, we must bring all the
other fields up to date for the current second before calculating the
new RTC time.

Signed-off-by: Tim Deegan <tim@xxxxxxx>
Tested-by: Phil Evans <Phil.Evans@xxxxxxxx>
Committed-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/arch/x86/hvm/rtc.c
+++ b/xen/arch/x86/hvm/rtc.c
@@ -399,10 +399,17 @@ static int rtc_ioport_write(void *opaque
case RTC_DAY_OF_MONTH:
case RTC_MONTH:
case RTC_YEAR:
- s->hw.cmos_data[s->hw.cmos_index] = data;
- /* if in set mode, do not update the time */
- if ( !(s->hw.cmos_data[RTC_REG_B] & RTC_SET) )
+ /* if in set mode, just write the register */
+ if ( (s->hw.cmos_data[RTC_REG_B] & RTC_SET) )
+ s->hw.cmos_data[s->hw.cmos_index] = data;
+ else
+ {
+ /* Fetch the current time and update just this field. */
+ s->current_tm = gmtime(get_localtime(d));
+ rtc_copy_date(s);
+ s->hw.cmos_data[s->hw.cmos_index] = data;
rtc_set_time(s);
+ }
alarm_timer_update(s);
break;
case RTC_REG_A:
++++++ 26440-x86-forward-SERR.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1358843590 -3600
# Node ID 5af4f2ab06f33ce441fa550333a9049c09a9ef28
# Parent 4b476378fc35e776196c29dc0e24b71529393a4c
x86: restore (optional) forwarding of PCI SERR induced NMI to Dom0

c/s 22949:54fe1011f86b removed the forwarding of NMIs to Dom0 when they
were caused by PCI SERR. NMI buttons as well as BMCs (like HP's iLO)
may however want such events to be seen in Dom0 (e.g. to trigger a
dump).

Therefore restore most of the functionality which the named c/s removed
(adjusted for subsequent changes, and adjusting the public interface to
use the modern term, retaining the old one for backwards
compatibility).

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>

--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -3201,6 +3201,7 @@ static void nmi_mce_softirq(void)
static void pci_serr_softirq(void)
{
printk("\n\nNMI - PCI system error (SERR)\n");
+ outb(inb(0x61) & 0x0b, 0x61); /* re-enable the PCI SERR error line. */
}

void async_exception_cleanup(struct vcpu *curr)
@@ -3291,9 +3292,20 @@ static void pci_serr_error(struct cpu_us
{
outb((inb(0x61) & 0x0f) | 0x04, 0x61); /* clear-and-disable the PCI SERR
error line. */

- /* Would like to print a diagnostic here but can't call printk()
- from NMI context -- raise a softirq instead. */
- raise_softirq(PCI_SERR_SOFTIRQ);
+ switch ( opt_nmi[0] )
+ {
+ case 'd': /* 'dom0' */
+ nmi_dom0_report(_XEN_NMIREASON_pci_serr);
+ case 'i': /* 'ignore' */
+ /* Would like to print a diagnostic here but can't call printk()
+ from NMI context -- raise a softirq instead. */
+ raise_softirq(PCI_SERR_SOFTIRQ);
+ break;
+ default: /* 'fatal' */
+ console_force_unlock();
+ printk("\n\nNMI - PCI system error (SERR)\n");
+ fatal_trap(TRAP_nmi, regs);
+ }
}

static void io_check_error(struct cpu_user_regs *regs)
--- a/xen/include/public/nmi.h
+++ b/xen/include/public/nmi.h
@@ -36,9 +36,14 @@
/* I/O-check error reported via ISA port 0x61, bit 6. */
#define _XEN_NMIREASON_io_error 0
#define XEN_NMIREASON_io_error (1UL << _XEN_NMIREASON_io_error)
+ /* PCI SERR reported via ISA port 0x61, bit 7. */
+#define _XEN_NMIREASON_pci_serr 1
+#define XEN_NMIREASON_pci_serr (1UL << _XEN_NMIREASON_pci_serr)
+#if __XEN_INTERFACE_VERSION__ < 0x00040300 /* legacy alias of the above */
/* Parity error reported via ISA port 0x61, bit 7. */
#define _XEN_NMIREASON_parity_error 1
#define XEN_NMIREASON_parity_error (1UL << _XEN_NMIREASON_parity_error)
+#endif
/* Unknown hardware-generated NMI. */
#define _XEN_NMIREASON_unknown 2
#define XEN_NMIREASON_unknown (1UL << _XEN_NMIREASON_unknown)
++++++ 26443-ACPI-zap-DMAR.patch ++++++
# HG changeset patch
# User Tomasz Wroblewski <tomasz.wroblewski@xxxxxxxxxx>
# Date 1358933464 -3600
# Node ID 9efe4c0bf9c8d3ecf03868c69c24dad3218523a4
# Parent 7c6ecf2c1831a1c7f63a96f119a8891891463e54
fix acpi_dmar_zap/reinstate() (fixes S3 regression)

Fix S3 regression introduced by cs 23013:65d26504e843 (ACPI: large
cleanup). The dmar virtual pointer returned from acpi_get_table cannot
be safely stored away and used later, as the underlying
acpi_os_map_memory / __acpi_map_table functions overwrite the mapping
causing it to point to different tables than dmar (last fetched table is
used). This subsequently causes acpi_dmar_reinstate() and
acpi_dmar_zap() to write data to the wrong table, corrupting it and
causing problems with consecutive S3 resumes.

Add a new function to fetch an ACPI table's physical address, and
establish a separate static mapping for the dmar_table pointer instead
of using acpi_get_table().

Signed-off-by: Tomasz Wroblewski <tomasz.wroblewski@xxxxxxxxxx>

Added call to acpi_tb_verify_table(). Fixed page count passed to
map_pages_to_xen(). Cosmetic changes.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Committed-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/drivers/acpi/tables/tbxface.c
+++ b/xen/drivers/acpi/tables/tbxface.c
@@ -205,3 +205,51 @@ acpi_get_table(char *signature,

return (AE_NOT_FOUND);
}
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_get_table_phys
+ *
+ * PARAMETERS: signature - ACPI signature of needed table
+ * instance - Which instance (for SSDTs)
+ * addr - Where the table's physical address is returned
+ * len - Where the length of table is returned
+ *
+ * RETURN: Status; physical address and length of table (via addr/len)
+ *
+ * DESCRIPTION: Finds physical address and length of ACPI table
+ *
+ *****************************************************************************/
+acpi_status __init
+acpi_get_table_phys(acpi_string signature, acpi_native_uint instance,
+ acpi_physical_address *addr, acpi_native_uint *len)
+{
+ acpi_native_uint i, j;
+ acpi_status status;
+
+ if (!signature || !addr || !len)
+ return AE_BAD_PARAMETER;
+
+ for (i = j = 0; i < acpi_gbl_root_table_list.count; i++) {
+ if (!ACPI_COMPARE_NAME(
+ &acpi_gbl_root_table_list.tables[i].signature,
+ signature))
+ continue;
+
+ if (++j < instance)
+ continue;
+
+ status =
+ acpi_tb_verify_table(&acpi_gbl_root_table_list.tables[i]);
+ if (ACPI_SUCCESS(status)) {
+ *addr = acpi_gbl_root_table_list.tables[i].address;
+ *len = acpi_gbl_root_table_list.tables[i].length;
+ }
+
+ acpi_gbl_root_table_list.tables[i].pointer = NULL;
+
+ return status;
+ }
+
+ return AE_NOT_FOUND;
+}
--- a/xen/drivers/passthrough/vtd/dmar.c
+++ b/xen/drivers/passthrough/vtd/dmar.c
@@ -776,6 +776,7 @@ out:
}

#ifdef CONFIG_X86
+#include <asm/fixmap.h>
#include <asm/tboot.h>
/* ACPI tables may not be DMA protected by tboot, so use DMAR copy */
/* SINIT saved in SinitMleData in TXT heap (which is DMA protected) */
@@ -786,7 +787,32 @@ out:

int __init acpi_dmar_init(void)
{
- acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_table);
+ acpi_physical_address dmar_addr;
+ acpi_native_uint dmar_len;
+
+ if ( ACPI_SUCCESS(acpi_get_table_phys(ACPI_SIG_DMAR, 0,
+ &dmar_addr, &dmar_len)) )
+ {
+#ifdef CONFIG_X86_32
+ if ( dmar_addr + dmar_len > (DIRECTMAP_MBYTES << 20) )
+ {
+ unsigned long offset = dmar_addr & (PAGE_SIZE - 1);
+ unsigned long mapped_size = PAGE_SIZE - offset;
+
+ set_fixmap(FIX_DMAR_ZAP_LO, dmar_addr);
+ if ( mapped_size < sizeof(*dmar_table) )
+ set_fixmap(FIX_DMAR_ZAP_HI, dmar_addr + PAGE_SIZE);
+ dmar_table = (void *)fix_to_virt(FIX_DMAR_ZAP_LO) + offset;
+ goto exit;
+ }
+#endif
+ map_pages_to_xen((unsigned long)__va(dmar_addr), PFN_DOWN(dmar_addr),
+ PFN_UP(dmar_addr + dmar_len) - PFN_DOWN(dmar_addr),
+ PAGE_HYPERVISOR);
+ dmar_table = __va(dmar_addr);
+ }
+
+ exit: __attribute__((__unused__))
return parse_dmar_table(acpi_parse_dmar);
}

--- a/xen/include/acpi/acpixf.h
+++ b/xen/include/acpi/acpixf.h
@@ -77,6 +77,9 @@ acpi_status
acpi_get_table(acpi_string signature,
acpi_native_uint instance, struct acpi_table_header **out_table);

+acpi_status
+acpi_get_table_phys(acpi_string signature, acpi_native_uint instance,
+ acpi_physical_address *addr, acpi_native_uint *len);
/*
* Namespace and name interfaces
*/
--- a/xen/include/asm-x86/fixmap.h
+++ b/xen/include/asm-x86/fixmap.h
@@ -50,6 +50,8 @@ enum fixed_addresses {
FIX_PAE_HIGHMEM_END = FIX_PAE_HIGHMEM_0 + NR_CPUS-1,
#define FIX_VGC_END FIX_PAE_HIGHMEM_0
#define FIX_VGC_BEGIN FIX_PAE_HIGHMEM_END
+ FIX_DMAR_ZAP_HI,
+ FIX_DMAR_ZAP_LO,
#else
FIX_VGC_END,
FIX_VGC_BEGIN = FIX_VGC_END
++++++ 26444-x86-nHVM-no-self-enable.patch ++++++
References: CVE-2013-0152 XSA-35 bnc#797287

# HG changeset patch
# User Ian Campbell <ian.campbell@xxxxxxxxxx>
# Date 1358938044 -3600
# Node ID 621b1a889e9b120236698731e0b5ecc5b0cb1d82
# Parent 9efe4c0bf9c8d3ecf03868c69c24dad3218523a4
xen: Do not allow guests to enable nested HVM on themselves

There is no reason for this and doing so exposes a memory leak to
guests. Only toolstacks need write access to this HVM param.

This is XSA-35 / CVE-2013-0152.

Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Acked-by: Jan Beulich <JBeulich@xxxxxxxx>
Committed-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3930,6 +3930,11 @@ long do_hvm_op(unsigned long op, XEN_GUE
rc = -EINVAL;
break;
case HVM_PARAM_NESTEDHVM:
+ if ( !IS_PRIV(current->domain) )
+ {
+ rc = -EPERM;
+ break;
+ }
#ifdef __i386__
if ( a.value )
rc = -EINVAL;
++++++ 26468-libxl-race.patch ++++++
# HG changeset patch
# User Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
# Date 1359031672 0
# Node ID a181bf3e77df891c97fc20dff4e9b90b7584022b
# Parent 3e93c50982de4f2f7db99d92b04684556320541c
libxl: fix stale fd event callback race

Because there is not necessarily any lock held at the point the
application (eg, libvirt) calls libxl_osevent_occurred_timeout and
..._fd, in a multithreaded program those calls may be arbitrarily
delayed in relation to other activities within the program.

libxl therefore needs to be prepared to receive very old event
callbacks. Arrange for this to be the case for fd callbacks.

This requires a new layer of indirection through a "hook nexus" struct
which can outlive the libxl__ev_foo. Allocation and deallocation of
these nexi is mostly handled in the OSEVENT macros which wrap up
the application's callbacks.

Document the problem and the solution in a comment in libxl_event.c
just before the definition of struct libxl__osevent_hook_nexus.

There is still a race relating to libxl__osevent_occurred_timeout;
this will be addressed in the following patch.

Reported-by: Bamvor Jian Zhang <bjzhang@xxxxxxxx>
Cc: Bamvor Jian Zhang <bjzhang@xxxxxxxx>
Cc: Ian Campbell <Ian.Campbell@xxxxxxxxxx>
Tested-by: Jim Fehlig <jfehlig@xxxxxxxx>
Acked-by: Jim Fehlig <jfehlig@xxxxxxxx>
Signed-off-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
Committed-by: Ian Campbell <ian.campbell@xxxxxxxxxx>

Index: xen-4.2.1-testing/tools/libxl/libxl_event.c
===================================================================
--- xen-4.2.1-testing.orig/tools/libxl/libxl_event.c
+++ xen-4.2.1-testing/tools/libxl/libxl_event.c
@@ -38,23 +38,131 @@
* The application's registration hooks should be called ONLY via
* these macros, with the ctx locked. Likewise all the "occurred"
* entrypoints from the application should assert(!in_hook);
+ *
+ * During the hook call - including while the arguments are being
+ * evaluated - ev->nexus is guaranteed to be valid and refer to the
+ * nexus which is being used for this event registration. The
+ * arguments should specify ev->nexus for the for_libxl argument and
+ * ev->nexus->for_app_reg (or a pointer to it) for for_app_reg.
*/
-#define OSEVENT_HOOK_INTERN(retval, hookname, ...) do { \
- if (CTX->osevent_hooks) { \
- CTX->osevent_in_hook++; \
- retval CTX->osevent_hooks->hookname(CTX->osevent_user, __VA_ARGS__); \
- CTX->osevent_in_hook--; \
- } \
+#define OSEVENT_HOOK_INTERN(retval, failedp, evkind, hookop, nexusop, ...) do
{ \
+ if (CTX->osevent_hooks) { \
+ CTX->osevent_in_hook++; \
+ libxl__osevent_hook_nexi *nexi = &CTX->hook_##evkind##_nexi_idle; \
+ osevent_hook_pre_##nexusop(gc, ev, nexi, &ev->nexus); \
+ retval CTX->osevent_hooks->evkind##_##hookop \
+ (CTX->osevent_user, __VA_ARGS__); \
+ if ((failedp)) \
+ osevent_hook_failed_##nexusop(gc, ev, nexi, &ev->nexus); \
+ CTX->osevent_in_hook--; \
+ } \
} while (0)

-#define OSEVENT_HOOK(hookname, ...) ({ \
- int osevent_hook_rc = 0; \
- OSEVENT_HOOK_INTERN(osevent_hook_rc = , hookname, __VA_ARGS__); \
- osevent_hook_rc; \
+#define OSEVENT_HOOK(evkind, hookop, nexusop, ...) ({ \
+ int osevent_hook_rc = 0; \
+ OSEVENT_HOOK_INTERN(osevent_hook_rc =, !!osevent_hook_rc, \
+ evkind, hookop, nexusop, __VA_ARGS__); \
+ osevent_hook_rc; \
})

-#define OSEVENT_HOOK_VOID(hookname, ...) \
- OSEVENT_HOOK_INTERN(/* void */, hookname, __VA_ARGS__)
+#define OSEVENT_HOOK_VOID(evkind, hookop, nexusop, ...)
\
+ OSEVENT_HOOK_INTERN(/* void */, 0, evkind, hookop, nexusop, __VA_ARGS__)
+
+/*
+ * The application's calls to libxl_osevent_occurred_... may be
+ * indefinitely delayed with respect to the rest of the program (since
+ * they are not necessarily called with any lock held). So the
+ * for_libxl value we receive may be (almost) arbitrarily old. All we
+ * know is that it came from this ctx.
+ *
+ * Therefore we may not free the object referred to by any for_libxl
+ * value until we free the whole libxl_ctx. And if we reuse it we
+ * must be able to tell when an old use turns up, and discard the
+ * stale event.
+ *
+ * Thus we cannot use the ev directly as the for_libxl value - we need
+ * a layer of indirection.
+ *
+ * We do this by keeping a pool of libxl__osevent_hook_nexus structs,
+ * and use pointers to them as for_libxl values. In fact, there are
+ * two pools: one for fds and one for timeouts. This ensures that we
+ * don't risk a type error when we upcast nexus->ev. In each nexus
+ * the ev is either null or points to a valid libxl__ev_time or
+ * libxl__ev_fd, as applicable.
+ *
+ * We /do/ allow ourselves to reassociate an old nexus with a new ev
+ * as otherwise we would have to leak nexi. (This reassociation
+ * might, of course, be an old ev being reused for a new purpose so
+ * simply comparing the ev pointer is not sufficient.) Thus the
+ * libxl_osevent_occurred functions need to check that the condition
+ * allegedly signalled by this event actually exists.
+ *
+ * The nexi and the lists are all protected by the ctx lock.
+ */
+
+struct libxl__osevent_hook_nexus {
+ void *ev;
+ void *for_app_reg;
+ LIBXL_SLIST_ENTRY(libxl__osevent_hook_nexus) next;
+};
+
+static void *osevent_ev_from_hook_nexus(libxl_ctx *ctx,
+ libxl__osevent_hook_nexus *nexus /* pass void *for_libxl */)
+{
+ return nexus->ev;
+}
+
+static void osevent_release_nexus(libxl__gc *gc,
+ libxl__osevent_hook_nexi *nexi_idle,
+ libxl__osevent_hook_nexus *nexus)
+{
+ nexus->ev = 0;
+ LIBXL_SLIST_INSERT_HEAD(nexi_idle, nexus, next);
+}
+
+/*----- OSEVENT* hook functions for nexusop "alloc" -----*/
+static void osevent_hook_pre_alloc(libxl__gc *gc, void *ev,
+ libxl__osevent_hook_nexi *nexi_idle,
+ libxl__osevent_hook_nexus **nexus_r)
+{
+ libxl__osevent_hook_nexus *nexus = LIBXL_SLIST_FIRST(nexi_idle);
+ if (nexus) {
+ LIBXL_SLIST_REMOVE_HEAD(nexi_idle, next);
+ } else {
+ nexus = libxl__zalloc(NOGC, sizeof(*nexus));
+ }
+ nexus->ev = ev;
+ *nexus_r = nexus;
+}
+static void osevent_hook_failed_alloc(libxl__gc *gc, void *ev,
+ libxl__osevent_hook_nexi *nexi_idle,
+ libxl__osevent_hook_nexus **nexus)
+{
+ osevent_release_nexus(gc, nexi_idle, *nexus);
+}
+
+/*----- OSEVENT* hook functions for nexusop "release" -----*/
+static void osevent_hook_pre_release(libxl__gc *gc, void *ev,
+ libxl__osevent_hook_nexi *nexi_idle,
+ libxl__osevent_hook_nexus **nexus)
+{
+ osevent_release_nexus(gc, nexi_idle, *nexus);
+}
+static void osevent_hook_failed_release(libxl__gc *gc, void *ev,
+ libxl__osevent_hook_nexi *nexi_idle,
+ libxl__osevent_hook_nexus **nexus)
+{
+ abort();
+}
+
+/*----- OSEVENT* hook functions for nexusop "noop" -----*/
+static void osevent_hook_pre_noop(libxl__gc *gc, void *ev,
+ libxl__osevent_hook_nexi *nexi_idle,
+ libxl__osevent_hook_nexus **nexus) { }
+static void osevent_hook_failed_noop(libxl__gc *gc, void *ev,
+ libxl__osevent_hook_nexi *nexi_idle,
+ libxl__osevent_hook_nexus **nexus) { }
+

/*
* fd events
@@ -72,7 +180,8 @@ int libxl__ev_fd_register(libxl__gc *gc,

DBG("ev_fd=%p register fd=%d events=%x", ev, fd, events);

- rc = OSEVENT_HOOK(fd_register, fd, &ev->for_app_reg, events, ev);
+ rc = OSEVENT_HOOK(fd,register, alloc, fd, &ev->nexus->for_app_reg,
+ events, ev->nexus);
if (rc) goto out;

ev->fd = fd;
@@ -97,7 +206,7 @@ int libxl__ev_fd_modify(libxl__gc *gc, l

DBG("ev_fd=%p modify fd=%d events=%x", ev, ev->fd, events);

- rc = OSEVENT_HOOK(fd_modify, ev->fd, &ev->for_app_reg, events);
+ rc = OSEVENT_HOOK(fd,modify, noop, ev->fd, &ev->nexus->for_app_reg,
events);
if (rc) goto out;

ev->events = events;
@@ -119,7 +228,7 @@ void libxl__ev_fd_deregister(libxl__gc *

DBG("ev_fd=%p deregister fd=%d", ev, ev->fd);

- OSEVENT_HOOK_VOID(fd_deregister, ev->fd, ev->for_app_reg);
+ OSEVENT_HOOK_VOID(fd,deregister, release, ev->fd, ev->nexus->for_app_reg);
LIBXL_LIST_REMOVE(ev, entry);
ev->fd = -1;

@@ -171,7 +280,8 @@ static int time_register_finite(libxl__g
{
int rc;

- rc = OSEVENT_HOOK(timeout_register, &ev->for_app_reg, absolute, ev);
+ rc = OSEVENT_HOOK(timeout,register, alloc, &ev->nexus->for_app_reg,
+ absolute, ev->nexus);
if (rc) return rc;

ev->infinite = 0;
@@ -184,7 +294,7 @@ static int time_register_finite(libxl__g
static void time_deregister(libxl__gc *gc, libxl__ev_time *ev)
{
if (!ev->infinite) {
- OSEVENT_HOOK_VOID(timeout_deregister, ev->for_app_reg);
+ OSEVENT_HOOK_VOID(timeout,deregister, release, ev->nexus->for_app_reg);
LIBXL_TAILQ_REMOVE(&CTX->etimes, ev, entry);
}
}
@@ -270,7 +380,8 @@ int libxl__ev_time_modify_abs(libxl__gc
rc = time_register_finite(gc, ev, absolute);
if (rc) goto out;
} else {
- rc = OSEVENT_HOOK(timeout_modify, &ev->for_app_reg, absolute);
+ rc = OSEVENT_HOOK(timeout,modify, noop,
+ &ev->nexus->for_app_reg, absolute);
if (rc) goto out;

LIBXL_TAILQ_REMOVE(&CTX->etimes, ev, entry);
@@ -1009,35 +1120,54 @@ void libxl_osevent_register_hooks(libxl_


void libxl_osevent_occurred_fd(libxl_ctx *ctx, void *for_libxl,
- int fd, short events, short revents)
+ int fd, short events_ign, short revents_ign)
{
- libxl__ev_fd *ev = for_libxl;
-
EGC_INIT(ctx);
CTX_LOCK;
assert(!CTX->osevent_in_hook);

- assert(fd == ev->fd);
- revents &= ev->events;
- if (revents)
- ev->func(egc, ev, fd, ev->events, revents);
+ libxl__ev_fd *ev = osevent_ev_from_hook_nexus(ctx, for_libxl);
+ if (!ev) goto out;
+ if (ev->fd != fd) goto out;

+ struct pollfd check;
+ for (;;) {
+ check.fd = fd;
+ check.events = ev->events;
+ int r = poll(&check, 1, 0);
+ if (!r)
+ goto out;
+ if (r==1)
+ break;
+ assert(r<0);
+ if (errno != EINTR) {
+ LIBXL__EVENT_DISASTER(egc, "failed poll to check for fd", errno,
0);
+ goto out;
+ }
+ }
+
+ if (check.revents)
+ ev->func(egc, ev, fd, ev->events, check.revents);
+
+ out:
CTX_UNLOCK;
EGC_FREE;
}

void libxl_osevent_occurred_timeout(libxl_ctx *ctx, void *for_libxl)
{
- libxl__ev_time *ev = for_libxl;
-
EGC_INIT(ctx);
CTX_LOCK;
assert(!CTX->osevent_in_hook);

+ libxl__ev_time *ev = osevent_ev_from_hook_nexus(ctx, for_libxl);
+ if (!ev) goto out;
assert(!ev->infinite);
+
LIBXL_TAILQ_REMOVE(&CTX->etimes, ev, entry);
ev->func(egc, ev, &ev->abs);

+ out:
CTX_UNLOCK;
EGC_FREE;
}
Index: xen-4.2.1-testing/tools/libxl/libxl_internal.h
===================================================================
--- xen-4.2.1-testing.orig/tools/libxl/libxl_internal.h
+++ xen-4.2.1-testing/tools/libxl/libxl_internal.h
@@ -136,6 +136,8 @@ typedef struct libxl__gc libxl__gc;
typedef struct libxl__egc libxl__egc;
typedef struct libxl__ao libxl__ao;
typedef struct libxl__aop_occurred libxl__aop_occurred;
+typedef struct libxl__osevent_hook_nexus libxl__osevent_hook_nexus;
+typedef struct libxl__osevent_hook_nexi libxl__osevent_hook_nexi;

_hidden void libxl__alloc_failed(libxl_ctx *, const char *func,
size_t nmemb, size_t size) __attribute__((noreturn));
@@ -163,7 +165,7 @@ struct libxl__ev_fd {
libxl__ev_fd_callback *func;
/* remainder is private for libxl__ev_fd... */
LIBXL_LIST_ENTRY(libxl__ev_fd) entry;
- void *for_app_reg;
+ libxl__osevent_hook_nexus *nexus;
};


@@ -178,7 +180,7 @@ struct libxl__ev_time {
int infinite; /* not registered in list or with app if infinite */
LIBXL_TAILQ_ENTRY(libxl__ev_time) entry;
struct timeval abs;
- void *for_app_reg;
+ libxl__osevent_hook_nexus *nexus;
};

typedef struct libxl__ev_xswatch libxl__ev_xswatch;
@@ -329,6 +331,8 @@ struct libxl__ctx {
libxl__poller poller_app; /* libxl_osevent_beforepoll and _afterpoll */
LIBXL_LIST_HEAD(, libxl__poller) pollers_event, pollers_idle;

+ LIBXL_SLIST_HEAD(libxl__osevent_hook_nexi, libxl__osevent_hook_nexus)
+ hook_fd_nexi_idle, hook_timeout_nexi_idle;
LIBXL_LIST_HEAD(, libxl__ev_fd) efds;
LIBXL_TAILQ_HEAD(, libxl__ev_time) etimes;

++++++ 26469-libxl-race.patch ++++++
# HG changeset patch
# User Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
# Date 1359031673 0
# Node ID a162a72e719a85799e3b08f52af7bb2147a407b8
# Parent a181bf3e77df891c97fc20dff4e9b90b7584022b
libxl: fix stale timeout event callback race

Because there is not necessarily any lock held at the point the
application (eg, libvirt) calls libxl_osevent_occurred_timeout, in a
multithreaded program those calls may be arbitrarily delayed in
relation to other activities within the program.

Specifically this means when ->timeout_deregister returns, libxl does
not know whether it can safely dispose of the for_libxl value or
whether it needs to retain it in case of an in-progress call to
_occurred_timeout.

The interface could be fixed by requiring the application to make a
new call into libxl to say that the deregistration was complete.

However that new call would have to be threaded through the
application's event loop; this is complicated and some application
authors are likely not to implement it properly. Furthermore the
easiest way to implement this facility in most event loops is to queue
up a time event for "now".

Shortcut all of this by having libxl always call timeout_modify
setting abs={0,0} (ie, ASAP) instead of timeout_deregister. This will
cause the application to call _occurred_timeout. When processing this
calldown we see that we were no longer actually interested and simply
throw it away.

Additionally, there is a race between _occurred_timeout and
->timeout_modify. If libxl ever adjusts the deadline for a timeout
the application may already be in the process of calling _occurred, in
which case the situation with for_app's lifetime becomes very
complicated. Therefore abolish libxl__ev_time_modify_{abs,rel} (which
have no callers) and promise to the application only ever to call
->timeout_modify with abs=={0,0}. The application still needs to cope
with ->timeout_modify racing with its internal function which calls
_occurred_timeout. Document this.

This is a forwards-compatible change for applications using the libxl
API, and will hopefully eliminate these races in callback-supplying
applications (such as libvirt) without the need for corresponding
changes to the application. (It is possible that this might expose
bugs in applications, though, as previously libxl would never call
libxl_osevent_hooks->timeout_modify and now it never calls
->timeout_deregister).

For clarity, fold the body of time_register_finite into its one
remaining call site. This makes the semantics of ev->infinite
slightly clearer.

Cc: Bamvor Jian Zhang <bjzhang@xxxxxxxx>
Cc: Ian Campbell <Ian.Campbell@xxxxxxxxxx>
Tested-by: Jim Fehlig <jfehlig@xxxxxxxx>
Acked-by: Jim Fehlig <jfehlig@xxxxxxxx>
Signed-off-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
Committed-by: Ian Campbell <ian.campbell@xxxxxxxxxx>

Index: xen-4.2.1-testing/tools/libxl/libxl_event.c
===================================================================
--- xen-4.2.1-testing.orig/tools/libxl/libxl_event.c
+++ xen-4.2.1-testing/tools/libxl/libxl_event.c
@@ -267,18 +267,11 @@ static int time_rel_to_abs(libxl__gc *gc
return 0;
}

-static void time_insert_finite(libxl__gc *gc, libxl__ev_time *ev)
-{
- libxl__ev_time *evsearch;
- LIBXL_TAILQ_INSERT_SORTED(&CTX->etimes, entry, ev, evsearch, /*empty*/,
- timercmp(&ev->abs, &evsearch->abs, >));
- ev->infinite = 0;
-}
-
static int time_register_finite(libxl__gc *gc, libxl__ev_time *ev,
struct timeval absolute)
{
int rc;
+ libxl__ev_time *evsearch;

rc = OSEVENT_HOOK(timeout,register, alloc, &ev->nexus->for_app_reg,
absolute, ev->nexus);
@@ -286,7 +279,8 @@ static int time_register_finite(libxl__g

ev->infinite = 0;
ev->abs = absolute;
- time_insert_finite(gc, ev);
+ LIBXL_TAILQ_INSERT_SORTED(&CTX->etimes, entry, ev, evsearch, /*empty*/,
+ timercmp(&ev->abs, &evsearch->abs, >));

return 0;
}
@@ -294,7 +288,12 @@ static int time_register_finite(libxl__g
static void time_deregister(libxl__gc *gc, libxl__ev_time *ev)
{
if (!ev->infinite) {
- OSEVENT_HOOK_VOID(timeout,deregister, release, ev->nexus->for_app_reg);
+ struct timeval right_away = { 0, 0 };
+ if (ev->nexus) /* only set if app provided hooks */
+ ev->nexus->ev = 0;
+ OSEVENT_HOOK_VOID(timeout,modify,
+ noop /* release nexus in _occurred_ */,
+ &ev->nexus->for_app_reg, right_away);
LIBXL_TAILQ_REMOVE(&CTX->etimes, ev, entry);
}
}
@@ -364,70 +363,6 @@ int libxl__ev_time_register_rel(libxl__g
return rc;
}

-int libxl__ev_time_modify_abs(libxl__gc *gc, libxl__ev_time *ev,
- struct timeval absolute)
-{
- int rc;
-
- CTX_LOCK;
-
- DBG("ev_time=%p modify abs==%lu.%06lu",
- ev, (unsigned long)absolute.tv_sec, (unsigned long)absolute.tv_usec);
-
- assert(libxl__ev_time_isregistered(ev));
-
- if (ev->infinite) {
- rc = time_register_finite(gc, ev, absolute);
- if (rc) goto out;
- } else {
- rc = OSEVENT_HOOK(timeout,modify, noop,
- &ev->nexus->for_app_reg, absolute);
- if (rc) goto out;
-
- LIBXL_TAILQ_REMOVE(&CTX->etimes, ev, entry);
- ev->abs = absolute;
- time_insert_finite(gc, ev);
- }
-
- rc = 0;
- out:
- time_done_debug(gc,__func__,ev,rc);
- CTX_UNLOCK;
- return rc;
-}
-
-int libxl__ev_time_modify_rel(libxl__gc *gc, libxl__ev_time *ev,
- int milliseconds)
-{
- struct timeval absolute;
- int rc;
-
- CTX_LOCK;
-
- DBG("ev_time=%p modify ms=%d", ev, milliseconds);
-
- assert(libxl__ev_time_isregistered(ev));
-
- if (milliseconds < 0) {
- time_deregister(gc, ev);
- ev->infinite = 1;
- rc = 0;
- goto out;
- }
-
- rc = time_rel_to_abs(gc, milliseconds, &absolute);
- if (rc) goto out;
-
- rc = libxl__ev_time_modify_abs(gc, ev, absolute);
- if (rc) goto out;
-
- rc = 0;
- out:
- time_done_debug(gc,__func__,ev,rc);
- CTX_UNLOCK;
- return rc;
-}
-
void libxl__ev_time_deregister(libxl__gc *gc, libxl__ev_time *ev)
{
CTX_LOCK;
@@ -1160,7 +1095,11 @@ void libxl_osevent_occurred_timeout(libx
CTX_LOCK;
assert(!CTX->osevent_in_hook);

- libxl__ev_time *ev = osevent_ev_from_hook_nexus(ctx, for_libxl);
+ libxl__osevent_hook_nexus *nexus = for_libxl;
+ libxl__ev_time *ev = osevent_ev_from_hook_nexus(ctx, nexus);
+
+ osevent_release_nexus(gc, &CTX->hook_timeout_nexi_idle, nexus);
+
if (!ev) goto out;
assert(!ev->infinite);

Index: xen-4.2.1-testing/tools/libxl/libxl_event.h
===================================================================
--- xen-4.2.1-testing.orig/tools/libxl/libxl_event.h
+++ xen-4.2.1-testing/tools/libxl/libxl_event.h
@@ -287,8 +287,10 @@ typedef struct libxl_osevent_hooks {
int (*timeout_register)(void *user, void **for_app_registration_out,
struct timeval abs, void *for_libxl);
int (*timeout_modify)(void *user, void **for_app_registration_update,
- struct timeval abs);
- void (*timeout_deregister)(void *user, void *for_app_registration);
+ struct timeval abs)
+ /* only ever called with abs={0,0}, meaning ASAP */;
+ void (*timeout_deregister)(void *user, void *for_app_registration)
+ /* will never be called */;
} libxl_osevent_hooks;

/* The application which calls register_fd_hooks promises to
@@ -337,6 +339,17 @@ typedef struct libxl_osevent_hooks {
* register (or modify), and pass it to subsequent calls to modify
* or deregister.
*
+ * Note that the application must cope with a call from libxl to
+ * timeout_modify racing with its own call to
+ * libxl__osevent_occurred_timeout. libxl guarantees that
+ * timeout_modify will only be called with abs={0,0} but the
+ * application must still ensure that libxl's attempt to cause the
+ * timeout to occur immediately is safely ignored even if the timeout is
+ * actually already in the process of occurring.
+ *
+ * timeout_deregister is not used because it forms part of a
+ * deprecated unsafe mode of use of the API.
+ *
* osevent_register_hooks may be called only once for each libxl_ctx.
* libxl may make calls to register/modify/deregister from within
* any libxl function (indeed, it will usually call register from
++++++ 26501-VMX-simplify-CR0-update.patch ++++++
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1359566139 28800
# Node ID 8201b6ec3564c80db5516cdcf36dcfa9b7fdd93b
# Parent 1fe8ecfdf10cc9077fc810364663a0f25a5c5b96
vmx: Simplify cr0 update handling by deferring cr4 changes to the cr4 handler.

Signed-off-by: Keir Fraser <keir@xxxxxxx>

--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1133,20 +1133,18 @@ static void vmx_update_guest_cr(struct v

if ( paging_mode_hap(v->domain) )
{
- /* We manage GUEST_CR3 when guest CR0.PE is zero or when cr3
memevents are on */
+ /* Manage GUEST_CR3 when CR0.PE=0. */
uint32_t cr3_ctls = (CPU_BASED_CR3_LOAD_EXITING |
CPU_BASED_CR3_STORE_EXITING);
v->arch.hvm_vmx.exec_control &= ~cr3_ctls;
if ( !hvm_paging_enabled(v) )
v->arch.hvm_vmx.exec_control |= cr3_ctls;

+ /* Trap CR3 updates if CR3 memory events are enabled. */
if ( v->domain->arch.hvm_domain.params[HVM_PARAM_MEMORY_EVENT_CR3]
)
v->arch.hvm_vmx.exec_control |= CPU_BASED_CR3_LOAD_EXITING;

vmx_update_cpu_exec_control(v);
-
- /* Changing CR0.PE can change some bits in real CR4. */
- vmx_update_guest_cr(v, 4);
}

if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
@@ -1176,8 +1174,6 @@ static void vmx_update_guest_cr(struct v
{
for ( s = x86_seg_cs ; s <= x86_seg_tr ; s++ )
vmx_set_segment_register(v, s, &reg[s]);
- v->arch.hvm_vcpu.hw_cr[4] |= X86_CR4_VME;
- __vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]);
v->arch.hvm_vmx.exception_bitmap = 0xffffffff;
vmx_update_exception_bitmap(v);
}
@@ -1187,10 +1183,6 @@ static void vmx_update_guest_cr(struct v
if ( !(v->arch.hvm_vmx.vm86_segment_mask & (1<<s)) )
vmx_set_segment_register(
v, s, &v->arch.hvm_vmx.vm86_saved_seg[s]);
- v->arch.hvm_vcpu.hw_cr[4] =
- ((v->arch.hvm_vcpu.hw_cr[4] & ~X86_CR4_VME)
- |(v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_VME));
- __vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]);
v->arch.hvm_vmx.exception_bitmap = HVM_TRAP_MASK
| (paging_mode_hap(v->domain) ?
0 : (1U << TRAP_page_fault))
@@ -1204,6 +1196,9 @@ static void vmx_update_guest_cr(struct v
v->arch.hvm_vcpu.guest_cr[0] | hw_cr0_mask;
__vmwrite(GUEST_CR0, v->arch.hvm_vcpu.hw_cr[0]);
__vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[0]);
+
+ /* Changing CR0 can change some bits in real CR4. */
+ vmx_update_guest_cr(v, 4);
break;
}
case 2:
++++++ 26502-VMX-disable-SMEP-when-not-paging.patch ++++++
# HG changeset patch
# User Dongxiao Xu <dongxiao.xu@xxxxxxxxx>
# Date 1359566250 28800
# Node ID d1bf3b21f78302dad1ed53e540facf7b9a0e2ab5
# Parent 8201b6ec3564c80db5516cdcf36dcfa9b7fdd93b
VMX: disable SMEP feature when guest is in non-paging mode

SMEP is disabled if CPU is in non-paging mode in hardware.
However Xen always uses paging mode to emulate guest non-paging
mode with HAP. To emulate this behavior, SMEP needs to be manually
disabled when guest switches to non-paging mode.

We hit an issue where an SMP Linux guest with a recent kernel (one with
SMEP support enabled, for example 3.5.3) would crash with a triple fault
when unrestricted_guest=0 was set in grub. This is because Xen uses an
identity-mapped page table to emulate non-paging mode, and that page
table has the USER flag set. If SMEP is still enabled in this case, the
guest takes an unhandleable page fault and then crashes.

Signed-off-by: Dongxiao Xu <dongxiao.xu@xxxxxxxxx>
Signed-off-by: Xiantao Zhang <xiantao.zhang@xxxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>

--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1227,6 +1227,13 @@ static void vmx_update_guest_cr(struct v
{
v->arch.hvm_vcpu.hw_cr[4] |= X86_CR4_PSE;
v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_PAE;
+ /*
+ * SMEP is disabled if CPU is in non-paging mode in hardware.
+ * However Xen always uses paging mode to emulate guest non-paging
+ * mode with HAP. To emulate this behavior, SMEP needs to be
+ * manually disabled when guest switches to non-paging mode.
+ */
+ v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_SMEP;
}
__vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]);
__vmwrite(CR4_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[4]);
++++++ 26516-ACPI-parse-table-retval.patch ++++++
References: CVE-2013-0153 XSA-36 bnc#800275

# HG changeset patch
# User Boris Ostrovsky <boris.ostrovsky@xxxxxxx>
# Date 1360073898 -3600
# Node ID 32d4516a97f0b22ed06155f7b8e0bff075024991
# Parent 2fdca30363f08026971c094e8a1a84e19ca3e55b
ACPI: acpi_table_parse() should return handler's error code

Currently, the error code returned by acpi_table_parse()'s handler
is ignored. This patch will propagate handler's return value to
acpi_table_parse()'s caller.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxx>
Committed-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/drivers/acpi/tables.c
+++ b/xen/drivers/acpi/tables.c
@@ -267,7 +267,7 @@ acpi_table_parse_madt(enum acpi_madt_typ
* @handler: handler to run
*
* Scan the ACPI System Descriptor Table (STD) for a table matching @id,
- * run @handler on it. Return 0 if table found, return on if not.
+ * run @handler on it.
*/
int __init acpi_table_parse(char *id, acpi_table_handler handler)
{
@@ -282,8 +282,7 @@ int __init acpi_table_parse(char *id, ac
acpi_get_table(id, 0, &table);

if (table) {
- handler(table);
- return 0;
+ return handler(table);
} else
return 1;
}
++++++ 26517-AMD-IOMMU-clear-irtes.patch ++++++
References: CVE-2013-0153 XSA-36 bnc#800275

# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1360074047 -3600
# Node ID 601139e2b0db7dc8a5bb69b9b7373fb87742741c
# Parent 32d4516a97f0b22ed06155f7b8e0bff075024991
AMD,IOMMU: Clean up old entries in remapping tables when creating new one

When changing the affinity of an IRQ associated with a passed
through PCI device, clear previous mapping.

This is XSA-36 / CVE-2013-0153.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

In addition, because some BIOSes may incorrectly program IVRS
entries for IOAPIC, try to check for entry consistency. Specifically,
if conflicting entries are found, disable IOMMU if per-device
remapping table is used. If entries refer to bogus IOAPIC IDs,
disable IOMMU unconditionally.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxx>

--- a/xen/drivers/passthrough/amd/iommu_acpi.c
+++ b/xen/drivers/passthrough/amd/iommu_acpi.c
@@ -22,6 +22,7 @@
#include <xen/errno.h>
#include <xen/acpi.h>
#include <asm/apicdef.h>
+#include <asm/io_apic.h>
#include <asm/amd-iommu.h>
#include <asm/hvm/svm/amd-iommu-proto.h>

@@ -635,6 +636,7 @@ static u16 __init parse_ivhd_device_spec
u16 header_length, u16 block_length, struct amd_iommu *iommu)
{
u16 dev_length, bdf;
+ int apic;

dev_length = sizeof(*special);
if ( header_length < (block_length + dev_length) )
@@ -651,10 +653,59 @@ static u16 __init parse_ivhd_device_spec
}

add_ivrs_mapping_entry(bdf, bdf, special->header.data_setting, iommu);
- /* set device id of ioapic */
- ioapic_sbdf[special->handle].bdf = bdf;
- ioapic_sbdf[special->handle].seg = seg;
- return dev_length;
+
+ if ( special->variety != ACPI_IVHD_IOAPIC )
+ {
+ if ( special->variety != ACPI_IVHD_HPET )
+ printk(XENLOG_ERR "Unrecognized IVHD special variety %#x\n",
+ special->variety);
+ return dev_length;
+ }
+
+ /*
+ * Some BIOSes have IOAPIC broken entries so we check for IVRS
+ * consistency here --- whether entry's IOAPIC ID is valid and
+ * whether there are conflicting/duplicated entries.
+ */
+ for ( apic = 0; apic < nr_ioapics; apic++ )
+ {
+ if ( IO_APIC_ID(apic) != special->handle )
+ continue;
+
+ if ( ioapic_sbdf[special->handle].pin_setup )
+ {
+ if ( ioapic_sbdf[special->handle].bdf == bdf &&
+ ioapic_sbdf[special->handle].seg == seg )
+ AMD_IOMMU_DEBUG("IVHD Warning: Duplicate IO-APIC %#x entries\n",
+ special->handle);
+ else
+ {
+ printk(XENLOG_ERR "IVHD Error: Conflicting IO-APIC %#x entries\n",
+ special->handle);
+ if ( amd_iommu_perdev_intremap )
+ return 0;
+ }
+ }
+ else
+ {
+ /* set device id of ioapic */
+ ioapic_sbdf[special->handle].bdf = bdf;
+ ioapic_sbdf[special->handle].seg = seg;
+
+ ioapic_sbdf[special->handle].pin_setup = xzalloc_array(
+ unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
+ if ( nr_ioapic_entries[apic] &&
+ !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+ {
+ printk(XENLOG_ERR "IVHD Error: Out of memory\n");
+ return 0;
+ }
+ }
+ return dev_length;
+ }
+
+ printk(XENLOG_ERR "IVHD Error: Invalid IO-APIC %#x\n", special->handle);
+ return 0;
}

static int __init parse_ivhd_block(const struct acpi_ivrs_hardware *ivhd_block)
--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -99,12 +99,12 @@ static void update_intremap_entry(u32* e
static void update_intremap_entry_from_ioapic(
int bdf,
struct amd_iommu *iommu,
- struct IO_APIC_route_entry *ioapic_rte)
+ const struct IO_APIC_route_entry *rte,
+ const struct IO_APIC_route_entry *old_rte)
{
unsigned long flags;
u32* entry;
u8 delivery_mode, dest, vector, dest_mode;
- struct IO_APIC_route_entry *rte = ioapic_rte;
int req_id;
spinlock_t *lock;
int offset;
@@ -120,6 +120,14 @@ static void update_intremap_entry_from_i
spin_lock_irqsave(lock, flags);

offset = get_intremap_offset(vector, delivery_mode);
+ if ( old_rte )
+ {
+ int old_offset = get_intremap_offset(old_rte->vector,
+ old_rte->delivery_mode);
+
+ if ( offset != old_offset )
+ free_intremap_entry(iommu->seg, bdf, old_offset);
+ }
entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);

@@ -188,6 +196,7 @@ int __init amd_iommu_setup_ioapic_remapp
amd_iommu_flush_intremap(iommu, req_id);
spin_unlock_irqrestore(&iommu->lock, flags);
}
+ set_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup);
}
}
return 0;
@@ -199,6 +208,7 @@ void amd_iommu_ioapic_update_ire(
struct IO_APIC_route_entry old_rte = { 0 };
struct IO_APIC_route_entry new_rte = { 0 };
unsigned int rte_lo = (reg & 1) ? reg - 1 : reg;
+ unsigned int pin = (reg - 0x10) / 2;
int saved_mask, seg, bdf;
struct amd_iommu *iommu;

@@ -236,6 +246,14 @@ void amd_iommu_ioapic_update_ire(
*(((u32 *)&new_rte) + 1) = value;
}

+ if ( new_rte.mask &&
+ !test_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) )
+ {
+ ASSERT(saved_mask);
+ __io_apic_write(apic, reg, value);
+ return;
+ }
+
/* mask the interrupt while we change the intremap table */
if ( !saved_mask )
{
@@ -244,7 +262,11 @@ void amd_iommu_ioapic_update_ire(
}

/* Update interrupt remapping entry */
- update_intremap_entry_from_ioapic(bdf, iommu, &new_rte);
+ update_intremap_entry_from_ioapic(
+ bdf, iommu, &new_rte,
+ test_and_set_bit(pin,
+ ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) ? &old_rte
+ : NULL);

/* Forward write access to IO-APIC RTE */
__io_apic_write(apic, reg, value);
@@ -354,6 +376,12 @@ void amd_iommu_msi_msg_update_ire(
return;
}

+ if ( msi_desc->remap_index >= 0 )
+ update_intremap_entry_from_msi_msg(iommu, pdev, msi_desc, NULL);
+
+ if ( !msg )
+ return;
+
update_intremap_entry_from_msi_msg(iommu, pdev, msi_desc, msg);
}

--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -100,6 +100,7 @@ void amd_iommu_read_msi_from_ire(

extern struct ioapic_sbdf {
u16 bdf, seg;
+ unsigned long *pin_setup;
} ioapic_sbdf[MAX_IO_APICS];
extern void *shared_intremap_table;

++++++ 26518-AMD-IOMMU-disable-if-SATA-combined-mode.patch ++++++
References: CVE-2013-0153 XSA-36 bnc#800275

# HG changeset patch
# User Boris Ostrovsky <boris.ostrovsky@xxxxxxx>
# Date 1360074085 -3600
# Node ID e379a23b04655e9e43dc50944a5c9d1e59d8bee9
# Parent 601139e2b0db7dc8a5bb69b9b7373fb87742741c
AMD,IOMMU: Disable IOMMU if SATA Combined mode is on

AMD's SP5100 chipset can be placed into SATA Combined mode
that may prevent dom0 from booting when IOMMU is
enabled and per-device interrupt remapping table is used.
While SP5100 erratum 28 requires BIOSes to disable this mode,
some may still use it.

This patch checks whether this mode is on and, if per-device
table is in use, disables IOMMU.

This is XSA-36 / CVE-2013-0153.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxx>

Flipped operands of && in amd_iommu_init() to make the message issued
by amd_sp5100_erratum28() match reality (when amd_iommu_perdev_intremap
is zero, there's really no point in calling the function).

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Committed-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -1118,12 +1118,45 @@ static int __init amd_iommu_setup_device
return 0;
}

+/* Check whether SP5100 SATA Combined mode is on */
+static bool_t __init amd_sp5100_erratum28(void)
+{
+ u32 bus, id;
+ u16 vendor_id, dev_id;
+ u8 byte;
+
+ for (bus = 0; bus < 256; bus++)
+ {
+ id = pci_conf_read32(0, bus, 0x14, 0, PCI_VENDOR_ID);
+
+ vendor_id = id & 0xffff;
+ dev_id = (id >> 16) & 0xffff;
+
+ /* SP5100 SMBus module sets Combined mode on */
+ if (vendor_id != 0x1002 || dev_id != 0x4385)
+ continue;
+
+ byte = pci_conf_read8(0, bus, 0x14, 0, 0xad);
+ if ( (byte >> 3) & 1 )
+ {
+ printk(XENLOG_WARNING "AMD-Vi: SP5100 erratum 28 detected, disabling IOMMU.\n"
+ "If possible, disable SATA Combined mode in BIOS or contact your vendor for BIOS update.\n");
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
int __init amd_iommu_init(void)
{
struct amd_iommu *iommu;

BUG_ON( !iommu_found() );

+ if ( amd_iommu_perdev_intremap && amd_sp5100_erratum28() )
+ goto error_out;
+
ivrs_bdf_entries = amd_iommu_get_ivrs_dev_entries();

if ( !ivrs_bdf_entries )
++++++ 26519-AMD-IOMMU-perdev-intremap-default.patch ++++++
References: CVE-2013-0153 XSA-36 bnc#800275

# HG changeset patch
# User Boris Ostrovsky <boris.ostrovsky@xxxxxxx>
# Date 1360074131 -3600
# Node ID 1af531e7bc2fc518f16d8d1461083c528e1517cf
# Parent e379a23b04655e9e43dc50944a5c9d1e59d8bee9
AMD,IOMMU: Make per-device interrupt remapping table default

Using global interrupt remapping table may be insecure, as
described by XSA-36. This patch makes per-device mode default.

This is XSA-36 / CVE-2013-0153.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxx>

Moved warning in amd_iov_detect() to location covering all cases.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Committed-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -1942,9 +1942,6 @@ int map_domain_pirq(
spin_lock_irqsave(&desc->lock, flags);
set_domain_irq_pirq(d, irq, info);
spin_unlock_irqrestore(&desc->lock, flags);
-
- if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV )
- printk(XENLOG_INFO "Per-device vector maps for GSIs not implemented yet.\n");
}

done:
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -204,6 +204,8 @@ int __init amd_iov_detect(void)
{
printk("AMD-Vi: Not overriding irq_vector_map setting\n");
}
+ if ( !amd_iommu_perdev_intremap )
+ printk(XENLOG_WARNING "AMD-Vi: Using global interrupt remap table is not recommended (see XSA-36)!\n");
return scan_pci_devices();
}

--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -52,7 +52,7 @@ bool_t __read_mostly iommu_qinval = 1;
bool_t __read_mostly iommu_intremap = 1;
bool_t __read_mostly iommu_hap_pt_share = 1;
bool_t __read_mostly iommu_debug;
-bool_t __read_mostly amd_iommu_perdev_intremap;
+bool_t __read_mostly amd_iommu_perdev_intremap = 1;

DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb);

++++++ 26526-pvdrv-no-devinit.patch ++++++
# HG changeset patch
# User Olaf Hering <olaf@xxxxxxxxx>
# Date 1360664991 -3600
# Node ID a37aa55c3cbcb0e8340b4985314ef8fb31d7610b
# Parent 9af6e566befe5516e66b62197813aa22e1d7122c
unmodified_drivers: __devinit was removed in linux-3.8

Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>

Merge with __init handling.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Committed-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h
+++ b/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h
@@ -13,10 +13,19 @@
#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED
#endif

-#if defined(_LINUX_INIT_H) && !defined(__init)
+#ifdef _LINUX_INIT_H
+
+#ifndef __init
#define __init
#endif

+#ifndef __devinit
+#define __devinit
+#define __devinitdata
+#endif
+
+#endif /* _LINUX_INIT_H */
+
#if defined(__LINUX_CACHE_H) && !defined(__read_mostly)
#define __read_mostly
#endif
++++++ 26529-gcc48-build-fix.patch ++++++
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1360775011 0
# Node ID 97b7e546e2e4a021491e198a33f7d685550ebc73
# Parent 742dde457258422a3d08e3ddbf9a7eae55c93acb
gcc4.8 build fix: Add -Wno-unused-local-typedefs to CFLAGS.

Based on a patch by M A Young <m.a.young@xxxxxxxxxxxx>

Signed-off-by: Keir Fraser <keir@xxxxxxx>

--- a/Config.mk
+++ b/Config.mk
@@ -166,6 +166,7 @@ CFLAGS-$(clang) += -Wno-parentheses -Wno
$(call cc-option-add,HOSTCFLAGS,HOSTCC,-Wdeclaration-after-statement)
$(call cc-option-add,CFLAGS,CC,-Wdeclaration-after-statement)
$(call cc-option-add,CFLAGS,CC,-Wno-unused-but-set-variable)
+$(call cc-option-add,CFLAGS,CC,-Wno-unused-local-typedefs)

LDFLAGS += $(foreach i, $(EXTRA_LIB), -L$(i))
CFLAGS += $(foreach i, $(EXTRA_INCLUDES), -I$(i))
++++++ 26531-AMD-IOMMU-IVHD-special-missing.patch ++++++
References: CVE-2013-0153 XSA-36 bnc#800275

# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1360831252 -3600
# Node ID e68f14b9e73925e9d404e517ba510f73fe472e4e
# Parent c43be17eec0602015fc6461d1f13c992ba330c20
AMD IOMMU: also spot missing IO-APIC entries in IVRS table

Apart from dealing with duplicate or conflicting entries, we also have to
handle firmware omitting IO-APIC entries in IVRS altogether. Not doing
so has resulted in c/s 26517:601139e2b0db to crash such systems during
boot (whereas with the change here the IOMMU gets disabled just as is
being done in the other cases, i.e. unless global tables are being
used).

Debugging this issue has also pointed out that the debug log output is
pretty ugly to look at - consolidate the output, and add one extra
item for the IVHD special entries, so that future issues are easier
to analyze.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Tested-by: Sander Eikelenboom <linux@xxxxxxxxxxxxxx>
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>

--- a/xen/drivers/passthrough/amd/iommu_acpi.c
+++ b/xen/drivers/passthrough/amd/iommu_acpi.c
@@ -352,9 +352,8 @@ static int __init parse_ivmd_block(const
base = start_addr & PAGE_MASK;
limit = (start_addr + mem_length - 1) & PAGE_MASK;

- AMD_IOMMU_DEBUG("IVMD Block: Type 0x%x\n",ivmd_block->header.type);
- AMD_IOMMU_DEBUG(" Start_Addr_Phys 0x%lx\n", start_addr);
- AMD_IOMMU_DEBUG(" Mem_Length 0x%lx\n", mem_length);
+ AMD_IOMMU_DEBUG("IVMD Block: type %#x phys %#lx len %#lx\n",
+ ivmd_block->header.type, start_addr, mem_length);

if ( ivmd_block->header.flags & ACPI_IVMD_EXCLUSION_RANGE )
iw = ir = IOMMU_CONTROL_ENABLED;
@@ -549,8 +548,8 @@ static u16 __init parse_ivhd_device_alia
return 0;
}

- AMD_IOMMU_DEBUG(" Dev_Id Range: 0x%x -> 0x%x\n", first_bdf, last_bdf);
- AMD_IOMMU_DEBUG(" Dev_Id Alias: 0x%x\n", alias_id);
+ AMD_IOMMU_DEBUG(" Dev_Id Range: %#x -> %#x alias %#x\n",
+ first_bdf, last_bdf, alias_id);

for ( bdf = first_bdf; bdf <= last_bdf; bdf++ )
add_ivrs_mapping_entry(bdf, alias_id, range->alias.header.data_setting,
@@ -652,6 +651,9 @@ static u16 __init parse_ivhd_device_spec
return 0;
}

+ AMD_IOMMU_DEBUG("IVHD Special: %04x:%02x:%02x.%u variety %#x handle %#x\n",
+ seg, PCI_BUS(bdf), PCI_SLOT(bdf), PCI_FUNC(bdf),
+ special->variety, special->handle);
add_ivrs_mapping_entry(bdf, bdf, special->header.data_setting, iommu);

if ( special->variety != ACPI_IVHD_IOAPIC )
@@ -737,10 +739,9 @@ static int __init parse_ivhd_block(const
{
ivhd_device = (const void *)((const u8 *)ivhd_block + block_length);

- AMD_IOMMU_DEBUG( "IVHD Device Entry:\n");
- AMD_IOMMU_DEBUG( " Type 0x%x\n", ivhd_device->header.type);
- AMD_IOMMU_DEBUG( " Dev_Id 0x%x\n", ivhd_device->header.id);
- AMD_IOMMU_DEBUG( " Flags 0x%x\n", ivhd_device->header.data_setting);
+ AMD_IOMMU_DEBUG("IVHD Device Entry: type %#x id %#x flags %#x\n",
+ ivhd_device->header.type, ivhd_device->header.id,
+ ivhd_device->header.data_setting);

switch ( ivhd_device->header.type )
{
@@ -869,6 +870,7 @@ static int __init parse_ivrs_table(struc
{
const struct acpi_ivrs_header *ivrs_block;
unsigned long length;
+ unsigned int apic;
int error = 0;

BUG_ON(!table);
@@ -882,11 +884,9 @@ static int __init parse_ivrs_table(struc
{
ivrs_block = (struct acpi_ivrs_header *)((u8 *)table + length);

- AMD_IOMMU_DEBUG("IVRS Block:\n");
- AMD_IOMMU_DEBUG(" Type 0x%x\n", ivrs_block->type);
- AMD_IOMMU_DEBUG(" Flags 0x%x\n", ivrs_block->flags);
- AMD_IOMMU_DEBUG(" Length 0x%x\n", ivrs_block->length);
- AMD_IOMMU_DEBUG(" Dev_Id 0x%x\n", ivrs_block->device_id);
+ AMD_IOMMU_DEBUG("IVRS Block: type %#x flags %#x len %#x id %#x\n",
+ ivrs_block->type, ivrs_block->flags,
+ ivrs_block->length, ivrs_block->device_id);

if ( table->length < (length + ivrs_block->length) )
{
@@ -901,6 +901,29 @@ static int __init parse_ivrs_table(struc
length += ivrs_block->length;
}

+ /* Each IO-APIC must have been mentioned in the table. */
+ for ( apic = 0; !error && apic < nr_ioapics; ++apic )
+ {
+ if ( !nr_ioapic_entries[apic] ||
+ ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+ continue;
+
+ printk(XENLOG_ERR "IVHD Error: no information for IO-APIC %#x\n",
+ IO_APIC_ID(apic));
+ if ( amd_iommu_perdev_intremap )
+ error = -ENXIO;
+ else
+ {
+ ioapic_sbdf[IO_APIC_ID(apic)].pin_setup = xzalloc_array(
+ unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
+ if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+ {
+ printk(XENLOG_ERR "IVHD Error: Out of memory\n");
+ error = -ENOMEM;
+ }
+ }
+ }
+
return error;
}

++++++ 26532-AMD-IOMMU-phantom-MSI.patch ++++++
References: bnc#787169

# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1360831377 -3600
# Node ID 788f4551580d476e13ea907e373e58806a32179e
# Parent e68f14b9e73925e9d404e517ba510f73fe472e4e
AMD IOMMU: handle MSI for phantom functions

With ordinary requests allowed to come from phantom functions, the
remapping tables ought to be set up to also allow for MSI triggers to
come from other than the "real" device too.

It is not clear to me whether the alias-ID handling also needs
adjustment for this to work properly, or whether firmware can be
expected to properly express this through a device alias range
descriptor (or multiple device alias ones).

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>

--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -284,33 +284,32 @@ void amd_iommu_ioapic_update_ire(
}

static void update_intremap_entry_from_msi_msg(
- struct amd_iommu *iommu, struct pci_dev *pdev,
- struct msi_desc *msi_desc, struct msi_msg *msg)
+ struct amd_iommu *iommu, u16 bdf,
+ int *remap_index, const struct msi_msg *msg)
{
unsigned long flags;
u32* entry;
- u16 bdf, req_id, alias_id;
+ u16 req_id, alias_id;
u8 delivery_mode, dest, vector, dest_mode;
spinlock_t *lock;
int offset;

- bdf = (pdev->bus << 8) | pdev->devfn;
- req_id = get_dma_requestor_id(pdev->seg, bdf);
- alias_id = get_intremap_requestor_id(pdev->seg, bdf);
+ req_id = get_dma_requestor_id(iommu->seg, bdf);
+ alias_id = get_intremap_requestor_id(iommu->seg, bdf);

if ( msg == NULL )
{
lock = get_intremap_lock(iommu->seg, req_id);
spin_lock_irqsave(lock, flags);
- free_intremap_entry(iommu->seg, req_id, msi_desc->remap_index);
+ free_intremap_entry(iommu->seg, req_id, *remap_index);
spin_unlock_irqrestore(lock, flags);

if ( ( req_id != alias_id ) &&
- get_ivrs_mappings(pdev->seg)[alias_id].intremap_table != NULL )
+ get_ivrs_mappings(iommu->seg)[alias_id].intremap_table != NULL )
{
lock = get_intremap_lock(iommu->seg, alias_id);
spin_lock_irqsave(lock, flags);
- free_intremap_entry(iommu->seg, alias_id, msi_desc->remap_index);
+ free_intremap_entry(iommu->seg, alias_id, *remap_index);
spin_unlock_irqrestore(lock, flags);
}
goto done;
@@ -324,7 +323,10 @@ static void update_intremap_entry_from_m
vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK;
dest = (msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff;
offset = get_intremap_offset(vector, delivery_mode);
- msi_desc->remap_index = offset;
+ if ( *remap_index < 0)
+ *remap_index = offset;
+ else
+ BUG_ON(*remap_index != offset);

entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
@@ -339,7 +341,7 @@ static void update_intremap_entry_from_m

lock = get_intremap_lock(iommu->seg, alias_id);
if ( ( req_id != alias_id ) &&
- get_ivrs_mappings(pdev->seg)[alias_id].intremap_table != NULL )
+ get_ivrs_mappings(iommu->seg)[alias_id].intremap_table != NULL )
{
spin_lock_irqsave(lock, flags);
entry = (u32*)get_intremap_entry(iommu->seg, alias_id, offset);
@@ -362,27 +364,44 @@ void amd_iommu_msi_msg_update_ire(
struct msi_desc *msi_desc, struct msi_msg *msg)
{
struct pci_dev *pdev = msi_desc->dev;
+ int bdf = PCI_BDF2(pdev->bus, pdev->devfn);
struct amd_iommu *iommu = NULL;

if ( !iommu_intremap )
return;

- iommu = find_iommu_for_device(pdev->seg, (pdev->bus << 8) | pdev->devfn);
-
+ iommu = find_iommu_for_device(pdev->seg, bdf);
if ( !iommu )
{
- AMD_IOMMU_DEBUG("Fail to find iommu for MSI device id = 0x%x\n",
- (pdev->bus << 8) | pdev->devfn);
+ AMD_IOMMU_DEBUG("Fail to find iommu for MSI device id = 0x%x\n", bdf);
return;
}

if ( msi_desc->remap_index >= 0 )
- update_intremap_entry_from_msi_msg(iommu, pdev, msi_desc, NULL);
+ {
+ do {
+ update_intremap_entry_from_msi_msg(iommu, bdf,
+ &msi_desc->remap_index, NULL);
+ if ( !pdev || !pdev->phantom_stride )
+ break;
+ bdf += pdev->phantom_stride;
+ } while ( PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
+
+ msi_desc->remap_index = -1;
+ if ( pdev )
+ bdf = PCI_BDF2(pdev->bus, pdev->devfn);
+ }

if ( !msg )
return;

- update_intremap_entry_from_msi_msg(iommu, pdev, msi_desc, msg);
+ do {
+ update_intremap_entry_from_msi_msg(iommu, bdf, &msi_desc->remap_index,
+ msg);
+ if ( !pdev || !pdev->phantom_stride )
+ break;
+ bdf += pdev->phantom_stride;
+ } while ( PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
}

void amd_iommu_read_msi_from_ire(
++++++ 26536-xenoprof-div-by-0.patch ++++++
# HG changeset patch
# User Tim Deegan <tim@xxxxxxx>
# Date 1360917722 -3600
# Node ID 0cca8a18432f08b342d76a753aa98559d892f592
# Parent 7af3c38ae187b351c5cea58e9eee482b50d814d8
xenoprof: avoid division by 0

Signed-off-by: Tim Deegan <tim@xxxxxxx>
Acked-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
Committed-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/common/xenoprof.c
+++ b/xen/common/xenoprof.c
@@ -193,6 +193,13 @@ static int alloc_xenoprof_struct(
unsigned max_max_samples;
int i;

+ nvcpu = 0;
+ for_each_vcpu ( d, v )
+ nvcpu++;
+
+ if ( !nvcpu )
+ return -EINVAL;
+
d->xenoprof = xzalloc(struct xenoprof);
if ( d->xenoprof == NULL )
{
@@ -209,10 +216,6 @@ static int alloc_xenoprof_struct(
return -ENOMEM;
}

- nvcpu = 0;
- for_each_vcpu ( d, v )
- nvcpu++;
-
bufsize = sizeof(struct xenoprof_buf);
i = sizeof(struct event_log);
#ifdef CONFIG_COMPAT
++++++ 26547-tools-xc_fix_logic_error_in_stdiostream_progress.patch ++++++
changeset: 26547:8285d20a6f5b
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Feb 15 13:32:11 2013 +0000
files: tools/libxc/xtl_logger_stdio.c
description:
tools/xc: fix logic error in stdiostream_progress

Setting XTL_STDIOSTREAM_HIDE_PROGRESS should disable progress reporting.

Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Committed-by: Ian Campbell <ian.campbell@xxxxxxxxxx>


diff -r 0141aeb86b79 -r 8285d20a6f5b tools/libxc/xtl_logger_stdio.c
--- a/tools/libxc/xtl_logger_stdio.c Fri Feb 15 13:32:10 2013 +0000
+++ b/tools/libxc/xtl_logger_stdio.c Fri Feb 15 13:32:11 2013 +0000
@@ -89,7 +89,7 @@ static void stdiostream_progress(struct
int newpel, extra_erase;
xentoollog_level this_level;

- if (!(lg->flags & XTL_STDIOSTREAM_HIDE_PROGRESS))
+ if (lg->flags & XTL_STDIOSTREAM_HIDE_PROGRESS)
return;

if (percent < lg->progress_last_percent) {
++++++ 26548-tools-xc_handle_tty_output_differently_in_stdiostream_progress.patch ++++++
changeset: 26548:e7d9bac5c11d
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Feb 15 13:32:11 2013 +0000
files: tools/libxc/xtl_logger_stdio.c
description:
tools/xc: handle tty output differently in stdiostream_progress

If the output goes to a tty, rewind the cursor and print everything in a
single line as it was done up to now. If the output goes to a file or
pipe print a newline after each progress output. This will fix logging
of progress messages from xc_save to xend.log.

To support XTL_STDIOSTREAM_SHOW_PID or XTL_STDIOSTREAM_SHOW_DATE print
the output via vmessage if the output is not a tty.

Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Committed-by: Ian Campbell <ian.campbell@xxxxxxxxxx>


diff -r 8285d20a6f5b -r e7d9bac5c11d tools/libxc/xtl_logger_stdio.c
--- a/tools/libxc/xtl_logger_stdio.c Fri Feb 15 13:32:11 2013 +0000
+++ b/tools/libxc/xtl_logger_stdio.c Fri Feb 15 13:32:11 2013 +0000
@@ -81,6 +81,17 @@ static void stdiostream_vmessage(xentool
fflush(lg->f);
}

+static void stdiostream_message(struct xentoollog_logger *logger_in,
+ xentoollog_level level,
+ const char *context,
+ const char *format, ...)
+{
+ va_list al;
+ va_start(al,format);
+ stdiostream_vmessage(logger_in, level, -1, context, format, al);
+ va_end(al);
+}
+
static void stdiostream_progress(struct xentoollog_logger *logger_in,
const char *context,
const char *doing_what, int percent,
@@ -105,11 +116,18 @@ static void stdiostream_progress(struct
if (this_level < lg->min_level)
return;

+ lg->progress_last_percent = percent;
+
+ if (isatty(fileno(lg->f)) <= 0) {
+ stdiostream_message(logger_in, this_level, context,
+ "%s: %lu/%lu %3d%%",
+ doing_what, done, total, percent);
+ return;
+ }
+
if (lg->progress_erase_len)
putc('\r', lg->f);

- lg->progress_last_percent = percent;
-
newpel = fprintf(lg->f, "%s%s" "%s: %lu/%lu %3d%%%s",
context?context:"", context?": ":"",
doing_what, done, total, percent,
++++++ 26549-tools-xc_turn_XCFLAGS__into_shifts.patch ++++++
changeset: 26549:d2991367ecd2
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Feb 15 13:32:12 2013 +0000
files: tools/libxc/xenguest.h
description:
tools/xc: turn XCFLAGS_* into shifts

to make it clear that these are bits and to make it easier to use in
xend code.

Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Committed-by: Ian Campbell <ian.campbell@xxxxxxxxxx>


diff -r e7d9bac5c11d -r d2991367ecd2 tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h Fri Feb 15 13:32:11 2013 +0000
+++ b/tools/libxc/xenguest.h Fri Feb 15 13:32:12 2013 +0000
@@ -23,11 +23,12 @@
#ifndef XENGUEST_H
#define XENGUEST_H

-#define XCFLAGS_LIVE 1
-#define XCFLAGS_DEBUG 2
-#define XCFLAGS_HVM 4
-#define XCFLAGS_STDVGA 8
-#define XCFLAGS_CHECKPOINT_COMPRESS 16
+#define XCFLAGS_LIVE (1 << 0)
+#define XCFLAGS_DEBUG (1 << 1)
+#define XCFLAGS_HVM (1 << 2)
+#define XCFLAGS_STDVGA (1 << 3)
+#define XCFLAGS_CHECKPOINT_COMPRESS (1 << 4)
+
#define X86_64_B_SIZE 64
#define X86_32_B_SIZE 32

++++++ 26550-tools-xc_restore_logging_in_xc_save.patch ++++++
changeset: 26550:e6c373fcb73e
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Feb 15 13:32:13 2013 +0000
files: tools/xcutils/xc_save.c
description:
tools/xc: restore logging in xc_save

Prior to xen-4.1 the helper xc_save would print some progress during
migration. With the new xc_interface_open API no more messages were
printed because no logger was configured.

Restore previous behaviour by providing a logger. The progress in
xc_domain_save will be disabled because it generates a lot of output and
fills up xend.log quickly.

Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Committed-by: Ian Campbell <ian.campbell@xxxxxxxxxx>


diff -r d2991367ecd2 -r e6c373fcb73e tools/xcutils/xc_save.c
--- a/tools/xcutils/xc_save.c Fri Feb 15 13:32:12 2013 +0000
+++ b/tools/xcutils/xc_save.c Fri Feb 15 13:32:13 2013 +0000
@@ -166,17 +166,15 @@ static int switch_qemu_logdirty(int domi
int
main(int argc, char **argv)
{
- unsigned int maxit, max_f;
+ unsigned int maxit, max_f, lflags;
int io_fd, ret, port;
struct save_callbacks callbacks;
+ xentoollog_level lvl;
+ xentoollog_logger *l;

if (argc != 6)
errx(1, "usage: %s iofd domid maxit maxf flags", argv[0]);

- si.xch = xc_interface_open(0,0,0);
- if (!si.xch)
- errx(1, "failed to open control interface");
-
io_fd = atoi(argv[1]);
si.domid = atoi(argv[2]);
maxit = atoi(argv[3]);
@@ -185,6 +183,13 @@ main(int argc, char **argv)

si.suspend_evtchn = -1;

+ lvl = si.flags & XCFLAGS_DEBUG ? XTL_DEBUG: XTL_DETAIL;
+ lflags = XTL_STDIOSTREAM_HIDE_PROGRESS;
+ l = (xentoollog_logger *)xtl_createlogger_stdiostream(stderr, lvl, lflags);
+ si.xch = xc_interface_open(l, 0, 0);
+ if (!si.xch)
+ errx(1, "failed to open control interface");
+
si.xce = xc_evtchn_open(NULL, 0);
if (si.xce == NULL)
warnx("failed to open event channel handle");
++++++ 26551-tools-xc_log_pid_in_xc_save-xc_restore_output.patch ++++++
changeset: 26551:48f9436959dd
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Feb 15 13:32:13 2013 +0000
files: tools/libxc/xc_domain_restore.c tools/libxc/xc_domain_save.c
tools/xcutils/xc_restore.c tools/xcutils/xc_save.c
description:
tools/xc: log pid in xc_save/xc_restore output

If several migrations log their output to xend.log it's not clear which
line belongs to which guest. Print entry/exit of xc_save and
xc_restore and also request to print pid with each log call.

Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Committed-by: Ian Campbell <ian.campbell@xxxxxxxxxx>


Index: xen-4.2.1-testing/tools/libxc/xc_domain_restore.c
===================================================================
--- xen-4.2.1-testing.orig/tools/libxc/xc_domain_restore.c
+++ xen-4.2.1-testing/tools/libxc/xc_domain_restore.c
@@ -1382,6 +1382,8 @@ int xc_domain_restore(xc_interface *xch,
struct restore_ctx *ctx = &_ctx;
struct domain_info_context *dinfo = &ctx->dinfo;

+ DPRINTF("%s: starting restore of new domid %u", __func__, dom);
+
pagebuf_init(&pagebuf);
memset(&tailbuf, 0, sizeof(tailbuf));
tailbuf.ishvm = hvm;
@@ -1408,7 +1410,7 @@ int xc_domain_restore(xc_interface *xch,
PERROR("read: p2m_size");
goto out;
}
- DPRINTF("xc_domain_restore start: p2m_size = %lx\n", dinfo->p2m_size);
+ DPRINTF("%s: p2m_size = %lx\n", __func__, dinfo->p2m_size);

if ( !get_platform_info(xch, dom,
&ctx->max_mfn, &ctx->hvirt_start, &ctx->pt_levels, &dinfo->guest_width) )
@@ -2215,7 +2217,7 @@ int xc_domain_restore(xc_interface *xch,

fcntl(io_fd, F_SETFL, orig_io_fd_flags);

- DPRINTF("Restore exit with rc=%d\n", rc);
+ DPRINTF("Restore exit of domid %u with rc=%d\n", dom, rc);

return rc;
}
Index: xen-4.2.1-testing/tools/libxc/xc_domain_save.c
===================================================================
--- xen-4.2.1-testing.orig/tools/libxc/xc_domain_save.c
+++ xen-4.2.1-testing/tools/libxc/xc_domain_save.c
@@ -897,6 +897,8 @@ int xc_domain_save(xc_interface *xch, in

int completed = 0;

+ DPRINTF("%s: starting save of domid %u", __func__, dom);
+
if ( hvm && !callbacks->switch_qemu_logdirty )
{
ERROR("No switch_qemu_logdirty callback provided.");
@@ -2112,7 +2114,7 @@ int xc_domain_save(xc_interface *xch, in
free(pfn_err);
free(to_fix);

- DPRINTF("Save exit rc=%d\n",rc);
+ DPRINTF("Save exit of domid %u with rc=%d\n", dom, rc);

return !!rc;
}
Index: xen-4.2.1-testing/tools/xcutils/xc_restore.c
===================================================================
--- xen-4.2.1-testing.orig/tools/xcutils/xc_restore.c
+++ xen-4.2.1-testing/tools/xcutils/xc_restore.c
@@ -19,17 +19,22 @@ int
main(int argc, char **argv)
{
unsigned int domid, store_evtchn, console_evtchn;
- unsigned int hvm, pae, apic;
+ unsigned int hvm, pae, apic, lflags;
xc_interface *xch;
int io_fd, ret;
int superpages;
unsigned long store_mfn, console_mfn;
+ xentoollog_level lvl;
+ xentoollog_logger *l;

if ( (argc != 8) && (argc != 9) )
errx(1, "usage: %s iofd domid store_evtchn "
"console_evtchn hvm pae apic [superpages]", argv[0]);

- xch = xc_interface_open(0,0,0);
+ lvl = XTL_DETAIL;
+ lflags = XTL_STDIOSTREAM_SHOW_PID | XTL_STDIOSTREAM_HIDE_PROGRESS;
+ l = (xentoollog_logger *)xtl_createlogger_stdiostream(stderr, lvl, lflags);
+ xch = xc_interface_open(l, 0, 0);
if ( !xch )
errx(1, "failed to open control interface");

Index: xen-4.2.1-testing/tools/xcutils/xc_save.c
===================================================================
--- xen-4.2.1-testing.orig/tools/xcutils/xc_save.c
+++ xen-4.2.1-testing/tools/xcutils/xc_save.c
@@ -184,7 +184,7 @@ main(int argc, char **argv)
si.suspend_evtchn = -1;

lvl = si.flags & XCFLAGS_DEBUG ? XTL_DEBUG: XTL_DETAIL;
- lflags = XTL_STDIOSTREAM_HIDE_PROGRESS;
+ lflags = XTL_STDIOSTREAM_SHOW_PID | XTL_STDIOSTREAM_HIDE_PROGRESS;
l = (xentoollog_logger *)xtl_createlogger_stdiostream(stderr, lvl, lflags);
si.xch = xc_interface_open(l, 0, 0);
if (!si.xch)
++++++ 26554-hvm-firmware-passthrough.patch ++++++
# HG changeset patch
# User Ross Philipson <ross.philipson@xxxxxxxxxx>
# Date 1360935136 0
# Node ID 3124ab7855fd7d4e0f3ea125cb21b60d693e8800
# Parent 71c15ae0998378b5c117bbd27a48015757685706
libxl: switch to using the new xc_hvm_build() libxc API.

Signed-off-by: Ross Philipson <ross.philipson@xxxxxxxxxx>
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Committed-by: Ian Campbell <ian.campbell@xxxxxxxxxx>

diff -r 71c15ae09983 -r 3124ab7855fd tools/libxl/libxl_dom.c
--- a/tools/libxl/libxl_dom.c Fri Feb 15 13:32:15 2013 +0000
+++ b/tools/libxl/libxl_dom.c Fri Feb 15 13:32:16 2013 +0000
@@ -542,17 +542,24 @@ int libxl__build_hvm(libxl__gc *gc, uint
libxl__domain_build_state *state)
{
libxl_ctx *ctx = libxl__gc_owner(gc);
+ struct xc_hvm_build_args args = {};
int ret, rc = ERROR_FAIL;
const char *firmware = libxl__domain_firmware(gc, info);

if (!firmware)
goto out;
- ret = xc_hvm_build_target_mem(
- ctx->xch,
- domid,
- (info->max_memkb - info->video_memkb) / 1024,
- (info->target_memkb - info->video_memkb) / 1024,
- firmware);
+
+ memset(&args, 0, sizeof(struct xc_hvm_build_args));
+ /* The params from the configuration file are in Mb, which are then
+ * multiplied by 1 Kb. This was then divided off when calling
+ * the old xc_hvm_build_target_mem() which then turned them to bytes.
+ * Do all this in one step here...
+ */
+ args.mem_size = (uint64_t)(info->max_memkb - info->video_memkb) << 10;
+ args.mem_target = (uint64_t)(info->target_memkb - info->video_memkb) << 10;
+ args.image_file_name = firmware;
+
+ ret = xc_hvm_build(ctx->xch, domid, &args);
if (ret) {
LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, ret, "hvm building failed");
goto out;
++++++ 26555-hvm-firmware-passthrough.patch ++++++
# HG changeset patch
# User Ross Philipson <ross.philipson@xxxxxxxxxx>
# Date 1360935136 0
# Node ID 17a228e37ec0913ff86b8b5f2d88f1b8e92146f1
# Parent 3124ab7855fd7d4e0f3ea125cb21b60d693e8800
libxl: HVM firmware passthrough support

This patch introduces support for two new parameters in libxl:

smbios_firmware=<path_to_smbios_structures_file>
acpi_firmware=<path_to_acpi_tables_file>

The changes are primarily in the domain building code where the firmware files
are read and passed to libxc for loading into the new guest. After the domain
building call to libxc, the addresses for the loaded blobs are returned and
written to xenstore.

LIBXL_HAVE_FIRMWARE_PASSTHROUGH is defined in libxl.h to allow users to
determine if the feature is present.

This patch also updates the xl.cfg man page with descriptions of the two new
parameters for firmware passthrough.

Signed-off-by: Ross Philipson <ross.philipson@xxxxxxxxxx>
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Committed-by: Ian Campbell <ian.campbell@xxxxxxxxxx>

Index: xen-4.2.1-testing/docs/man/xl.cfg.pod.5
===================================================================
--- xen-4.2.1-testing.orig/docs/man/xl.cfg.pod.5
+++ xen-4.2.1-testing/docs/man/xl.cfg.pod.5
@@ -637,6 +637,25 @@ of Xen) within a Xen guest or to support
which uses hardware virtualisation extensions (e.g. Windows XP
compatibility mode on more modern Windows OS).

+=item B<acpi_firmware="STRING">
+
+Specify a path to a file that contains extra ACPI firmware tables to pass in to
+a guest. The file can contain several tables in their binary AML form
+concatenated together. Each table self describes its length so no additional
+information is needed. These tables will be added to the ACPI table set in the
+guest. Note that existing tables cannot be overridden by this feature. For
+example this cannot be used to override tables like DSDT, FADT, etc.
+
+=item B<smbios_firmware="STRING">
+
+Specify a path to a file that contains extra SMBIOS firmware structures to pass
+in to a guest. The file can contain a set DMTF predefined structures which will
+override the internal defaults. Not all predefined structures can be
overridden,
+only the following types: 0, 1, 2, 3, 11, 22, 39. The file can also contain any
+number of vendor defined SMBIOS structures (type 128 - 255). Since SMBIOS
+structures do not present their overall size, each entry in the file must be
+preceded by a 32b integer indicating the size of the next structure.
+
=back

=head3 Guest Virtual Time Controls
Index: xen-4.2.1-testing/tools/libxl/libxl.h
===================================================================
--- xen-4.2.1-testing.orig/tools/libxl/libxl.h
+++ xen-4.2.1-testing/tools/libxl/libxl.h
@@ -68,6 +68,13 @@
*/

/*
+ * LIBXL_HAVE_FIRMWARE_PASSTHROUGH indicates the feature for
+ * passing in SMBIOS and ACPI firmware to HVM guests is present
+ * in the library.
+ */
+#define LIBXL_HAVE_FIRMWARE_PASSTHROUGH 1
+
+/*
* libxl ABI compatibility
*
* The only guarantee which libxl makes regarding ABI compatibility
Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
===================================================================
--- xen-4.2.1-testing.orig/tools/libxl/libxl_dom.c
+++ xen-4.2.1-testing/tools/libxl/libxl_dom.c
@@ -21,6 +21,7 @@

#include <xc_dom.h>
#include <xen/hvm/hvm_info_table.h>
+#include <xen/hvm/hvm_xs_strings.h>

libxl_domain_type libxl__domain_type(libxl__gc *gc, uint32_t domid)
{
@@ -510,11 +511,61 @@ static int hvm_build_set_params(xc_inter
return 0;
}

-static const char *libxl__domain_firmware(libxl__gc *gc,
- libxl_domain_build_info *info)
+static int hvm_build_set_xs_values(libxl__gc *gc,
+ uint32_t domid,
+ struct xc_hvm_build_args *args)
+{
+ char *path = NULL;
+ int ret = 0;
+
+ if (args->smbios_module.guest_addr_out) {
+ path = GCSPRINTF("/local/domain/%d/"HVM_XS_SMBIOS_PT_ADDRESS, domid);
+
+ ret = libxl__xs_write(gc, XBT_NULL, path, "0x%"PRIx64,
+ args->smbios_module.guest_addr_out);
+ if (ret)
+ goto err;
+
+ path = GCSPRINTF("/local/domain/%d/"HVM_XS_SMBIOS_PT_LENGTH, domid);
+
+ ret = libxl__xs_write(gc, XBT_NULL, path, "0x%x",
+ args->smbios_module.length);
+ if (ret)
+ goto err;
+ }
+
+ if (args->acpi_module.guest_addr_out) {
+ path = GCSPRINTF("/local/domain/%d/"HVM_XS_ACPI_PT_ADDRESS, domid);
+
+ ret = libxl__xs_write(gc, XBT_NULL, path, "0x%"PRIx64,
+ args->acpi_module.guest_addr_out);
+ if (ret)
+ goto err;
+
+ path = GCSPRINTF("/local/domain/%d/"HVM_XS_ACPI_PT_LENGTH, domid);
+
+ ret = libxl__xs_write(gc, XBT_NULL, path, "0x%x",
+ args->acpi_module.length);
+ if (ret)
+ goto err;
+ }
+
+ return 0;
+
+err:
+ LOG(ERROR, "failed to write firmware xenstore value, err: %d", ret);
+ return ret;
+}
+
+static int libxl__domain_firmware(libxl__gc *gc,
+ libxl_domain_build_info *info,
+ struct xc_hvm_build_args *args)
{
libxl_ctx *ctx = libxl__gc_owner(gc);
const char *firmware;
+ int e, rc = ERROR_FAIL;
+ int datalen = 0;
+ void *data;

if (info->u.hvm.firmware)
firmware = info->u.hvm.firmware;
@@ -528,13 +579,52 @@ static const char *libxl__domain_firmwar
firmware = "hvmloader";
break;
default:
- LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "invalid device model version
%d",
- info->device_model_version);
- return NULL;
+ LOG(ERROR, "invalid device model version %d",
+ info->device_model_version);
+ return ERROR_FAIL;
break;
}
}
- return libxl__abs_path(gc, firmware, libxl__xenfirmwaredir_path());
+ args->image_file_name = libxl__abs_path(gc, firmware,
+ libxl__xenfirmwaredir_path());
+
+ if (info->u.hvm.smbios_firmware) {
+ data = NULL;
+ e = libxl_read_file_contents(ctx, info->u.hvm.smbios_firmware,
+ &data, &datalen);
+ if (e) {
+ LOGEV(ERROR, e, "failed to read SMBIOS firmware file %s",
+ info->u.hvm.smbios_firmware);
+ goto out;
+ }
+ libxl__ptr_add(gc, data);
+ if (datalen) {
+ /* Only accept non-empty files */
+ args->smbios_module.data = data;
+ args->smbios_module.length = (uint32_t)datalen;
+ }
+ }
+
+ if (info->u.hvm.acpi_firmware) {
+ data = NULL;
+ e = libxl_read_file_contents(ctx, info->u.hvm.acpi_firmware,
+ &data, &datalen);
+ if (e) {
+ LOGEV(ERROR, e, "failed to read ACPI firmware file %s",
+ info->u.hvm.acpi_firmware);
+ goto out;
+ }
+ libxl__ptr_add(gc, data);
+ if (datalen) {
+ /* Only accept non-empty files */
+ args->acpi_module.data = data;
+ args->acpi_module.length = (uint32_t)datalen;
+ }
+ }
+
+ return 0;
+out:
+ return rc;
}

int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
@@ -544,10 +634,6 @@ int libxl__build_hvm(libxl__gc *gc, uint
libxl_ctx *ctx = libxl__gc_owner(gc);
struct xc_hvm_build_args args = {};
int ret, rc = ERROR_FAIL;
- const char *firmware = libxl__domain_firmware(gc, info);
-
- if (!firmware)
- goto out;

memset(&args, 0, sizeof(struct xc_hvm_build_args));
/* The params from the configuration file are in Mb, which are then
@@ -557,22 +643,34 @@ int libxl__build_hvm(libxl__gc *gc, uint
*/
args.mem_size = (uint64_t)(info->max_memkb - info->video_memkb) << 10;
args.mem_target = (uint64_t)(info->target_memkb - info->video_memkb) << 10;
- args.image_file_name = firmware;
+
+ if (libxl__domain_firmware(gc, info, &args)) {
+ LOG(ERROR, "initializing domain firmware failed");
+ goto out;
+ }

ret = xc_hvm_build(ctx->xch, domid, &args);
if (ret) {
- LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, ret, "hvm building failed");
+ LOGEV(ERROR, ret, "hvm building failed");
goto out;
}
+
ret = hvm_build_set_params(ctx->xch, domid, info, state->store_port,
&state->store_mfn, state->console_port,
&state->console_mfn, state->store_domid,
state->console_domid);
if (ret) {
- LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, ret, "hvm build set params
failed");
+ LOGEV(ERROR, ret, "hvm build set params failed");
goto out;
}
- rc = 0;
+
+ ret = hvm_build_set_xs_values(gc, domid, &args);
+ if (ret) {
+ LOG(ERROR, "hvm build set xenstore values failed (ret=%d)", ret);
+ goto out;
+ }
+
+ return 0;
out:
return rc;
}
@@ -634,7 +732,7 @@ int libxl__toolstack_restore(uint32_t do

memcpy(&count, ptr, sizeof(count));
ptr += sizeof(count);
-
+
if (size < sizeof(version) + sizeof(count) +
count * (sizeof(struct libxl__physmap_info))) {
LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "wrong size");
@@ -809,7 +907,7 @@ static void switch_logdirty_xswatch(libx
rc = libxl__xs_rm_checked(gc, t, lds->ret_path);
if (rc) goto out;

- rc = libxl__xs_transaction_commit(gc, &t);
+ rc = libxl__xs_transaction_commit(gc, &t);
if (!rc) break;
if (rc<0) goto out;
}
@@ -1281,7 +1379,7 @@ void libxl__xc_domain_save_done(libxl__e
if (type == LIBXL_DOMAIN_TYPE_HVM) {
rc = libxl__domain_suspend_device_model(gc, dss);
if (rc) goto out;
-
+
libxl__domain_save_device_model(egc, dss, domain_suspend_done);
return;
}
Index: xen-4.2.1-testing/tools/libxl/libxl_types.idl
===================================================================
--- xen-4.2.1-testing.orig/tools/libxl/libxl_types.idl
+++ xen-4.2.1-testing/tools/libxl/libxl_types.idl
@@ -301,6 +301,8 @@ libxl_domain_build_info = Struct("domain
("vpt_align", libxl_defbool),
("timer_mode", libxl_timer_mode),
("nested_hvm", libxl_defbool),
+ ("smbios_firmware", string),
+ ("acpi_firmware", string),
("nographic", libxl_defbool),
("vga",
libxl_vga_interface_info),
("vnc", libxl_vnc_info),
Index: xen-4.2.1-testing/tools/libxl/xl_cmdimpl.c
===================================================================
--- xen-4.2.1-testing.orig/tools/libxl/xl_cmdimpl.c
+++ xen-4.2.1-testing/tools/libxl/xl_cmdimpl.c
@@ -863,6 +863,11 @@ static void parse_config_data(const char
}

xlu_cfg_get_defbool(config, "nestedhvm", &b_info->u.hvm.nested_hvm, 0);
+
+ xlu_cfg_replace_string(config, "smbios_firmware",
+ &b_info->u.hvm.smbios_firmware, 0);
+ xlu_cfg_replace_string(config, "acpi_firmware",
+ &b_info->u.hvm.acpi_firmware, 0);
break;
case LIBXL_DOMAIN_TYPE_PV:
{
++++++ 26556-hvm-firmware-passthrough.patch ++++++
# HG changeset patch
# User Ross Philipson <ross.philipson@xxxxxxxxxx>
# Date 1360935137 0
# Node ID 6a9549a15108669408123e5e39f52ad09dea1c10
# Parent 17a228e37ec0913ff86b8b5f2d88f1b8e92146f1
libxl: Cleanup, use LOG* and GCSPRINTF macro in libxl_dom.c

Signed-off-by: Ross Philipson <ross.philipson@xxxxxxxxxx>
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Committed-by: Ian Campbell <ian.campbell@xxxxxxxxxx>

Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
===================================================================
--- xen-4.2.1-testing.orig/tools/libxl/libxl_dom.c
+++ xen-4.2.1-testing/tools/libxl/libxl_dom.c
@@ -31,8 +31,7 @@ libxl_domain_type libxl__domain_type(lib

ret = xc_domain_getinfolist(ctx->xch, domid, 1, &info);
if (ret != 1 || info.domain != domid) {
- LIBXL__LOG(CTX, LIBXL__LOG_ERROR,
- "unable to get domain type for domid=%"PRIu32, domid);
+ LOG(ERROR, "unable to get domain type for domid=%"PRIu32, domid);
return LIBXL_DOMAIN_TYPE_INVALID;
}
if (info.flags & XEN_DOMINF_hvm_guest)
@@ -313,20 +312,19 @@ int libxl__build_post(libxl__gc *gc, uin

ents = libxl__calloc(gc, 12 + (info->max_vcpus * 2) + 2, sizeof(char *));
ents[0] = "memory/static-max";
- ents[1] = libxl__sprintf(gc, "%"PRId64, info->max_memkb);
+ ents[1] = GCSPRINTF("%"PRId64, info->max_memkb);
ents[2] = "memory/target";
- ents[3] = libxl__sprintf(gc, "%"PRId64,
- info->target_memkb - info->video_memkb);
+ ents[3] = GCSPRINTF("%"PRId64, info->target_memkb - info->video_memkb);
ents[4] = "memory/videoram";
- ents[5] = libxl__sprintf(gc, "%"PRId64, info->video_memkb);
+ ents[5] = GCSPRINTF("%"PRId64, info->video_memkb);
ents[6] = "domid";
- ents[7] = libxl__sprintf(gc, "%d", domid);
+ ents[7] = GCSPRINTF("%d", domid);
ents[8] = "store/port";
- ents[9] = libxl__sprintf(gc, "%"PRIu32, state->store_port);
+ ents[9] = GCSPRINTF("%"PRIu32, state->store_port);
ents[10] = "store/ring-ref";
- ents[11] = libxl__sprintf(gc, "%lu", state->store_mfn);
+ ents[11] = GCSPRINTF("%lu", state->store_mfn);
for (i = 0; i < info->max_vcpus; i++) {
- ents[12+(i*2)] = libxl__sprintf(gc, "cpu/%d/availability", i);
+ ents[12+(i*2)] = GCSPRINTF("cpu/%d/availability", i);
ents[12+(i*2)+1] = libxl_bitmap_test(&info->avail_vcpus, i)
? "online" : "offline";
}
@@ -335,7 +333,7 @@ int libxl__build_post(libxl__gc *gc, uin
if (info->type == LIBXL_DOMAIN_TYPE_HVM) {
hvm_ents = libxl__calloc(gc, 3, sizeof(char *));
hvm_ents[0] = "hvmloader/generation-id-address";
- hvm_ents[1] = libxl__sprintf(gc, "0x%lx", state->vm_generationid_addr);
+ hvm_ents[1] = GCSPRINTF("0x%lx", state->vm_generationid_addr);
}

dom_path = libxl__xs_get_dompath(gc, domid);
@@ -343,7 +341,7 @@ int libxl__build_post(libxl__gc *gc, uin
return ERROR_FAIL;
}

- vm_path = xs_read(ctx->xsh, XBT_NULL, libxl__sprintf(gc, "%s/vm",
dom_path), NULL);
+ vm_path = xs_read(ctx->xsh, XBT_NULL, GCSPRINTF("%s/vm", dom_path), NULL);
retry_transaction:
t = xs_transaction_start(ctx->xsh);

@@ -374,7 +372,7 @@ int libxl__build_pv(libxl__gc *gc, uint3

dom = xc_dom_allocate(ctx->xch, state->pv_cmdline, info->u.pv.features);
if (!dom) {
- LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "xc_dom_allocate failed");
+ LOGE(ERROR, "xc_dom_allocate failed");
return ERROR_FAIL;
}

@@ -384,13 +382,13 @@ int libxl__build_pv(libxl__gc *gc, uint3
state->pv_kernel.data,
state->pv_kernel.size);
if ( ret != 0) {
- LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "xc_dom_kernel_mem
failed");
+ LOGE(ERROR, "xc_dom_kernel_mem failed");
goto out;
}
} else {
ret = xc_dom_kernel_file(dom, state->pv_kernel.path);
if ( ret != 0) {
- LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "xc_dom_kernel_file
failed");
+ LOGE(ERROR, "xc_dom_kernel_file failed");
goto out;
}
}
@@ -398,12 +396,12 @@ int libxl__build_pv(libxl__gc *gc, uint3
if ( state->pv_ramdisk.path && strlen(state->pv_ramdisk.path) ) {
if (state->pv_ramdisk.mapped) {
if ( (ret = xc_dom_ramdisk_mem(dom, state->pv_ramdisk.data,
state->pv_ramdisk.size)) != 0 ) {
- LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "xc_dom_ramdisk_mem
failed");
+ LOGE(ERROR, "xc_dom_ramdisk_mem failed");
goto out;
}
} else {
if ( (ret = xc_dom_ramdisk_file(dom, state->pv_ramdisk.path)) != 0
) {
- LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "xc_dom_ramdisk_file
failed");
+ LOGE(ERROR, "xc_dom_ramdisk_file failed");
goto out;
}
}
@@ -416,31 +414,31 @@ int libxl__build_pv(libxl__gc *gc, uint3
dom->xenstore_domid = state->store_domid;

if ( (ret = xc_dom_boot_xen_init(dom, ctx->xch, domid)) != 0 ) {
- LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "xc_dom_boot_xen_init failed");
+ LOGE(ERROR, "xc_dom_boot_xen_init failed");
goto out;
}
if ( (ret = xc_dom_parse_image(dom)) != 0 ) {
- LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "xc_dom_parse_image failed");
+ LOGE(ERROR, "xc_dom_parse_image failed");
goto out;
}
if ( (ret = xc_dom_mem_init(dom, info->target_memkb / 1024)) != 0 ) {
- LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "xc_dom_mem_init failed");
+ LOGE(ERROR, "xc_dom_mem_init failed");
goto out;
}
if ( (ret = xc_dom_boot_mem_init(dom)) != 0 ) {
- LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "xc_dom_boot_mem_init failed");
+ LOGE(ERROR, "xc_dom_boot_mem_init failed");
goto out;
}
if ( (ret = xc_dom_build_image(dom)) != 0 ) {
- LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "xc_dom_build_image failed");
+ LOGE(ERROR, "xc_dom_build_image failed");
goto out;
}
if ( (ret = xc_dom_boot_image(dom)) != 0 ) {
- LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "xc_dom_boot_image failed");
+ LOGE(ERROR, "xc_dom_boot_image failed");
goto out;
}
if ( (ret = xc_dom_gnttab_init(dom)) != 0 ) {
- LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "xc_dom_gnttab_init failed");
+ LOGE(ERROR, "xc_dom_gnttab_init failed");
goto out;
}

@@ -679,8 +677,7 @@ int libxl__qemu_traditional_cmd(libxl__g
const char *cmd)
{
char *path = NULL;
- path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/command",
- domid);
+ path = GCSPRINTF("/local/domain/0/device-model/%d/command", domid);
return libxl__xs_write(gc, XBT_NULL, path, "%s", cmd);
}

@@ -697,8 +694,7 @@ struct libxl__physmap_info {
static inline char *restore_helper(libxl__gc *gc, uint32_t domid,
uint64_t phys_offset, char *node)
{
- return libxl__sprintf(gc,
- "/local/domain/0/device-model/%d/physmap/%"PRIx64"/%s",
+ return GCSPRINTF("/local/domain/0/device-model/%d/physmap/%"PRIx64"/%s",
domid, phys_offset, node);
}

@@ -708,7 +704,6 @@ int libxl__toolstack_restore(uint32_t do
libxl__save_helper_state *shs = user;
libxl__domain_create_state *dcs = CONTAINER_OF(shs, *dcs, shs);
STATE_AO_GC(dcs->ao);
- libxl_ctx *ctx = CTX;
int i, ret;
const uint8_t *ptr = buf;
uint32_t count = 0, version = 0;
@@ -718,7 +713,7 @@ int libxl__toolstack_restore(uint32_t do
LOG(DEBUG,"domain=%"PRIu32" toolstack data size=%"PRIu32, domid, size);

if (size < sizeof(version) + sizeof(count)) {
- LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "wrong size");
+ LOG(ERROR, "wrong size");
return -1;
}

@@ -726,7 +721,7 @@ int libxl__toolstack_restore(uint32_t do
ptr += sizeof(version);

if (version != TOOLSTACK_SAVE_VERSION) {
- LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "wrong version");
+ LOG(ERROR, "wrong version");
return -1;
}

@@ -735,7 +730,7 @@ int libxl__toolstack_restore(uint32_t do

if (size < sizeof(version) + sizeof(count) +
count * (sizeof(struct libxl__physmap_info))) {
- LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "wrong size");
+ LOG(ERROR, "wrong size");
return -1;
}

@@ -945,15 +940,13 @@ static void switch_logdirty_done(libxl__
int libxl__domain_suspend_device_model(libxl__gc *gc,
libxl__domain_suspend_state *dss)
{
- libxl_ctx *ctx = libxl__gc_owner(gc);
int ret = 0;
uint32_t const domid = dss->domid;
const char *const filename = dss->dm_savefile;

switch (libxl__device_model_version_running(gc, domid)) {
case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: {
- LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,
- "Saving device model state to %s", filename);
+ LOG(DEBUG, "Saving device model state to %s", filename);
libxl__qemu_traditional_cmd(gc, domid, "save");
libxl__wait_for_device_model(gc, domid, "paused", NULL, NULL, NULL);
break;
@@ -1129,8 +1122,7 @@ int libxl__domain_suspend_common_callbac
static inline char *physmap_path(libxl__gc *gc, uint32_t domid,
char *phys_offset, char *node)
{
- return libxl__sprintf(gc,
- "/local/domain/0/device-model/%d/physmap/%s/%s",
+ return GCSPRINTF("/local/domain/0/device-model/%d/physmap/%s/%s",
domid, phys_offset, node);
}

@@ -1147,7 +1139,7 @@ int libxl__toolstack_save(uint32_t domid
char **entries = NULL;
struct libxl__physmap_info *pi;

- entries = libxl__xs_directory(gc, 0, libxl__sprintf(gc,
+ entries = libxl__xs_directory(gc, 0, GCSPRINTF(
"/local/domain/0/device-model/%d/physmap", domid), &num);
count = num;

@@ -1288,7 +1280,7 @@ void libxl__domain_suspend(libxl__egc *e
char *path;
char *addr;

- path = libxl__sprintf(gc, "%s/hvmloader/generation-id-address",
+ path = GCSPRINTF("%s/hvmloader/generation-id-address",
libxl__xs_get_dompath(gc, domid));
addr = libxl__xs_read(gc, XBT_NULL, path);

@@ -1502,10 +1494,7 @@ static void domain_suspend_done(libxl__e

char *libxl__uuid2string(libxl__gc *gc, const libxl_uuid uuid)
{
- char *s = libxl__sprintf(gc, LIBXL_UUID_FMT, LIBXL_UUID_BYTES(uuid));
- if (!s)
- LIBXL__LOG(libxl__gc_owner(gc), LIBXL__LOG_ERROR, "cannot allocate for
uuid");
- return s;
+ return GCSPRINTF(LIBXL_UUID_FMT, LIBXL_UUID_BYTES(uuid));
}

static const char *userdata_path(libxl__gc *gc, uint32_t domid,
@@ -1513,34 +1502,27 @@ static const char *userdata_path(libxl__
const char *wh)
{
libxl_ctx *ctx = libxl__gc_owner(gc);
- char *path, *uuid_string;
+ char *uuid_string;
libxl_dominfo info;
int rc;

rc = libxl_domain_info(ctx, &info, domid);
if (rc) {
- LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "unable to find domain info"
- " for domain %"PRIu32, domid);
+ LOGE(ERROR, "unable to find domain info for domain %"PRIu32, domid);
return NULL;
}
- uuid_string = libxl__sprintf(gc, LIBXL_UUID_FMT,
LIBXL_UUID_BYTES(info.uuid));
+ uuid_string = GCSPRINTF(LIBXL_UUID_FMT, LIBXL_UUID_BYTES(info.uuid));

- path = libxl__sprintf(gc, "/var/lib/xen/"
- "userdata-%s.%u.%s.%s",
- wh, domid, uuid_string, userdata_userid);
- if (!path)
- LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "unable to allocate for"
- " userdata path");
- return path;
+ return GCSPRINTF("/var/lib/xen/userdata-%s.%u.%s.%s",
+ wh, domid, uuid_string, userdata_userid);
}

static int userdata_delete(libxl__gc *gc, const char *path)
{
- libxl_ctx *ctx = libxl__gc_owner(gc);
int r;
r = unlink(path);
if (r) {
- LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "remove failed for %s", path);
+ LOGE(ERROR, "remove failed for %s", path);
return errno;
}
return 0;
@@ -1548,7 +1530,6 @@ static int userdata_delete(libxl__gc *gc

void libxl__userdata_destroyall(libxl__gc *gc, uint32_t domid)
{
- libxl_ctx *ctx = libxl__gc_owner(gc);
const char *pattern;
glob_t gl;
int r, i;
@@ -1564,7 +1545,7 @@ void libxl__userdata_destroyall(libxl__g
if (r == GLOB_NOMATCH)
goto out;
if (r)
- LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "glob failed for %s", pattern);
+ LOGE(ERROR, "glob failed for %s", pattern);

for (i=0; i<gl.gl_pathc; i++) {
userdata_delete(gc, gl.gl_pathv[i]);
++++++ 26576-x86-APICV-migration.patch ++++++
References: FATE#313605

# HG changeset patch
# User Jiongxi Li <jiongxi.li@xxxxxxxxx>
# Date 1361176078 -3600
# Node ID 4c3355d776e115f979fd2abc135bb77ba710f0d4
# Parent 217a4fc4cd46e8de06f2f43eed727838891e9398
x86/VMX: fix live migration while enabling APICV

SVI should be restored in case the guest is processing a virtual interrupt
while saving a domain state. Otherwise SVI would be missed when
virtual interrupt delivery is enabled.

Signed-off-by: Jiongxi Li <jiongxi.li@xxxxxxxxx>
Acked-by: Eddie Dong <eddie.dong@xxxxxxxxx>
Acked-by: Jun Nakajima <jun.nakajima@xxxxxxxxx>
Committed-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/arch/x86/hvm/vlapic.c
+++ b/xen/arch/x86/hvm/vlapic.c
@@ -1198,6 +1198,9 @@ static int lapic_load_regs(struct domain
if ( hvm_load_entry(LAPIC_REGS, h, s->regs) != 0 )
return -EINVAL;

+ if ( hvm_funcs.process_isr )
+ hvm_funcs.process_isr(vlapic_find_highest_isr(s), v);
+
vlapic_adjust_i8259_target(d);
lapic_rearm(s);
return 0;
--- a/xen/arch/x86/hvm/vmx/intr.c
+++ b/xen/arch/x86/hvm/vmx/intr.c
@@ -290,8 +290,8 @@ void vmx_intr_assist(void)
vmx_set_eoi_exit_bitmap(v, pt_vector);

/* we need update the RVI field */
- status &= ~(unsigned long)0x0FF;
- status |= (unsigned long)0x0FF &
+ status &= ~VMX_GUEST_INTR_STATUS_SUBFIELD_BITMASK;
+ status |= VMX_GUEST_INTR_STATUS_SUBFIELD_BITMASK &
intack.vector;
__vmwrite(GUEST_INTR_STATUS, status);
if (v->arch.hvm_vmx.eoi_exitmap_changed) {
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1520,6 +1520,29 @@ static int vmx_virtual_intr_delivery_ena
return cpu_has_vmx_virtual_intr_delivery;
}

+static void vmx_process_isr(int isr, struct vcpu *v)
+{
+ unsigned long status;
+ u8 old;
+
+ if ( !cpu_has_vmx_virtual_intr_delivery )
+ return;
+
+ if ( isr < 0 )
+ isr = 0;
+
+ vmx_vmcs_enter(v);
+ status = __vmread(GUEST_INTR_STATUS);
+ old = status >> VMX_GUEST_INTR_STATUS_SVI_OFFSET;
+ if ( isr != old )
+ {
+ status &= VMX_GUEST_INTR_STATUS_SUBFIELD_BITMASK;
+ status |= isr << VMX_GUEST_INTR_STATUS_SVI_OFFSET;
+ __vmwrite(GUEST_INTR_STATUS, status);
+ }
+ vmx_vmcs_exit(v);
+}
+
static struct hvm_function_table __read_mostly vmx_function_table = {
.name = "VMX",
.cpu_up_prepare = vmx_cpu_up_prepare,
@@ -1568,7 +1591,8 @@ static struct hvm_function_table __read_
.nhvm_intr_blocked = nvmx_intr_blocked,
.nhvm_domain_relinquish_resources = nvmx_domain_relinquish_resources,
.update_eoi_exit_bitmap = vmx_update_eoi_exit_bitmap,
- .virtual_intr_delivery_enabled = vmx_virtual_intr_delivery_enabled
+ .virtual_intr_delivery_enabled = vmx_virtual_intr_delivery_enabled,
+ .process_isr = vmx_process_isr,
};

struct hvm_function_table * __init start_vmx(void)
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -184,6 +184,7 @@ struct hvm_function_table {
/* Virtual interrupt delivery */
void (*update_eoi_exit_bitmap)(struct vcpu *v, u8 vector, u8 trig);
int (*virtual_intr_delivery_enabled)(void);
+ void (*process_isr)(int isr, struct vcpu *v);
};

extern struct hvm_function_table hvm_funcs;
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -246,6 +246,10 @@ extern bool_t cpu_has_vmx_ins_outs_instr
#define VMX_INTR_SHADOW_SMI 0x00000004
#define VMX_INTR_SHADOW_NMI 0x00000008

+/* Guest interrupt status */
+#define VMX_GUEST_INTR_STATUS_SUBFIELD_BITMASK 0x0FF
+#define VMX_GUEST_INTR_STATUS_SVI_OFFSET 8
+
/* VMCS field encodings. */
enum vmcs_field {
VIRTUAL_PROCESSOR_ID = 0x00000000,
++++++ 26577-x86-APICV-x2APIC.patch ++++++
References: FATE#313605

# HG changeset patch
# User Jiongxi Li <jiongxi.li@xxxxxxxxx>
# Date 1361176458 -3600
# Node ID 45d59b822ed187c535b127679e32853b148ed411
# Parent 4c3355d776e115f979fd2abc135bb77ba710f0d4
x86/VMX: fix VMCS setting for x2APIC mode guest while enabling APICV

The "APIC-register virtualization" and "virtual-interrupt delivery"
VM-execution controls have no effect on the behavior of RDMSR/WRMSR if
the "virtualize x2APIC mode" VM-execution control is 0.
When a guest uses x2APIC mode, we should enable "virtualize x2APIC mode"
for APICV first.

Signed-off-by: Jiongxi Li <jiongxi.li@xxxxxxxxx>
Acked-by: Eddie Dong <eddie.dong@xxxxxxxxx>
Acked-by: Jun Nakajima <jun.nakajima@xxxxxxxxx>
Committed-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -194,7 +194,8 @@ static int vmx_init_vmcs_config(void)
*/
if ( _vmx_cpu_based_exec_control & CPU_BASED_TPR_SHADOW )
opt |= SECONDARY_EXEC_APIC_REGISTER_VIRT |
- SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
+ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
+ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;


_vmx_secondary_exec_control = adjust_vmx_controls(
@@ -673,19 +674,59 @@ void vmx_disable_intercept_for_msr(struc
*/
if ( msr <= 0x1fff )
{
- if (type & MSR_TYPE_R)
- __clear_bit(msr, msr_bitmap + 0x000/BYTES_PER_LONG); /* read-low */
- if (type & MSR_TYPE_W)
- __clear_bit(msr, msr_bitmap + 0x800/BYTES_PER_LONG); /* write-low
*/
+ if ( type & MSR_TYPE_R )
+ clear_bit(msr, msr_bitmap + 0x000/BYTES_PER_LONG); /* read-low */
+ if ( type & MSR_TYPE_W )
+ clear_bit(msr, msr_bitmap + 0x800/BYTES_PER_LONG); /* write-low */
}
else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
{
msr &= 0x1fff;
- if (type & MSR_TYPE_R)
- __clear_bit(msr, msr_bitmap + 0x400/BYTES_PER_LONG); /* read-high
*/
- if (type & MSR_TYPE_W)
- __clear_bit(msr, msr_bitmap + 0xc00/BYTES_PER_LONG); /* write-high
*/
+ if ( type & MSR_TYPE_R )
+ clear_bit(msr, msr_bitmap + 0x400/BYTES_PER_LONG); /* read-high */
+ if ( type & MSR_TYPE_W )
+ clear_bit(msr, msr_bitmap + 0xc00/BYTES_PER_LONG); /* write-high */
}
+ else
+ HVM_DBG_LOG(DBG_LEVEL_0,
+ "msr %x is out of the control range"
+ "0x00000000-0x00001fff and 0xc0000000-0xc0001fff"
+ "RDMSR or WRMSR will cause a VM exit", msr);
+}
+
+void vmx_enable_intercept_for_msr(struct vcpu *v, u32 msr, int type)
+{
+ unsigned long *msr_bitmap = v->arch.hvm_vmx.msr_bitmap;
+
+ /* VMX MSR bitmap supported? */
+ if ( msr_bitmap == NULL )
+ return;
+
+ /*
+ * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
+ * have the write-low and read-high bitmap offsets the wrong way round.
+ * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
+ */
+ if ( msr <= 0x1fff )
+ {
+ if ( type & MSR_TYPE_R )
+ set_bit(msr, msr_bitmap + 0x000/BYTES_PER_LONG); /* read-low */
+ if ( type & MSR_TYPE_W )
+ set_bit(msr, msr_bitmap + 0x800/BYTES_PER_LONG); /* write-low */
+ }
+ else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
+ {
+ msr &= 0x1fff;
+ if ( type & MSR_TYPE_R )
+ set_bit(msr, msr_bitmap + 0x400/BYTES_PER_LONG); /* read-high */
+ if ( type & MSR_TYPE_W )
+ set_bit(msr, msr_bitmap + 0xc00/BYTES_PER_LONG); /* write-high */
+ }
+ else
+ HVM_DBG_LOG(DBG_LEVEL_0,
+ "msr %x is out of the control range"
+ "0x00000000-0x00001fff and 0xc0000000-0xc0001fff"
+ "RDMSR or WRMSR will cause a VM exit", msr);
}

/*
@@ -751,6 +792,10 @@ static int construct_vmcs(struct vcpu *v
vmentry_ctl &= ~VM_ENTRY_LOAD_GUEST_PAT;
}

+ /* Disable Virtualize x2APIC mode by default. */
+ v->arch.hvm_vmx.secondary_exec_control &=
+ ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
+
/* Do not enable Monitor Trap Flag unless start single step debug */
v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG;

@@ -787,18 +832,6 @@ static int construct_vmcs(struct vcpu *v
vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP, MSR_TYPE_R |
MSR_TYPE_W);
if ( cpu_has_vmx_pat && paging_mode_hap(d) )
vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT, MSR_TYPE_R |
MSR_TYPE_W);
- if ( cpu_has_vmx_apic_reg_virt )
- {
- int msr;
- for (msr = MSR_IA32_APICBASE_MSR; msr <= MSR_IA32_APICBASE_MSR +
0xff; msr++)
- vmx_disable_intercept_for_msr(v, msr, MSR_TYPE_R);
- }
- if ( cpu_has_vmx_virtual_intr_delivery )
- {
- vmx_disable_intercept_for_msr(v, MSR_IA32_APICTPR_MSR, MSR_TYPE_W);
- vmx_disable_intercept_for_msr(v, MSR_IA32_APICEOI_MSR, MSR_TYPE_W);
- vmx_disable_intercept_for_msr(v, MSR_IA32_APICSELF_MSR,
MSR_TYPE_W);
- }
}

/* I/O access bitmap. */
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2009,18 +2009,63 @@ static void vmx_install_vlapic_mapping(s

void vmx_vlapic_msr_changed(struct vcpu *v)
{
+ int virtualize_x2apic_mode;
struct vlapic *vlapic = vcpu_vlapic(v);

- if ( !cpu_has_vmx_virtualize_apic_accesses )
+ virtualize_x2apic_mode = ( (cpu_has_vmx_apic_reg_virt ||
+ cpu_has_vmx_virtual_intr_delivery) &&
+ cpu_has_vmx_virtualize_x2apic_mode );
+
+ if ( !cpu_has_vmx_virtualize_apic_accesses &&
+ !virtualize_x2apic_mode )
return;

vmx_vmcs_enter(v);
v->arch.hvm_vmx.secondary_exec_control &=
- ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+ ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
if ( !vlapic_hw_disabled(vlapic) &&
(vlapic_base_address(vlapic) == APIC_DEFAULT_PHYS_BASE) )
- v->arch.hvm_vmx.secondary_exec_control |=
- SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+ {
+ unsigned int msr;
+
+ if ( virtualize_x2apic_mode && vlapic_x2apic_mode(vlapic) )
+ {
+ v->arch.hvm_vmx.secondary_exec_control |=
+ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
+ if ( cpu_has_vmx_apic_reg_virt )
+ {
+ for ( msr = MSR_IA32_APICBASE_MSR;
+ msr <= MSR_IA32_APICBASE_MSR + 0xff; msr++ )
+ vmx_disable_intercept_for_msr(v, msr, MSR_TYPE_R);
+
+ vmx_enable_intercept_for_msr(v, MSR_IA32_APICPPR_MSR,
+ MSR_TYPE_R);
+ vmx_enable_intercept_for_msr(v, MSR_IA32_APICTMICT_MSR,
+ MSR_TYPE_R);
+ vmx_enable_intercept_for_msr(v, MSR_IA32_APICTMCCT_MSR,
+ MSR_TYPE_R);
+ }
+ if ( cpu_has_vmx_virtual_intr_delivery )
+ {
+ vmx_disable_intercept_for_msr(v, MSR_IA32_APICTPR_MSR,
+ MSR_TYPE_W);
+ vmx_disable_intercept_for_msr(v, MSR_IA32_APICEOI_MSR,
+ MSR_TYPE_W);
+ vmx_disable_intercept_for_msr(v, MSR_IA32_APICSELF_MSR,
+ MSR_TYPE_W);
+ }
+ }
+ else
+ {
+ v->arch.hvm_vmx.secondary_exec_control |=
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+ for ( msr = MSR_IA32_APICBASE_MSR;
+ msr <= MSR_IA32_APICBASE_MSR + 0xff; msr++ )
+ vmx_enable_intercept_for_msr(v, msr,
+ MSR_TYPE_R | MSR_TYPE_W);
+ }
+ }
vmx_update_secondary_exec_control(v);
vmx_vmcs_exit(v);
}
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -182,6 +182,7 @@ extern u32 vmx_vmentry_control;
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
#define SECONDARY_EXEC_ENABLE_RDTSCP 0x00000008
+#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010
#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
@@ -239,6 +240,8 @@ extern bool_t cpu_has_vmx_ins_outs_instr
(vmx_secondary_exec_control & SECONDARY_EXEC_APIC_REGISTER_VIRT)
#define cpu_has_vmx_virtual_intr_delivery \
(vmx_secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
+#define cpu_has_vmx_virtualize_x2apic_mode \
+ (vmx_secondary_exec_control & SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)

/* GUEST_INTERRUPTIBILITY_INFO flags. */
#define VMX_INTR_SHADOW_STI 0x00000001
@@ -414,6 +417,7 @@ enum vmcs_field {
#define MSR_TYPE_R 1
#define MSR_TYPE_W 2
void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr, int type);
+void vmx_enable_intercept_for_msr(struct vcpu *v, u32 msr, int type);
int vmx_read_guest_msr(u32 msr, u64 *val);
int vmx_write_guest_msr(u32 msr, u64 val);
int vmx_add_guest_msr(u32 msr);
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -295,7 +295,10 @@
#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
#define MSR_IA32_APICBASE_MSR 0x800
#define MSR_IA32_APICTPR_MSR 0x808
+#define MSR_IA32_APICPPR_MSR 0x80a
#define MSR_IA32_APICEOI_MSR 0x80b
+#define MSR_IA32_APICTMICT_MSR 0x838
+#define MSR_IA32_APICTMCCT_MSR 0x839
#define MSR_IA32_APICSELF_MSR 0x83f

#define MSR_IA32_UCODE_WRITE 0x00000079
++++++ 26578-AMD-IOMMU-replace-BUG_ON.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1361176655 -3600
# Node ID 57e67af5281a6b66cf71dfa812e4335930684fd6
# Parent 45d59b822ed187c535b127679e32853b148ed411
AMD IOMMU: don't BUG() when we don't have to

find_iommu_for_device() can easily return NULL instead, as all of its
callers are prepared for that.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -32,8 +32,8 @@ struct amd_iommu *find_iommu_for_device(
{
struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(seg);

- BUG_ON ( bdf >= ivrs_bdf_entries );
- return ivrs_mappings ? ivrs_mappings[bdf].iommu : NULL;
+ return ivrs_mappings && bdf < ivrs_bdf_entries ? ivrs_mappings[bdf].iommu
+ : NULL;
}

/*
++++++ 26585-x86-mm-Take-the-p2m-lock-even-in-shadow-mode.patch ++++++
# Commit a15d87475ed95840dba693ab0a56d0b48a215cbc
# Date 2013-02-21 15:16:20 +0000
# Author Tim Deegan <tim@xxxxxxx>
# Committer Tim Deegan <tim@xxxxxxx>
x86/mm: Take the p2m lock even in shadow mode.

The reworking of p2m lookups to use get_gfn()/put_gfn() left the
shadow code not taking the p2m lock, even in cases where the p2m would
be updated (i.e. PoD).

In many cases, shadow code doesn't need the exclusion that
get_gfn()/put_gfn() provides, as it has its own interlocks against p2m
updates, but this is taking things too far, and can lead to crashes in
the PoD code.

Now that most shadow-code p2m lookups are done with explicitly
unlocked accessors, or with the get_page_from_gfn() accessor, which is
often lock-free, we can just turn this locking on.

The remaining locked lookups are in sh_page_fault() (in a path that's
almost always already serializing on the paging lock), and in
emulate_map_dest() (which can probably be updated to use
get_page_from_gfn()). They're not addressed here but may be in a
follow-up patch.

Signed-off-by: Tim Deegan <tim@xxxxxxx>
Acked-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>

--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -163,8 +163,7 @@ mfn_t __get_gfn_type_access(struct p2m_d
return _mfn(gfn);
}

- /* For now only perform locking on hap domains */
- if ( locked && (hap_enabled(p2m->domain)) )
+ if ( locked )
/* Grab the lock here, don't release until put_gfn */
gfn_lock(p2m, gfn, 0);

@@ -197,8 +196,7 @@ mfn_t __get_gfn_type_access(struct p2m_d

void __put_gfn(struct p2m_domain *p2m, unsigned long gfn)
{
- if ( !p2m || !paging_mode_translate(p2m->domain)
- || !hap_enabled(p2m->domain) )
+ if ( !p2m || !paging_mode_translate(p2m->domain) )
/* Nothing to do in this case */
return;

++++++
26595-x86-nhvm-properly-clean-up-after-failure-to-set-up-all-vCPU-s.patch ++++++
# Commit 17281aea1a9a10f1ee165c6e6a2921a67b7b1df2
# Date 2013-02-22 11:21:38 +0100
# Author Jan Beulich <jbeulich@xxxxxxxx>
# Committer Jan Beulich <jbeulich@xxxxxxxx>
x86/nhvm: properly clean up after failure to set up all vCPU-s

Otherwise we may leak memory when setting up nHVM fails half way.

This implies that the individual destroy functions will have to remain
capable (in the VMX case they first need to be made so, following
26486:7648ef657fe7 and 26489:83a3fa9c8434) of being called for a vCPU
that the corresponding init function was never run on.

While at it, also remove a redundant check from the corresponding
parameter validation code.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Tested-by: Olaf Hering <olaf@xxxxxxxxx>

--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3941,18 +3941,20 @@ long do_hvm_op(unsigned long op, XEN_GUE
#else
if ( a.value > 1 )
rc = -EINVAL;
- if ( !is_hvm_domain(d) )
- rc = -EINVAL;
/* Remove the check below once we have
* shadow-on-shadow.
*/
if ( cpu_has_svm && !paging_mode_hap(d) && a.value )
rc = -EINVAL;
/* Set up NHVM state for any vcpus that are already up */
- if ( !d->arch.hvm_domain.params[HVM_PARAM_NESTEDHVM] )
+ if ( a.value &&
+ !d->arch.hvm_domain.params[HVM_PARAM_NESTEDHVM] )
for_each_vcpu(d, v)
if ( rc == 0 )
rc = nestedhvm_vcpu_initialise(v);
+ if ( !a.value || rc )
+ for_each_vcpu(d, v)
+ nestedhvm_vcpu_destroy(v);
#endif
break;
case HVM_PARAM_BUFIOREQ_EVTCHN:
--- a/xen/arch/x86/hvm/nestedhvm.c
+++ b/xen/arch/x86/hvm/nestedhvm.c
@@ -88,7 +88,7 @@ nestedhvm_vcpu_initialise(struct vcpu *v
void
nestedhvm_vcpu_destroy(struct vcpu *v)
{
- if ( nestedhvm_enabled(v->domain) && hvm_funcs.nhvm_vcpu_destroy )
+ if ( hvm_funcs.nhvm_vcpu_destroy )
hvm_funcs.nhvm_vcpu_destroy(v);
}

++++++ 26601-honor-ACPI-v4-FADT-flags.patch ++++++
# Commit 992fdf6f46252a459c6b1b8d971b2c71f01460f8
# Date 2013-02-22 11:56:54 +0100
# Author Jan Beulich <jbeulich@xxxxxxxx>
# Committer Jan Beulich <jbeulich@xxxxxxxx>
honor ACPI v4 FADT flags

- force use of physical APIC mode if indicated so (as we don't support
xAPIC cluster mode, the respective flag is taken to force physical
mode too)
- don't use MSI if indicated so (implies no IOMMU)

Both can be overridden on the command line; for the MSI case this patch
also adds a new command line option allowing PCI MSI to be turned off
(IOMMU and HPET are unaffected by this).

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>

--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -602,6 +602,13 @@ limit is ignored by Xen.

Specify if the MMConfig space should be enabled.

+### msi
+> `= <boolean>`
+
+> Default: `true`
+
+Force Xen to (not) use PCI-MSI, even if ACPI FADT says otherwise.
+
### nmi
`= ignore | dom0 | fatal`

--- a/xen/arch/x86/genapic/bigsmp.c
+++ b/xen/arch/x86/genapic/bigsmp.c
@@ -40,7 +40,14 @@ static struct dmi_system_id __initdata b

static __init int probe_bigsmp(void)
{
- if (!def_to_bigsmp)
+ /*
+ * We don't implement cluster mode, so force use of
+ * physical mode in both cases.
+ */
+ if (acpi_gbl_FADT.flags &
+ (ACPI_FADT_APIC_CLUSTER | ACPI_FADT_APIC_PHYSICAL))
+ def_to_bigsmp = 1;
+ else if (!def_to_bigsmp)
dmi_check_system(bigsmp_dmi_table);
return def_to_bigsmp;
}
--- a/xen/arch/x86/genapic/x2apic.c
+++ b/xen/arch/x86/genapic/x2apic.c
@@ -29,9 +29,6 @@
#include <xen/smp.h>
#include <asm/mach-default/mach_mpparse.h>

-static bool_t __initdata x2apic_phys; /* By default we use logical cluster
mode. */
-boolean_param("x2apic_phys", x2apic_phys);
-
static void init_apic_ldr_x2apic_phys(void)
{
}
@@ -121,8 +118,14 @@ static const struct genapic apic_x2apic_
.send_IPI_self = send_IPI_self_x2apic
};

+static s8 __initdata x2apic_phys = -1; /* By default we use logical cluster
mode. */
+boolean_param("x2apic_phys", x2apic_phys);
+
const struct genapic *__init apic_x2apic_probe(void)
{
+ if ( x2apic_phys < 0 )
+ x2apic_phys = !!(acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL);
+
return x2apic_phys ? &apic_x2apic_phys : &apic_x2apic_cluster;
}

--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -31,6 +31,9 @@
#include <xen/iommu.h>
#include <xsm/xsm.h>

+static s8 __read_mostly use_msi = -1;
+boolean_param("msi", use_msi);
+
/* bitmap indicate which fixed map is free */
DEFINE_SPINLOCK(msix_fixmap_lock);
DECLARE_BITMAP(msix_fixmap_pages, FIX_MSIX_MAX_PAGES);
@@ -958,6 +961,9 @@ int pci_enable_msi(struct msi_info *msi,
{
ASSERT(spin_is_locked(&pcidevs_lock));

+ if ( !use_msi )
+ return -EPERM;
+
return msi->table_base ? __pci_enable_msix(msi, desc) :
__pci_enable_msi(msi, desc);
}
@@ -1003,7 +1009,10 @@ int pci_restore_msi_state(struct pci_dev

ASSERT(spin_is_locked(&pcidevs_lock));

- if (!pdev)
+ if ( !use_msi )
+ return -EOPNOTSUPP;
+
+ if ( !pdev )
return -EINVAL;

ret = xsm_resource_setup_pci((pdev->seg << 16) | (pdev->bus << 8) |
pdev->devfn);
@@ -1062,7 +1071,7 @@ unsigned int pci_msix_get_table_len(stru
func = PCI_FUNC(pdev->devfn);

pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSIX);
- if ( !pos )
+ if ( !pos || !use_msi )
return 0;

control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos));
@@ -1135,6 +1144,9 @@ static struct keyhandler dump_msi_keyhan

static int __init msi_setup_keyhandler(void)
{
+ if ( use_msi < 0 )
+ use_msi = !(acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_MSI);
+
register_keyhandler('M', &dump_msi_keyhandler);
return 0;
}
--- a/xen/drivers/passthrough/amd/iommu_acpi.c
+++ b/xen/drivers/passthrough/amd/iommu_acpi.c
@@ -1066,5 +1066,8 @@ int __init amd_iommu_get_ivrs_dev_entrie

int __init amd_iommu_update_ivrs_mapping_acpi(void)
{
+ if ( unlikely(acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_MSI) )
+ return -EPERM;
+
return acpi_table_parse(ACPI_SIG_IVRS, parse_ivrs_table);
}
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -2119,6 +2119,12 @@ int __init intel_vtd_setup(void)
if ( list_empty(&acpi_drhd_units) )
return -ENODEV;

+ if ( unlikely(acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_MSI) )
+ {
+ ret = -EPERM;
+ goto error;
+ }
+
platform_quirks_init();

/* We enable the following features only if they are supported by all VT-d
++++++ 26656-x86-fix-null-pointer-dereference-in-intel_get_extended_msrs.patch
++++++
# Commit c40e24a8ef74f9d0ee59dd9b8ca890be08b0b874
# Date 2013-02-25 12:44:25 +0100
# Author Xi Wang <xi@xxxxxxx>
# Committer Jan Beulich <jbeulich@xxxxxxxx>
x86: fix null pointer dereference in intel_get_extended_msrs()

`memset(&mc_ext, 0, ...)' leads to a buffer overflow and a subsequent
null pointer dereference. Replace `&mc_ext' with `mc_ext'.

Signed-off-by: Xi Wang <xi@xxxxxxx>

--- a/xen/arch/x86/cpu/mcheck/mce_intel.c
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c
@@ -534,7 +534,7 @@ intel_get_extended_msrs(struct mcinfo_gl
}

/* this function will called when CAP(9).MCG_EXT_P = 1 */
- memset(&mc_ext, 0, sizeof(struct mcinfo_extended));
+ memset(mc_ext, 0, sizeof(*mc_ext));
mc_ext->common.type = MC_TYPE_EXTENDED;
mc_ext->common.size = sizeof(struct mcinfo_extended);

++++++ 26659-AMD-IOMMU-erratum-746-workaround.patch ++++++
# Commit 0f8adcb2a7183bea5063f6fffba7d7e1aa14fc84
# Date 2013-02-26 10:14:53 +0100
# Author Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>
# Committer Jan Beulich <jbeulich@xxxxxxxx>
IOMMU, AMD Family15h Model10-1Fh erratum 746 Workaround

The IOMMU may stop processing page translations due to a perceived lack
of credits for writing upstream peripheral page service request (PPR)
or event logs. If the L2B miscellaneous clock gating feature is enabled
the IOMMU does not properly register credits after the log request has
completed, leading to a potential system hang.

BIOSes are supposed to disable L2B miscellaneous clock gating by setting
L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b. This
patch corrects that for those which do not enable this workaround.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -795,6 +795,42 @@ static int __init set_iommu_interrupt_ha
return irq;
}

+/*
+ * Family15h Model 10h-1fh erratum 746 (IOMMU Logging May Stall Translations)
+ * Workaround:
+ * BIOS should disable L2B micellaneous clock gating by setting
+ * L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b
+ */
+static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu)
+{
+ u32 value;
+ u8 bus = PCI_BUS(iommu->bdf);
+ u8 dev = PCI_SLOT(iommu->bdf);
+ u8 func = PCI_FUNC(iommu->bdf);
+
+ if ( (boot_cpu_data.x86 != 0x15) ||
+ (boot_cpu_data.x86_model < 0x10) ||
+ (boot_cpu_data.x86_model > 0x1f) )
+ return;
+
+ pci_conf_write32(iommu->seg, bus, dev, func, 0xf0, 0x90);
+ value = pci_conf_read32(iommu->seg, bus, dev, func, 0xf4);
+
+ if ( value & (1 << 2) )
+ return;
+
+ /* Select NB indirect register 0x90 and enable writing */
+ pci_conf_write32(iommu->seg, bus, dev, func, 0xf0, 0x90 | (1 << 8));
+
+ pci_conf_write32(iommu->seg, bus, dev, func, 0xf4, value | (1 << 2));
+ printk(XENLOG_INFO
+ "AMD-Vi: Applying erratum 746 workaround for IOMMU at
%04x:%02x:%02x.%u\n",
+ iommu->seg, bus, dev, func);
+
+ /* Clear the enable writing bit */
+ pci_conf_write32(iommu->seg, bus, dev, func, 0xf0, 0x90);
+}
+
static void enable_iommu(struct amd_iommu *iommu)
{
unsigned long flags;
@@ -807,6 +843,8 @@ static void enable_iommu(struct amd_iomm
return;
}

+ amd_iommu_erratum_746_workaround(iommu);
+
register_iommu_dev_table_in_mmio_space(iommu);
register_iommu_cmd_buffer_in_mmio_space(iommu);
register_iommu_event_log_in_mmio_space(iommu);
++++++ 26660-x86-fix-CMCI-injection.patch ++++++
# Commit 2f8c55ccefe49bb526df0eaf5fa9b7b788422208
# Date 2013-02-26 10:15:56 +0100
# Author Jan Beulich <jbeulich@xxxxxxxx>
# Committer Jan Beulich <jbeulich@xxxxxxxx>
x86: fix CMCI injection

This fixes the wrong use of literal vector 0xF7 with an "int"
instruction (invalidated by 25113:14609be41f36) and the fact that doing
the injection via a software interrupt was never valid anyway (because
cmci_interrupt() acks the LAPIC, which does the wrong thing if the
interrupt didn't get delivered through it).

In order to do the latter, the patch introduces send_IPI_self(), at the same
time removing two open-coded uses of "genapic" in the IRQ handling code.

Reported-by: Yongjie Ren <yongjie.ren@xxxxxxxxx>
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Tested-by: Yongjie Ren <yongjie.ren@xxxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>

--- a/xen/arch/x86/cpu/mcheck/mce.c
+++ b/xen/arch/x86/cpu/mcheck/mce.c
@@ -30,6 +30,7 @@ bool_t __read_mostly mce_broadcast = 0;
bool_t is_mc_panic;
unsigned int __read_mostly nr_mce_banks;
unsigned int __read_mostly firstbank;
+uint8_t __read_mostly cmci_apic_vector;

static void intpose_init(void);
static void mcinfo_clear(struct mc_info *);
@@ -1277,12 +1278,6 @@ static void x86_mc_mceinject(void *data)
__asm__ __volatile__("int $0x12");
}

-static void x86_cmci_inject(void *data)
-{
- printk("Simulating CMCI on cpu %d\n", smp_processor_id());
- __asm__ __volatile__("int $0xf7");
-}
-
#if BITS_PER_LONG == 64

#define ID2COOKIE(id) ((mctelem_cookie_t)(id))
@@ -1568,11 +1563,15 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u
on_selected_cpus(cpumap, x86_mc_mceinject, NULL, 1);
break;
case XEN_MC_INJECT_TYPE_CMCI:
- if ( !cmci_support )
+ if ( !cmci_apic_vector )
ret = x86_mcerr(
"No CMCI supported in platform\n", -EINVAL);
else
- on_selected_cpus(cpumap, x86_cmci_inject, NULL, 1);
+ {
+ if ( cpumask_test_cpu(smp_processor_id(), cpumap) )
+ send_IPI_self(cmci_apic_vector);
+ send_IPI_mask(cpumap, cmci_apic_vector);
+ }
break;
default:
ret = x86_mcerr("Wrong mca type\n", -EINVAL);
--- a/xen/arch/x86/cpu/mcheck/mce.h
+++ b/xen/arch/x86/cpu/mcheck/mce.h
@@ -38,6 +38,8 @@ enum mcheck_type {
mcheck_intel
};

+extern uint8_t cmci_apic_vector;
+
/* Init functions */
enum mcheck_type amd_k7_mcheck_init(struct cpuinfo_x86 *c);
enum mcheck_type amd_k8_mcheck_init(struct cpuinfo_x86 *c);
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c
@@ -1164,7 +1164,6 @@ static void intel_init_cmci(struct cpuin
{
u32 l, apic;
int cpu = smp_processor_id();
- static uint8_t cmci_apic_vector;

if (!mce_available(c) || !cmci_support) {
if (opt_cpu_info)
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -646,7 +646,7 @@ void irq_move_cleanup_interrupt(struct c
* to myself.
*/
if (irr & (1 << (vector % 32))) {
- genapic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
+ send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
TRACE_3D(TRC_HW_IRQ_MOVE_CLEANUP_DELAY,
irq, vector, smp_processor_id());
goto unlock;
@@ -692,7 +692,7 @@ static void send_cleanup_vector(struct i

cpumask_and(&cleanup_mask, desc->arch.old_cpu_mask, &cpu_online_map);
desc->arch.move_cleanup_count = cpumask_weight(&cleanup_mask);
- genapic->send_IPI_mask(&cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+ send_IPI_mask(&cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);

desc->arch.move_in_progress = 0;
}
--- a/xen/arch/x86/smp.c
+++ b/xen/arch/x86/smp.c
@@ -43,6 +43,11 @@ void send_IPI_mask(const cpumask_t *mask
genapic->send_IPI_mask(mask, vector);
}

+void send_IPI_self(int vector)
+{
+ genapic->send_IPI_self(vector);
+}
+
/*
* Some notes on x86 processor bugs affecting SMP operation:
*
--- a/xen/include/asm-x86/smp.h
+++ b/xen/include/asm-x86/smp.h
@@ -29,7 +29,8 @@ DECLARE_PER_CPU(cpumask_var_t, cpu_core_

void smp_send_nmi_allbutself(void);

-void send_IPI_mask(const cpumask_t *mask, int vector);
+void send_IPI_mask(const cpumask_t *, int vector);
+void send_IPI_self(int vector);

extern void (*mtrr_hook) (void);

++++++ 26672-vmx-fix-handling-of-NMI-VMEXIT.patch ++++++
# Commit 7dd3b06ff031c9a8c727df16c5def2afb382101c
# Date 2013-02-28 14:00:18 +0000
# Author Tim Deegan <tim@xxxxxxx>
# Committer Tim Deegan <tim@xxxxxxx>
vmx: fix handling of NMI VMEXIT.

Call do_nmi() directly and explicitly re-enable NMIs rather than
raising an NMI through the APIC. Since NMIs are disabled after the
VMEXIT, the raised NMI would be blocked until the next IRET
instruction (i.e. the next real interrupt, or after scheduling a PV
guest) and in the meantime the guest will spin taking NMI VMEXITS.

Also, handle NMIs before re-enabling interrupts, since if we handle an
interrupt (and therefore IRET) before calling do_nmi(), we may end up
running the NMI handler with NMIs enabled.

Signed-off-by: Tim Deegan <tim@xxxxxxx>
Acked-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Acked-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2421,6 +2421,13 @@ void vmx_vmexit_handler(struct cpu_user_
vector = intr_info & INTR_INFO_VECTOR_MASK;
if ( vector == TRAP_machine_check )
do_machine_check(regs);
+ if ( vector == TRAP_nmi
+ && ((intr_info & INTR_INFO_INTR_TYPE_MASK) ==
+ (X86_EVENTTYPE_NMI << 8)) )
+ {
+ do_nmi(regs);
+ enable_nmis();
+ }
break;
case EXIT_REASON_MCE_DURING_VMENTRY:
do_machine_check(regs);
@@ -2594,7 +2601,7 @@ void vmx_vmexit_handler(struct cpu_user_
(X86_EVENTTYPE_NMI << 8) )
goto exit_and_crash;
HVMTRACE_0D(NMI);
- self_nmi(); /* Real NMI, vector 2: normal processing. */
+ /* Already handled above. */
break;
case TRAP_machine_check:
HVMTRACE_0D(MCE);
--- a/xen/arch/x86/x86_32/entry.S
+++ b/xen/arch/x86/x86_32/entry.S
@@ -621,6 +621,14 @@ ENTRY(machine_check)
pushl $TRAP_machine_check<<16
jmp handle_nmi_mce

+/* Enable NMIs. No special register assumptions. All registers are preserved.
*/
+ENTRY(enable_nmis)
+ /* Set up stack frame */
+ pushf # EFLAGS
+ push %cs # CS
+ push $.Lret # EIP
+ iret # Disable the hardware NMI latch
+
ENTRY(setup_vm86_frame)
mov %ecx,%ds
mov %ecx,%es
@@ -634,7 +642,7 @@ ENTRY(setup_vm86_frame)
.endm
copy_vm86_words
addl $16,%esp
- ret
+.Lret: ret

.section .rodata, "a", @progbits

--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -643,6 +643,22 @@ ENTRY(machine_check)
movl $TRAP_machine_check,4(%rsp)
jmp handle_ist_exception

+/* Enable NMIs. No special register assumptions. Only %rax is not preserved.
*/
+ENTRY(enable_nmis)
+ movq %rsp, %rax /* Grab RSP before pushing */
+
+ /* Set up stack frame */
+ pushq $0 /* SS */
+ pushq %rax /* RSP */
+ pushfq /* RFLAGS */
+ pushq $__HYPERVISOR_CS /* CS */
+ leaq 1f(%rip),%rax
+ pushq %rax /* RIP */
+
+ iretq /* Disable the hardware NMI latch */
+1:
+ retq
+
.section .rodata, "a", @progbits

ENTRY(exception_table)
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -584,6 +584,8 @@ DECLARE_TRAP_HANDLER(alignment_check);
DECLARE_TRAP_HANDLER(spurious_interrupt_bug);
#undef DECLARE_TRAP_HANDLER

+void enable_nmis(void);
+
void syscall_enter(void);
void sysenter_entry(void);
void sysenter_eflags_saved(void);
++++++ 26673-Avoid-stale-pointer-when-moving-domain-to-another-cpupool.patch
++++++
# Commit 482300def7d08e773ccd2a0d978bcb9469fdd810
# Date 2013-02-28 14:56:45 +0000
# Author Juergen Gross <juergen.gross@xxxxxxxxxxxxxx>
# Committer Keir Fraser <keir@xxxxxxx>
Avoid stale pointer when moving domain to another cpupool

When a domain is moved to another cpupool the scheduler private data pointers
in vcpu and domain structures must never point to an already freed memory
area.

While at it, simplify sched_init_vcpu() by using DOM2OP instead of VCPU2OP.

Signed-off-by: Juergen Gross <juergen.gross@xxxxxxxxxxxxxx>

--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -220,7 +220,7 @@ int sched_init_vcpu(struct vcpu *v, unsi
if ( v->sched_priv == NULL )
return 1;

- SCHED_OP(VCPU2OP(v), insert_vcpu, v);
+ SCHED_OP(DOM2OP(d), insert_vcpu, v);

return 0;
}
@@ -231,6 +231,9 @@ int sched_move_domain(struct domain *d,
unsigned int new_p;
void **vcpu_priv;
void *domdata;
+ void *vcpudata;
+ struct scheduler *old_ops;
+ void *old_domdata;

domdata = SCHED_OP(c->sched, alloc_domdata, d);
if ( domdata == NULL )
@@ -261,21 +264,22 @@ int sched_move_domain(struct domain *d,

domain_pause(d);

+ old_ops = DOM2OP(d);
+ old_domdata = d->sched_priv;
+
for_each_vcpu ( d, v )
{
- SCHED_OP(VCPU2OP(v), remove_vcpu, v);
- SCHED_OP(VCPU2OP(v), free_vdata, v->sched_priv);
- v->sched_priv = NULL;
+ SCHED_OP(old_ops, remove_vcpu, v);
}

- SCHED_OP(DOM2OP(d), free_domdata, d->sched_priv);
-
d->cpupool = c;
d->sched_priv = domdata;

new_p = cpumask_first(c->cpu_valid);
for_each_vcpu ( d, v )
{
+ vcpudata = v->sched_priv;
+
migrate_timer(&v->periodic_timer, new_p);
migrate_timer(&v->singleshot_timer, new_p);
migrate_timer(&v->poll_timer, new_p);
@@ -288,12 +292,16 @@ int sched_move_domain(struct domain *d,
new_p = cpumask_cycle(new_p, c->cpu_valid);

SCHED_OP(c->sched, insert_vcpu, v);
+
+ SCHED_OP(old_ops, free_vdata, vcpudata);
}

domain_update_node_affinity(d);

domain_unpause(d);

+ SCHED_OP(old_ops, free_domdata, old_domdata);
+
xfree(vcpu_priv);

return 0;
++++++
26675-tools-xentoollog_update_tty_detection_in_stdiostream_progress.patch ++++++
changeset: 26675:3eb62c576a1a
user: Olaf Hering <olaf@xxxxxxxxx>
date: Wed Feb 27 14:16:36 2013 +0000
files: tools/libxc/xtl_logger_stdio.c
description:
tools/xentoollog: update tty detection in stdiostream_progress

As suggested by IanJ:
Check isatty only once to preserve the errno of ->progress users, and to
reduce the noise in strace output.

Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>


diff -r 4b25c1e6cfbb -r 3eb62c576a1a tools/libxc/xtl_logger_stdio.c
--- a/tools/libxc/xtl_logger_stdio.c Wed Feb 27 11:16:47 2013 +0000
+++ b/tools/libxc/xtl_logger_stdio.c Wed Feb 27 14:16:36 2013 +0000
@@ -35,6 +35,7 @@ struct xentoollog_logger_stdiostream {
xentoollog_level min_level;
unsigned flags;
int progress_erase_len, progress_last_percent;
+ int tty;
};

static void progress_erase(xentoollog_logger_stdiostream *lg) {
@@ -118,7 +119,7 @@ static void stdiostream_progress(struct

lg->progress_last_percent = percent;

- if (isatty(fileno(lg->f)) <= 0) {
+ if (!lg->tty) {
stdiostream_message(logger_in, this_level, context,
"%s: %lu/%lu %3d%%",
doing_what, done, total, percent);
@@ -166,6 +167,7 @@ xentoollog_logger_stdiostream *xtl_creat
newlogger.f = f;
newlogger.min_level = min_level;
newlogger.flags = flags;
+ newlogger.tty = isatty(fileno(newlogger.f)) > 0;

if (newlogger.flags & XTL_STDIOSTREAM_SHOW_DATE) tzset();

++++++ 26676-fix-compat-memory-exchange-op-splitting.patch ++++++
# Commit 53decd322157e922cac2988e07da6d39538c8033
# Date 2013-03-01 16:59:49 +0100
# Author Jan Beulich <jbeulich@xxxxxxxx>
# Committer Jan Beulich <jbeulich@xxxxxxxx>
fix compat memory exchange op splitting

A shift with a negative count was erroneously used here, yielding
undefined behavior.

Reported-by: Xi Wang <xi@xxxxxxx>
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>

--- a/xen/common/compat/memory.c
+++ b/xen/common/compat/memory.c
@@ -172,7 +172,7 @@ int compat_memory_op(unsigned int cmd, X
if ( order_delta >= 0 )
nat.xchg->out.nr_extents = end_extent >> order_delta;
else
- nat.xchg->out.nr_extents = end_extent << order_delta;
+ nat.xchg->out.nr_extents = end_extent << -order_delta;
++split;
}

++++++ 26677-x86-make-certain-memory-sub-ops-return-valid-values.patch ++++++
# Commit 7ffc9779aa5120c5098d938cb88f69a1dda9a0fe
# Date 2013-03-04 10:16:04 +0100
# Author Jan Beulich <jbeulich@xxxxxxxx>
# Committer Jan Beulich <jbeulich@xxxxxxxx>
x86: make certain memory sub-ops return valid values

When a domain's shared info field "max_pfn" is zero,
domain_get_maximum_gpfn() so far returned ULONG_MAX, which
do_memory_op() in turn converted to -1 (i.e. -EPERM). Make the former
always return a sensible number (i.e. zero if the field was zero) and
have the latter no longer truncate return values.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>

--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -437,7 +437,7 @@ unsigned long domain_get_maximum_gpfn(st
if ( is_hvm_domain(d) )
return p2m_get_hostp2m(d)->max_mapped_pfn;
/* NB. PV guests specify nr_pfns rather than max_pfn so we adjust here. */
- return arch_get_max_pfn(d) - 1;
+ return (arch_get_max_pfn(d) ?: 1) - 1;
}

void share_xen_page_with_guest(
--- a/xen/common/compat/memory.c
+++ b/xen/common/compat/memory.c
@@ -15,7 +15,8 @@ CHECK_TYPE(domid);

int compat_memory_op(unsigned int cmd, XEN_GUEST_HANDLE(void) compat)
{
- int rc, split, op = cmd & MEMOP_CMD_MASK;
+ int split, op = cmd & MEMOP_CMD_MASK;
+ long rc;
unsigned int start_extent = cmd >> MEMOP_EXTENT_SHIFT;

do
@@ -204,7 +205,7 @@ int compat_memory_op(unsigned int cmd, X

rc = do_memory_op(cmd, nat.hnd);
if ( rc < 0 )
- return rc;
+ break;

cmd = 0;
if ( hypercall_xlat_continuation(&cmd, 0x02, nat.hnd, compat) )
@@ -318,5 +319,11 @@ int compat_memory_op(unsigned int cmd, X
__HYPERVISOR_memory_op, "ih", cmd, compat);
} while ( split > 0 );

+ if ( unlikely(rc > INT_MAX) )
+ return INT_MAX;
+
+ if ( unlikely(rc < INT_MIN) )
+ return INT_MIN;
+
return rc;
}
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -532,14 +532,13 @@ static long memory_exchange(XEN_GUEST_HA
long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE(void) arg)
{
struct domain *d;
- int rc, op;
+ long rc;
unsigned int address_bits;
unsigned long start_extent;
struct xen_memory_reservation reservation;
struct memop_args args;
domid_t domid;
-
- op = cmd & MEMOP_CMD_MASK;
+ int op = cmd & MEMOP_CMD_MASK;

switch ( op )
{
++++++ 26678-SEDF-avoid-gathering-vCPU-s-on-pCPU0.patch ++++++
# Commit e6a6fd63652814e5c36a0016c082032f798ced1f
# Date 2013-03-04 10:17:52 +0100
# Author Jan Beulich <jbeulich@xxxxxxxx>
# Committer Jan Beulich <jbeulich@xxxxxxxx>
SEDF: avoid gathering vCPU-s on pCPU0

The introduction of vcpu_force_reschedule() in 14320:215b799fa181 was
incompatible with the SEDF scheduler: Any vCPU using
VCPUOP_stop_periodic_timer (e.g. any vCPU of half way modern PV Linux
guests) ends up on pCPU0 after that call. Obviously, running all PV
guests' (and namely Dom0's) vCPU-s on pCPU0 causes problems for those
guests rather sooner than later.

So the main thing that was clearly wrong (and bogus from the beginning)
was the use of cpumask_first() in sedf_pick_cpu(). It is being replaced
by a construct that prefers to put back the vCPU on the pCPU that it
got launched on.

However, there's one more glitch: When reducing the affinity of a vCPU
temporarily, and then widening it again to a set that includes the pCPU
that the vCPU was last running on, the generic scheduler code would not
force a migration of that vCPU, and hence it would forever stay on the
pCPU it last ran on. Since that can again create a load imbalance, the
SEDF scheduler wants a migration to happen regardless of it being
apparently unnecessary.

Of course, an alternative to checking for SEDF explicitly in
vcpu_set_affinity() would be to introduce a flags field in struct
scheduler, and have SEDF set an "always-migrate-on-affinity-change"
flag.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>

--- a/xen/common/sched_sedf.c
+++ b/xen/common/sched_sedf.c
@@ -396,7 +396,8 @@ static int sedf_pick_cpu(const struct sc

online = cpupool_scheduler_cpumask(v->domain->cpupool);
cpumask_and(&online_affinity, v->cpu_affinity, online);
- return cpumask_first(&online_affinity);
+ return cpumask_cycle(v->vcpu_id % cpumask_weight(&online_affinity) - 1,
+ &online_affinity);
}

/*
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -611,7 +611,8 @@ int vcpu_set_affinity(struct vcpu *v, co
vcpu_schedule_lock_irq(v);

cpumask_copy(v->cpu_affinity, affinity);
- if ( !cpumask_test_cpu(v->processor, v->cpu_affinity) )
+ if ( VCPU2OP(v)->sched_id == XEN_SCHEDULER_SEDF ||
+ !cpumask_test_cpu(v->processor, v->cpu_affinity) )
set_bit(_VPF_migrating, &v->pause_flags);

vcpu_schedule_unlock_irq(v);
++++++ 26679-x86-defer-processing-events-on-the-NMI-exit-path.patch ++++++
# Commit d463b005bbd6475ed930a302821efe239e1b2cf9
# Date 2013-03-04 10:19:34 +0100
# Author Jan Beulich <jbeulich@xxxxxxxx>
# Committer Jan Beulich <jbeulich@xxxxxxxx>
x86: defer processing events on the NMI exit path

Otherwise, we may end up in the scheduler, keeping NMIs masked for a
possibly unbounded period of time (until whenever the next IRET gets
executed). Enforce timely event processing by sending a self IPI.

Of course it's open for discussion whether to always use the straight
exit path from handle_ist_exception.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>

--- a/xen/arch/x86/x86_32/entry.S
+++ b/xen/arch/x86/x86_32/entry.S
@@ -60,6 +60,7 @@
#include <asm/apicdef.h>
#include <asm/page.h>
#include <public/xen.h>
+#include <irq_vectors.h>

ALIGN
restore_all_guest:
@@ -561,6 +562,8 @@ ENTRY(early_page_fault)
jmp restore_all_xen
.popsection

+ENTRY(nmi)
+ pushl $TRAP_nmi<<16
handle_nmi_mce:
#ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
# NMI/MCE entry protocol is incompatible with guest kernel in ring 0.
@@ -581,7 +584,24 @@ handle_nmi_mce:
* cases we have put guest DS/ES on the guest stack frame, which will
* be detected by SAVE_ALL(), or we have rolled back restore_guest.
*/
- jmp ret_from_intr
+ cmpb $TRAP_nmi,UREGS_entry_vector(%esp)
+ jne ret_from_intr
+ /* We want to get straight to the IRET on the NMI exit path. */
+ GET_CURRENT(%ebx)
+ movl UREGS_eflags(%esp),%eax
+ movb UREGS_cs(%esp),%al
+ testl $(3|X86_EFLAGS_VM),%eax
+ jz restore_all_xen
+ /* Send an IPI to ourselves to cover for the lack of event checking. */
+ movl VCPU_processor(%ebx),%eax
+ shll $IRQSTAT_shift,%eax
+ cmpl $0,irq_stat(%eax)
+ je restore_all_guest
+ pushl $EVENT_CHECK_VECTOR
+ call send_IPI_self
+ addl $4,%esp
+ jmp restore_all_guest
+
.Lnmi_mce_xen:
/* Check the outer (guest) context for %ds/%es state validity. */
GET_CPUINFO_FIELD(CPUINFO_guest_cpu_user_regs,%ebx)
@@ -613,10 +633,6 @@ handle_nmi_mce:
jmp .Lnmi_mce_common
#endif /* !CONFIG_X86_SUPERVISOR_MODE_KERNEL */

-ENTRY(nmi)
- pushl $TRAP_nmi<<16
- jmp handle_nmi_mce
-
ENTRY(machine_check)
pushl $TRAP_machine_check<<16
jmp handle_nmi_mce
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -171,7 +171,7 @@ compat_bad_hypercall:
jmp compat_test_all_events

/* %rbx: struct vcpu, interrupts disabled */
-compat_restore_all_guest:
+ENTRY(compat_restore_all_guest)
ASSERT_INTERRUPTS_DISABLED
RESTORE_ALL
addq $8,%rsp
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -11,6 +11,7 @@
#include <asm/apicdef.h>
#include <asm/page.h>
#include <public/xen.h>
+#include <irq_vectors.h>

ALIGN
/* %rbx: struct vcpu */
@@ -617,6 +618,9 @@ ENTRY(early_page_fault)
jmp restore_all_xen
.popsection

+ENTRY(nmi)
+ pushq $0
+ movl $TRAP_nmi,4(%rsp)
handle_ist_exception:
SAVE_ALL
testb $3,UREGS_cs(%rsp)
@@ -631,12 +635,25 @@ handle_ist_exception:
movl UREGS_entry_vector(%rsp),%eax
leaq exception_table(%rip),%rdx
callq *(%rdx,%rax,8)
- jmp ret_from_intr
+ cmpb $TRAP_nmi,UREGS_entry_vector(%rsp)
+ jne ret_from_intr

-ENTRY(nmi)
- pushq $0
- movl $TRAP_nmi,4(%rsp)
- jmp handle_ist_exception
+ /* We want to get straight to the IRET on the NMI exit path. */
+ testb $3,UREGS_cs(%rsp)
+ jz restore_all_xen
+ GET_CURRENT(%rbx)
+ /* Send an IPI to ourselves to cover for the lack of event checking. */
+ movl VCPU_processor(%rbx),%eax
+ shll $IRQSTAT_shift,%eax
+ leaq irq_stat(%rip),%rcx
+ cmpl $0,(%rcx,%rax,1)
+ je 1f
+ movl $EVENT_CHECK_VECTOR,%edi
+ call send_IPI_self
+1: movq VCPU_domain(%rbx),%rax
+ cmpb $0,DOMAIN_is_32bit_pv(%rax)
+ je restore_all_guest
+ jmp compat_restore_all_guest

ENTRY(machine_check)
pushq $0
++++++ 26683-credit1-Use-atomic-bit-operations-for-the-flags-structure.patch
++++++
# Commit be6507509454adf3bb5a50b9406c88504e996d5a
# Date 2013-03-04 13:37:39 +0100
# Author George Dunlap <george.dunlap@xxxxxxxxxxxxx>
# Committer Jan Beulich <jbeulich@xxxxxxxx>
credit1: Use atomic bit operations for the flags structure

The flags structure is not protected by locks (or more precisely,
it is protected using an inconsistent set of locks); we therefore need
to make sure that all accesses are atomic-safe. This is particularly
important in the case of the PARKED flag, which if clobbered while
changing the YIELD bit will leave a vcpu wedged in an offline state.

Using the atomic bitops also requires us to change the size of the "flags"
element.

Spotted-by: Igor Pavlikevich <ipavlikevich@xxxxxxxxx>
Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>

--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -58,8 +58,8 @@
/*
* Flags
*/
-#define CSCHED_FLAG_VCPU_PARKED 0x0001 /* VCPU over capped credits */
-#define CSCHED_FLAG_VCPU_YIELD 0x0002 /* VCPU yielding */
+#define CSCHED_FLAG_VCPU_PARKED 0x0 /* VCPU over capped credits */
+#define CSCHED_FLAG_VCPU_YIELD 0x1 /* VCPU yielding */


/*
@@ -132,7 +132,7 @@ struct csched_vcpu {
struct vcpu *vcpu;
atomic_t credit;
s_time_t start_time; /* When we were scheduled (used for credit) */
- uint16_t flags;
+ unsigned flags;
int16_t pri;
#ifdef CSCHED_STATS
struct {
@@ -214,7 +214,7 @@ __runq_insert(unsigned int cpu, struct c
/* If the vcpu yielded, try to put it behind one lower-priority
* runnable vcpu if we can. The next runq_sort will bring it forward
* within 30ms if the queue too long. */
- if ( svc->flags & CSCHED_FLAG_VCPU_YIELD
+ if ( test_bit(CSCHED_FLAG_VCPU_YIELD, &svc->flags)
&& __runq_elem(iter)->pri > CSCHED_PRI_IDLE )
{
iter=iter->next;
@@ -776,7 +776,7 @@ csched_vcpu_wake(const struct scheduler
* those.
*/
if ( svc->pri == CSCHED_PRI_TS_UNDER &&
- !(svc->flags & CSCHED_FLAG_VCPU_PARKED) )
+ !test_bit(CSCHED_FLAG_VCPU_PARKED, &svc->flags) )
{
svc->pri = CSCHED_PRI_TS_BOOST;
}
@@ -789,12 +789,12 @@ csched_vcpu_wake(const struct scheduler
static void
csched_vcpu_yield(const struct scheduler *ops, struct vcpu *vc)
{
- struct csched_vcpu * const sv = CSCHED_VCPU(vc);
+ struct csched_vcpu * const svc = CSCHED_VCPU(vc);

if ( !sched_credit_default_yield )
{
/* Let the scheduler know that this vcpu is trying to yield */
- sv->flags |= CSCHED_FLAG_VCPU_YIELD;
+ set_bit(CSCHED_FLAG_VCPU_YIELD, &svc->flags);
}
}

@@ -1122,11 +1122,10 @@ csched_acct(void* dummy)
/* Park running VCPUs of capped-out domains */
if ( sdom->cap != 0U &&
credit < -credit_cap &&
- !(svc->flags & CSCHED_FLAG_VCPU_PARKED) )
+ !test_and_set_bit(CSCHED_FLAG_VCPU_PARKED, &svc->flags) )
{
CSCHED_STAT_CRANK(vcpu_park);
vcpu_pause_nosync(svc->vcpu);
- svc->flags |= CSCHED_FLAG_VCPU_PARKED;
}

/* Lower bound on credits */
@@ -1142,7 +1141,7 @@ csched_acct(void* dummy)
svc->pri = CSCHED_PRI_TS_UNDER;

/* Unpark any capped domains whose credits go positive */
- if ( svc->flags & CSCHED_FLAG_VCPU_PARKED)
+ if ( test_and_clear_bit(CSCHED_FLAG_VCPU_PARKED, &svc->flags) )
{
/*
* It's important to unset the flag AFTER the unpause()
@@ -1151,7 +1150,6 @@ csched_acct(void* dummy)
*/
CSCHED_STAT_CRANK(vcpu_unpark);
vcpu_unpause(svc->vcpu);
- svc->flags &= ~CSCHED_FLAG_VCPU_PARKED;
}

/* Upper bound on credits means VCPU stops earning */
@@ -1410,8 +1408,7 @@ csched_schedule(
/*
* Clear YIELD flag before scheduling out
*/
- if ( scurr->flags & CSCHED_FLAG_VCPU_YIELD )
- scurr->flags &= ~(CSCHED_FLAG_VCPU_YIELD);
+ clear_bit(CSCHED_FLAG_VCPU_YIELD, &scurr->flags);

/*
* SMP Load balance:
++++++ 26686-xentrace-fix-off-by-one-in-calculate_tbuf_size.patch ++++++
# Commit d9fb28ae6d41c8201482948660e52889481830dd
# Date 2013-03-04 13:42:17 +0100
# Author Olaf Hering <olaf@xxxxxxxxx>
# Committer Jan Beulich <jbeulich@xxxxxxxx>
xentrace: fix off-by-one in calculate_tbuf_size

Commit "xentrace: reduce trace buffer size to something mfn_offset can
reach" contains an off-by-one bug. max_mfn_offset needs to be reduced by
exactly the value of t_info_first_offset.

If the system has two cpus and the number of requested trace pages is
very large, the final number of trace pages + the offset will not fit
into a short. As a result the variable offset in alloc_trace_bufs() will
wrap while allocating buffers for the second cpu. Later
share_xen_page_with_privileged_guests() will be called with a wrong page
and the ASSERT in this function triggers. If the ASSERT is ignored by
running a non-dbg hypervisor the asserts in xentrace itself trigger
because "cons" is not aligned because the very last trace page for the
second cpu is a random mfn.

Thanks to Jan for the quick analysis.

Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>

--- a/xen/common/trace.c
+++ b/xen/common/trace.c
@@ -133,7 +133,7 @@ static int calculate_tbuf_size(unsigned
* The array of mfns for the highest cpu can start at the maximum value
* mfn_offset can hold. So reduce the number of cpus and also the
mfn_offset.
*/
- max_mfn_offset -= t_info_first_offset - 1;
+ max_mfn_offset -= t_info_first_offset;
max_cpus--;
if ( max_cpus )
max_mfn_offset /= max_cpus;
++++++ 26689-fix-domain-unlocking-in-some-xsm-error-paths.patch ++++++
# Commit 9581c4f9a55372a21e759cd449cb676d0e8feddb
# Date 2013-03-06 17:10:26 +0100
# Author Matthew Daley <mattjd@xxxxxxxxx>
# Committer Jan Beulich <jbeulich@xxxxxxxx>
fix domain unlocking in some xsm error paths

A couple of xsm error/access-denied code paths in hypercalls neglect to
unlock a previously locked domain. Fix by ensuring the domains are
unlocked correctly.

Signed-off-by: Matthew Daley <mattjd@xxxxxxxxx>
Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>

--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -2262,7 +2262,7 @@ gnttab_get_status_frames(XEN_GUEST_HANDL
rc = xsm_grant_setup(current->domain, d);
if ( rc ) {
op.status = GNTST_permission_denied;
- goto out1;
+ goto out2;
}

gt = d->grant_table;
++++++ 32on64-extra-mem.patch ++++++
Index: xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -2982,7 +2982,7 @@ class XendDomainInfo:

self.guest_bitsize = self.image.getBitSize()
# Make sure there's enough RAM available for the domain
- balloon.free(memory + shadow + vtd_mem, self)
+ balloon.free(memory + shadow + vtd_mem + 512, self)

# Set up the shadow memory
shadow_cur = xc.shadow_mem_control(self.domid, shadow / 1024)
++++++ CVE-2012-6075-xsa41.patch ++++++
Subject: e1000: Discard packets that are too long if !SBP and !LPE
From: Michael Contreras michael@xxxxxxxxxxx Sun Dec 2 20:11:22 2012 -0800
Date: Wed Jan 16 14:12:40 2013 +0000:
Git: b4e9b8169dedc0bcf0d3abe07642f761ac70aeea

The e1000_receive function for the e1000 needs to discard packets longer than
1522 bytes if the SBP and LPE flags are disabled. The linux driver assumes
this behavior and allocates memory based on this assumption.

Signed-off-by: Michael Contreras <michael@xxxxxxxxxxx>
Signed-off-by: Anthony Liguori <aliguori@xxxxxxxxxx>

Subject: e1000: Discard oversized packets based on SBP|LPE
From: Michael Contreras <michael@xxxxxxxxxxx>
Date: Wed, 5 Dec 2012 18:31:30 +0000 (-0500)

e1000: Discard oversized packets based on SBP|LPE

Discard packets longer than 16384 when !SBP to match the hardware behavior.

Signed-off-by: Michael Contreras <michael@xxxxxxxxxxx>
Signed-off-by: Stefan Hajnoczi <stefanha@xxxxxxxxxx>

[ This is a security vulnerability, CVE-2012-6075 / XSA-41. ]
(cherry picked from commit 4c2cae2a882db4d2a231b27b3b31a5bbec6dacbf)

Index: xen-4.2.1-testing/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
===================================================================
--- xen-4.2.1-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
+++ xen-4.2.1-testing/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
@@ -55,6 +55,11 @@ static int debugflags = DBGBIT(TXERR) |
#define REG_IOADDR 0x0
#define REG_IODATA 0x4

+/* this is the size past which hardware will drop packets when setting LPE=0 */
+#define MAXIMUM_ETHERNET_VLAN_SIZE 1522
+/* this is the size past which hardware will drop packets when setting LPE=1 */
+#define MAXIMUM_ETHERNET_LPE_SIZE 16384
+
/*
* HW models:
* E1000_DEV_ID_82540EM works with Windows and Linux
@@ -628,6 +633,14 @@ e1000_receive(void *opaque, const uint8_
return;
}

+ /* Discard oversized packets if !LPE and !SBP. */
+ if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
+ (size > MAXIMUM_ETHERNET_VLAN_SIZE
+ && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
+ && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
+ return;
+ }
+
if (!receive_filter(s, buf, size))
return;

Index: xen-4.2.1-testing/tools/qemu-xen-dir-remote/hw/e1000.c
===================================================================
--- xen-4.2.1-testing.orig/tools/qemu-xen-dir-remote/hw/e1000.c
+++ xen-4.2.1-testing/tools/qemu-xen-dir-remote/hw/e1000.c
@@ -59,6 +59,11 @@ static int debugflags = DBGBIT(TXERR) |
#define PNPMMIO_SIZE 0x20000
#define MIN_BUF_SIZE 60 /* Min. octets in an ethernet frame sans FCS */

+/* this is the size past which hardware will drop packets when setting LPE=0 */
+#define MAXIMUM_ETHERNET_VLAN_SIZE 1522
+/* this is the size past which hardware will drop packets when setting LPE=1 */
+#define MAXIMUM_ETHERNET_LPE_SIZE 16384
+
/*
* HW models:
* E1000_DEV_ID_82540EM works with Windows and Linux
@@ -693,6 +698,14 @@ e1000_receive(VLANClientState *nc, const
size = sizeof(min_buf);
}

+ /* Discard oversized packets if !LPE and !SBP. */
+ if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
+ (size > MAXIMUM_ETHERNET_VLAN_SIZE
+ && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
+ && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
+ return size;
+ }
+
if (!receive_filter(s, buf, size))
return size;

++++++ CVE-2013-0151-xsa34.patch ++++++
References: CVE-2013-0151 XSA-34 bnc#797285

x86_32: don't allow use of nested HVM

There are (indirect) uses of map_domain_page() in the nested HVM code
that are unsafe when not just using the 1:1 mapping.

This is XSA-34 / CVE-2013-0151.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3930,6 +3930,10 @@ long do_hvm_op(unsigned long op, XEN_GUE
rc = -EINVAL;
break;
case HVM_PARAM_NESTEDHVM:
+#ifdef __i386__
+ if ( a.value )
+ rc = -EINVAL;
+#else
if ( a.value > 1 )
rc = -EINVAL;
if ( !is_hvm_domain(d) )
@@ -3944,6 +3948,7 @@ long do_hvm_op(unsigned long op, XEN_GUE
for_each_vcpu(d, v)
if ( rc == 0 )
rc = nestedhvm_vcpu_initialise(v);
+#endif
break;
case HVM_PARAM_BUFIOREQ_EVTCHN:
rc = -EINVAL;
++++++ README.SuSE ++++++
README for the Xen packages
===========================

This file contains SUSE-specific instructions and suggestions for using Xen.

For more in-depth documentation of using Xen on SUSE, consult the
virtualization chapter in the SLES or SUSE Linux manual, or read up-to-date
virtualization information, including a list of known issues, at
http://www.novell.com/documentation/vmserver/.

For more complete documentation on Xen itself, please install one of the
xen-doc-* packages and read the documentation installed into
/usr/share/doc/packages/xen/.


About
-----
Xen allows you to run multiple virtual machines on a single physical machine.

See the Xen homepage for more information:
http://www.cl.cam.ac.uk/research/srg/netos/xen/

If you want to use Xen, you need to install the Xen hypervisor and a number of
supporting packages. During the initial SUSE installation (or when installing
from YaST) check-mark the "Xen Virtual Machine Host Server" pattern. If,
instead, you wish to install Xen manually later, click on the "Install
Hypervisor and Tools" icon in YaST.

If you want to install and manage VMs graphically, be sure to install a
graphical desktop environment like KDE or GNOME. The following optional
packages are needed to manage VMs graphically. Note that "Install Hypervisor
and Tools" installs all the packages below:
vm-install (Optional, to install VMs)
virt-manager (Optional, to manage VMs graphically)
virt-viewer (Optional, to view VMs outside virt-manager)

Additional packages:
nbd-client (Optional, to access virtual disks stored on NBD servers)
open-iscsi (Optional, to access virtual disks stored on iSCSI targets)

You then need to reboot your machine. Instead of booting a normal Linux
kernel, you will boot the Xen hypervisor and a slightly changed Linux kernel.
This Linux kernel runs in the first virtual machine and will drive most of
your hardware.

This approach is called paravirtualization, since it is a partial
virtualization (the Linux kernel needs to be changed slightly, to make the
virtualization easier). It results in very good performance (consult
http://www.cl.cam.ac.uk/research/srg/netos/xen/performance.html) but has the
downside of unchanged operating systems not being supported. However, new
hardware features (e.g., Intel VT and AMD-V) are overcoming this limitation.


Terminology
-----------
The Xen open-source community has a number of terms that you should be
familiar with.

A "domain" is Xen's term for a virtual machine.

"Domain 0" is the first virtual machine. It can control all other virtual
machines. It also (usually) controls the physical hardware. A kernel used in
domain 0 may sometimes be referred to as a dom0 kernel.

"Domain U" is any virtual machine other than domain 0. The "U" indicates it
is unprivileged (that is, it cannot control other domains). A kernel used in
an unprivileged domain may be referred to as a domU kernel.

Novell documentation will use the more industry-standard term "virtual
machine", or "VM", rather than "domain" where possible. And to that end,
domain 0 will be called the "virtual machine server", since it is essentially the
server on which the other VMs run. All other domains are simply "virtual
machines".

The acronym "HVM" refers to a hardware-assisted virtual machine. These are
VMs that have not been modified (e.g., Windows) and therefore need hardware
support such as Intel VT or AMD-V to run on Xen.


Kernels
-------
Xen supports two kinds of kernels: A privileged kernel (which boots the
machine, controls other VMs, and usually controls all your physical hardware)
and unprivileged kernels (which can't control other VMs, and usually don't need
drivers for physical hardware). The privileged kernel boots first (as the VM
server); an unprivileged kernel is used in all subsequent VMs.

The VM server takes control of the boot process after Xen has initialized the
CPU and the memory. This VM contains a privileged kernel and all the hardware
drivers.

For the other virtual machines, you usually don't need the hardware drivers.
(It is possible to hide a PCI device from the VM server and re-assign it to
another VM for direct access, but that is a more advanced topic.) Instead you
use virtual network and block device drivers in the unprivileged VMs to access
the physical network and block drivers in the VM server.

For simplicity, SUSE ships a single Xen-enabled Linux kernel, rather than
separate privileged and unprivileged kernels. As most of the hardware drivers
are modules anyway, using this kernel as an unprivileged kernel has very
little extra overhead.

The kernel is contained in the kernel-xen package, which you need to install to
use Xen.


Booting
-------
If you installed Xen during the initial SUSE installation, or installed one
of the kernel-xen* packages later, a "XEN" option should exist in your Grub
bootloader. Select that to boot SUSE on top of Xen.

If you want to add additional entries, or modify the existing ones, you will
have to edit Grub yourself. All Xen entries in the Grub configuration file
(usually /boot/grub/menu.lst) look something like this:

title XEN
root (hd0,5)
kernel /xen.gz
module /vmlinuz-xen <parameters>
module /initrd-xen

Replace (hd0,5) with the partition that holds your /boot directory in
grub-speak, e.g., hda1 -> (hd0,0) and sda5 -> (hd2,4).

Normally, xen.gz requires no parameters. If you want to add parameters,
see below.

Replace "<parameters>" with the kernel parameters that you want to pass to
your kernel. These should be very similar, if not identical, to those passed
to a normal kernel that you boot on bare iron.

Once you have booted this configuration successfully, you are running Xen with
a privileged kernel on top of it.


Xen Boot Parameters
-------------------
Normally, xen.gz requires no parameters. However, in special cases (such as
debugging or a dedicated VM server) you may wish to pass it parameters.

In particular, in case of problems, you may want to attach a serial terminal and
direct Xen to send its output not only to the screen, but also to that
terminal. In order to do so, add "console=vga,com<n> com<n>=<baud>" (without
the quotes and with <n> replaced by the serial port number - generally 1 or 2 -
and with <baud> replaced by the baud rate the serial terminal is configured
for) to the xen.gz line.

For a more complete discussion of possible parameters, see the user
documentation in the xen-doc-* packages.


Init scripts
------------
Before you can create additional VMs (or use any other xm command) xend must
be running. This init script is part of the xen-tools package, and it is
activated at installation time. You can (de)activate it using insserv. You
can also start it manually with "rcxend start".

The deprecated xendomains script is also shipped, but disabled by default. In
SLES 10 GA (xen 3.0.2) and older, this script allowed VMs to be started and
stopped automatically when the machine starts and stops. In SLES 10 SP1 (xen
3.0.4) and newer, the proper way to start and stop VMs automatically is to set
the "on_xend_start" and "on_xend_stop" settings in the VM's configuration.
(Deprecating xendomains was necessary because xend, not the configuration file
in /etc/xen/vm, is now the authoritative source for the VM's settings.)
Consult the online documentation for more information.


Creating a VM with vm-install
-----------------------------
The vm-install program (part of the vm-install package, and accessible
through YaST's Control Center) is the recommended method to create VMs. This
program handles creating both the VM's configuration file and disk(s). It can
help install any operating system, not just SUSE.

From the command line, run "vm-install". If the DISPLAY environment variable
is set and the supporting packages (python-gtk) are installed, a graphical
wizard will start. Otherwise, a text wizard will start.

Each VM needs to have its own root filesystem. The root filesystem can live
on a block device (e.g., a hard disk partition, or an LVM2 or EVMS volume) or
in a file that holds the filesystem image.

VMs can share filesystems, such as /usr or /opt, that are mounted read-only
from _all_ VMs. Never try to share a filesystem that is mounted read-write;
filesystem corruption will result. For sharing writable data between VMs, use
NFS or other networked or cluster filesystems.

When defining the virtual network adapter(s), we recommend using a static MAC
for the VM rather than allowing Xen to randomly select one each time the VM
boots. (See "Network Troubleshooting" below.) XenSource has been allocated a
range of MAC addresses with the OUI of 00-16-3E. By using MACs from this
range you can be sure they will not conflict with any physical adapters.

Once you have the VM configured, click "OK". The wizard will now create a
configuration file for the VM, and create a disk image. The disk image will
exist in /var/lib/xen/images, and a corresponding config file will exist in
/etc/xen/vm. The operating system's installation program will then run within
the VM.

When the VM shuts down (because the installation -- or at least the first
stage of it -- is done), the wizard finalizes the VM's configuration and
restarts the VM.

The creation of VMs can be automated; read the vm-install man page for more
details. The installation of an OS within the VM can be automated if the OS
supports it.


Creating a VM Manually
----------------------
If you create a VM manually (as opposed to using vm-install, which is the
recommended way), you will need to create a disk (or reuse an existing one)
and a configuration file.

If you are using a disk or disk image that is already installed with an
operating system, you'll probably need to replace its kernel with a
Xen-enabled kernel.

The kernel and ramdisk used to bootstrap the VM must match any kernel modules
that might be present in the VM's disk. It is possible to manually copy the
kernel and ramdisk from the VM's disk (for example, after updating the kernel
within that VM) to the VM server's filesystem. However, an easier (and less
error-prone) method is to use something called the "domUloader". Before a new
VM is started, this loader automatically copies the kernel and ramdisk into
the VM server's filesystem, so that it can be used to bootstrap the new VM.
See /etc/xen/examples/xmexample.domUloader for an example.

Next, make a copy of one of the /etc/xen/examples/* files, and modify it to
suit your needs. For paravirtualized VMs, start with
/etc/xen/examples/xmexample1; for fully virtualized VMs, start with
/etc/xen/examples/xmexample.hvm. You'll need to change (at very least) the
"name" and "disk" parameters.


Managing Virtual Machines
-------------------------
VMs can be managed from the command line or from virt-manager.

Before a VM can be started, xend must be informed of it. vm-install will
automatically import new VM configurations into xend. However, if you copy a
VM from another machine, or manually create a VM configuration file, you will
need to import it into xend with a command like:
xm new my-vm
If your VM's configuration file is not located in /etc/xen/vm, you must
specify the full path. This imports the configuration into xend (and
therefore virt-manager becomes aware of it, also).

Now to start the VM:
xm start my-vm
or start it graphically from virt-manager.

Have a look at running sessions with "xm list". Note the ID of the newly
created VM. Attach to the VM's text console with "xm console <ID>" (replacing
ID with the VM's ID). Attaching to multiple VM consoles is most conveniently
done with the terminal multiplexer "screen".

Have a look at the other xm commands by typing "xm help". Note that most xm
commands must be done as root.


Using the Mouse via VNC in Fully Virtual Mode
---------------------------------------------
In a fully virtualized VM, the mouse may be emulated as a PS/2 mouse, USB
mouse, or USB tablet. The vm-install tool selects the best emulation that is
known to be automatically detected and supported by the operating system.

However, when accessing some fully virtualized operating systems via VNC, the
mouse may be difficult to control if the VM is emulating a PS/2 mouse. PS/2
provides mouse deltas, but VNC only provides absolute coordinates. In such
cases, you may want to manually switch the operating system and VM to use a
USB tablet.

Emulation of a SummaSketch graphics tablet is provided for this reason. To
use the Summa emulation, you will need to configure your fully virtualized OS.
Note that the virtual tablet is connected to the second virtual serial port
(/dev/ttyS1 or COM2).

Most Linux distributions ship with appropriate drivers, and only need to be
configured. To configure gpm, edit /etc/sysconfig/mouse and add these lines:
MOUSETYPE="summa"
XMOUSETYPE="SUMMA"
DEVICE=/dev/ttyS1
The format and location of your configuration file could vary depending upon
your Linux distribution. The goal is to run the gpm daemon as follows:
gpm -t summa -m /dev/ttyS1
X also needs to be configured to use the Summa emulation. Add the following
stanza to /etc/X11/xorg.conf, or use your distribution's tools to add these
settings:
Section "InputDevice"
Identifier "Mouse0"
Driver "summa"
Option "Device" "/dev/ttyS1"
Option "InputFashion" "Tablet"
Option "Mode" "Absolute"
Option "Name" "EasyPen"
Option "Compatible" "True"
Option "Protocol" "Auto"
Option "SendCoreEvents" "on"
Option "Vendor" "GENIUS"
EndSection
After making these changes, restart gpm and X.

To ensure the VM is emulating a USB tablet, add these lines to the
configuration file in /etc/xen/vm:
usb=1
usbdevice='tablet'
Then re-import the configuration into xend:
xm new my-vm


HVM Console in Fully Virtual Mode
---------------------------------
When running a VM in fully virtual mode, a special console is available that
provides some additional ways to control the VM. Press Ctrl-Alt-2 to access
the console; press Ctrl-Alt-1 to return to the VM. While at the console,
type "help" for help.

The two most important commands are "send-key" and "change". The "send-key"
command allows you to send any key sequence to the VM, which might otherwise
be intercepted by your local window manager.

The "change" command allows the target of a block device to be changed; for
example, use it to change from one CD ISO to another. Some versions of Xen
have this command disabled for security reasons. Consult the online
documentation for workarounds.


Networking
----------
Your virtual machines become much more useful if you can reach them via the
network. Starting with openSUSE 11.1 and SLE11, networking in domain 0 is
configured and managed via YaST. The yast2-networking module can be used
to create and manage bridged networks. During initial installation, a bridged
networking proposal will be presented if the "Xen Virtual Machine Host Server"
pattern is selected. The proposal will also be presented if you install Xen
after initial installation using the "Install Hypervisor and Tools" module in
YaST.

The default proposal creates a virtual bridge in domain 0 for each active
ethernet device, enslaving the device to the bridge. Consider a machine
containing two ethernet devices (eth0 and eth1), both with active carriers.
YaST will create br0 and br1, enslaving the eth0 and eth1 devices respectively.

VMs get a virtual network interface (e.g. eth0), which is visible in domain 0
as vifN.0 and connected to the bridge. This means that if you set up an IP
address in the VMs belonging to the same subnet as br0 from your domain 0,
you'll be able to communicate not only with the other slave VMs, but also with
domain 0 and with the external network. If you have a DHCP server running in
your network, your VMs should succeed in getting an IP address.

Be aware that this may have unwanted security implications. You may want to
opt for routing instead of bridging, so you can set up firewalling rules in
domain 0.

Please read about the network configuration in the Xen manual. You can set up
bridging or routing for other interfaces also.

For debugging, here's what happens on bootup of a domU:
- xenstored saves the device setup in xenstore
- domU is created
- vifN.0 shows up in domain 0 and a hotplug event is triggered
- hotplug is /sbin/udev; udev looks at /etc/udev/rules.d/40-xen.rules and
calls /etc/xen/scripts/vif-bridge online
- vif-bridge sets the vifN.0 device up and enslaves it to the bridge
- eth0 shows up in domU (hotplug event triggered)
Similar things happen for block devices, except that /etc/xen/scripts/block is
called.

It's not recommended to use ifplugd or NetworkManager for managing the
interfaces if you use bridging mode. Use routing with nat or proxy-arp
in that case. You also need to do that in case you want to send out packets
on wireless; you can't bridge Xen "ethernet" packets into 802.11 packets.


Thread-Local Storage
--------------------
For some time now, the glibc thread library (NPTL) has used a shortcut to
access thread-local variables at a negative segment offset from the segment
selector GS instead of reading the linear address from the TDB (offset 0).
Unfortunately, this optimization has been made the default by the glibc and
gcc maintainers, as it saves one indirection. For Xen this is bad: The access
to these variables will trap, and Xen will need to use some tricks to make the
access work. It does work, but it's very slow.

SUSE Linux 9.1 and SLES 9 were prior to this change, and thus are not
affected. SUSE Linux 9.2 and 9.3 are affected. For SUSE Linux 10.x and SLES
10, we have disabled negative segment references in gcc and glibc, and so
these are not affected. Other non-SUSE Linux distributions may be affected.

For affected distributions, one way to work around the problem is to rename
the /lib/tls directory, so the pre-i686 version gets used, where no such
tricks are done. An example LSB-compliant init script which automates these
steps is installed at /usr/share/doc/packages/xen/boot.xen. This script
renames /lib/tls when running on Xen, and restores it when not running on Xen.
Modify this script to work with your specific distribution.

Mono has a similar problem, but this has been fixed in SUSE Linux 10.1 and
SLES 10. Older or non-SUSE versions of Mono may have a performance impact.


Security
--------
Domain 0 has control over all domains. This means that care should be taken to
keep domain 0 safe; ideally you strip it down to only do as little there as
possible, preferably with no local users except for the system administrator.
Most commands in domain 0 can only be performed as root, but this protection
scheme only has moderate security and might be defeated. In case domain 0 is
compromised, all other domains are compromised as well.

To allow relocation of VMs (migration), the receiving machine listens on TCP
port 8002. You might want to put firewall rules in place in domain 0 to
restrict this to machines which you trust. You have some access control in
xend-config.sxp as well by tweaking the xend-relocation-hosts-allow
setting. Relocating VMs with sensitive data is not a good idea in untrusted
networks, since the data is not sent encrypted.

The memory protections for the domUs are effective; so far no way to break out
of a virtual machine is known. A VM is an effective jail.


Limitations
-----------
When booting, Linux reserves data structures matching the amount of RAM found.
This has the side-effect that you can't dynamically grow the memory beyond
what the kernel has been booted with. But you can trick domU Linux to prepare
for a larger amount of RAM by passing the mem= boot parameter.

The export of virtual hard disks from files in Xen can be handled via the
loopback driver (although in Xen >= 3.0.4, this can be replaced by the
"blktap" user-space driver.) If you are still using loopback, it may be
possible to run out of loopback devices, as by default only 64 are supported.
You can change this by inserting:
options loop max_loop=128
into /etc/modprobe.conf.local in domain 0.


Network Troubleshooting
-----------------------
First ensure the VM server is configured correctly and can access the network.

Do not use ifplugd or NetworkManager; neither is bridge-aware.

Specify a static virtual MAC in the VM's configuration file. Random MACs can
be problematic, since with each boot of the VM it appears that some hardware
has been removed (the previous random MAC) and new hardware is present (the
new random MAC). This can cause network configuration files (which were
intended for the old MAC) to not be matched up with the new virtual hardware.

In the VM's filesystem, ensure the ifcfg-eth* files are named appropriately.
For example, if you do decide to use a randomly-selected MAC for the VM, the
ifcfg-eth* file must not include the MAC in its name; name it generically
("ifcfg-eth0") instead. If you use a static virtual MAC for the VM, be sure
that is reflected in the file's name.


Troubleshooting
---------------
First try to get Linux running on bare iron before trying with Xen.

Be sure your Xen hypervisor (xen) and VM kernels (kernel-xen) are compatible.
The hypervisor and domain 0 kernel are a matched set, and usually must be
upgraded together. Consult the online documentation for a matrix of supported
32- and 64-bit combinations.

On certain machines with 2GB or less of RAM, domain 0 Linux may fail to boot,
printing the following messages:
PCI-DMA: Using software bounce buffering for IO (SWIOTLB)
...
Kernel panic - not syncing: PCI-DMA: Memory would be corrupted
Fix this by adding "swiotlb=16" to the Linux kernel command line, which
reserves additional memory for the swiotlb (the actual number to be used here
of course depends on the system configuration).

If you have trouble early in the boot, try passing pnpacpi=off to the Linux
kernel. If you have trouble with interrupts or timers, passing lapic to Xen
may help. Xen and Linux understand similar ACPI boot parameters. Try the
options acpi=off,force,strict,ht,noirq or acpi_skip_timer_override.

Other useful debugging options to Xen may be nosmp, noreboot, mem=1024M,
sync_console, noirqbalance (Dell). For a complete list of Xen boot options,
consult chapter 11.3 of the Xen users' manual.

If domain 0 Linux crashes on X11 startup, please try to boot into runlevel 3.

To debug Xen or domain 0 Linux crashes or hangs, it may be useful to use the
debug-enabled hypervisor, and/or to prevent automatic rebooting. Change your
Grub configuration from something like this:
kernel (hd0,5)/xen.gz
To something like this:
kernel (hd0,5)/xen-dbg.gz noreboot
After rebooting, the Xen hypervisor will write any error messages to the log
file (viewable with the "xm dmesg" command).

If problems persist, check if a newer version is available. Well-tested
versions will be shipped with SUSE and via YaST Online Update. More frequent
(but less supported) updates are available on Novell's Forge site:
http://forge.novell.com/modules/xfmod/project/?xenpreview


Upgrading the Host Operating System
-----------------------------------
When upgrading the host operating system from one major release to another
(for example, SLES 10 to SLES 11 or openSUSE 11.4 to openSUSE 12.1) or when
applying a service pack like SLES 11 SP2 to SLES 11 SP1, all running VMs must
be shut down before the upgrade process is begun.


Memory Ballooning in VMs
------------------------
Setting a VM's maximum memory value greater than the initial memory value
requires support for memory ballooning in the VM's operating system. Modern SLES
and openSUSE guests have this capability built-in. Windows installation media
does not support memory ballooning so you must first install the VM without
memory ballooning (maxmem equal to initial memory). After the installation, the
Virtual Machine Driver Pack (vmdp) must be installed. After this, the VM's
maxmem value may be increased. A reboot of the VM is required for this action
to take effect.


Known Issues
------------
For a list of known issues and work-arounds, see
http://www.novell.com/documentation/vmserver/.


Disclaimer
----------
Xen performed amazingly well in our tests and proved very stable. Still, you
should be careful when using it, just like you'd be careful if you boot an
experimental kernel. Expect that it may not boot and be prepared to have a
fall-back solution for that scenario. Be prepared that it may not support all
of your hardware. And for the worst of all cases, have your most valuable
data backed up. (This is always a good idea, of course.)


Feedback
--------
In case you have remarks about, problems with, ideas for, or praise for Xen,
please report it back to the xen-devel list:
xen-devel@xxxxxxxxxxxxxxxxxxx
If you find issues with the packaging or setup done by Novell/SUSE, please
report it to:
http://www.suse.de/feedback/


ENJOY!
Your Novell SUSE Team.
++++++ VNC-Support-for-ExtendedKeyEvent-client-message.patch ++++++
From 9ca313aa0824f2d350a7a6c9b1ef6c47e0408f1d Mon Sep 17 00:00:00 2001
From: aliguori <aliguori@c046a42c-6fe2-441c-8c8c-71466251a162>
Date: Sat, 23 Aug 2008 23:27:37 +0000
Subject: [PATCH] VNC: Support for ExtendedKeyEvent client message

This patch adds support for the ExtendedKeyEvent client message. This message
allows a client to send raw scan codes directly to the server. If the client
and server are using the same keymap, then it's unnecessary to use the '-k'
option with QEMU when this extension is supported.

This extension is currently only implemented by gtk-vnc based clients
(gvncviewer, virt-manager, vinagre, etc.).

Signed-off-by: Anthony Liguori <aliguori@xxxxxxxxxx>



git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5076
c046a42c-6fe2-441c-8c8c-71466251a162
---
vnc.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++---------
1 files changed, 50 insertions(+), 9 deletions(-)

Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vnc.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
@@ -1285,35 +1285,22 @@ static void press_key_altgr_down(VncStat
}
}

-static void do_key_event(VncState *vs, int down, uint32_t sym)
+static void do_key_event(VncState *vs, int down, int keycode, int sym, int
shift)
{
- int keycode;
int shift_keys = 0;
- int shift = 0;
int keypad = 0;
int altgr = 0;
int altgr_keys = 0;

if (is_graphic_console()) {
- if (sym >= 'A' && sym <= 'Z') {
- sym = sym - 'A' + 'a';
- shift = 1;
- }
- else {
+ if (!shift)
shift = keysym_is_shift(vs->kbd_layout, sym & 0xFFFF);
- }

altgr = keysym_is_altgr(vs->kbd_layout, sym & 0xFFFF);
}
shift_keys = vs->modifiers_state[0x2a] | vs->modifiers_state[0x36];
altgr_keys = vs->modifiers_state[0xb8];

- keycode = keysym2scancode(vs->kbd_layout, sym & 0xFFFF);
- if (keycode == 0) {
- fprintf(stderr, "Key lost : keysym=0x%x(%d)\n", sym, sym);
- return;
- }
-
/* QEMU console switch */
switch(keycode) {
case 0x2a: /* Left Shift */
@@ -1445,7 +1432,25 @@ static void do_key_event(VncState *vs, i

static void key_event(VncState *vs, int down, uint32_t sym)
{
- do_key_event(vs, down, sym);
+ int keycode;
+ int shift = 0;
+
+ if (sym >= 'A' && sym <= 'Z' && is_graphic_console()) {
+ sym = sym - 'A' + 'a';
+ shift = 1;
+ }
+ keycode = keysym2scancode(vs->kbd_layout, sym & 0xFFFF);
+ do_key_event(vs, down, keycode, sym, shift);
+}
+
+static void ext_key_event(VncState *vs, int down,
+ uint32_t sym, uint16_t keycode)
+{
+ /* if the user specifies a keyboard layout, always use it */
+ if (keyboard_layout)
+ key_event(vs, down, sym);
+ else
+ do_key_event(vs, down, keycode, sym, 0);
}

static void framebuffer_set_updated(VncState *vs, int x, int y, int w, int h)
@@ -1534,6 +1539,15 @@ static void framebuffer_update_request(V
qemu_mod_timer(vs->timer, qemu_get_clock(rt_clock));
}

+static void send_ext_key_event_ack(VncState *vs)
+{
+ vnc_write_u8(vs, 0);
+ vnc_write_u8(vs, 0);
+ vnc_write_u16(vs, 1);
+ vnc_framebuffer_update(vs, 0, 0, ds_get_width(vs->ds),
ds_get_height(vs->ds), -258);
+ vnc_flush(vs);
+}
+
static void set_encodings(VncState *vs, int32_t *encodings, size_t n_encodings)
{
int i;
@@ -1562,6 +1576,9 @@ static void set_encodings(VncState *vs,
case -257:
vs->has_pointer_type_change = 1;
break;
+ case -258:
+ send_ext_key_event_ack(vs);
+ break;
case 0x574D5669:
vs->has_WMVi = 1;
default:
@@ -1774,6 +1791,24 @@ static int protocol_client_msg(VncState

client_cut_text(vs, read_u32(data, 4), (char *)(data + 8));
break;
+ case 255:
+ if (len == 1)
+ return 2;
+
+ switch (read_u8(data, 1)) {
+ case 0:
+ if (len == 2)
+ return 12;
+
+ ext_key_event(vs, read_u16(data, 2),
+ read_u32(data, 4), read_u32(data, 8));
+ break;
+ default:
+ printf("Msg: %d\n", read_u16(data, 0));
+ vnc_client_error(vs);
+ break;
+ }
+ break;
default:
printf("Msg: %d\n", data[0]);
vnc_client_error(vs);
@@ -2445,10 +2480,11 @@ void vnc_display_init(DisplayState *ds)

vs->ds = ds;

- if (!keyboard_layout)
- keyboard_layout = "en-us";
+ if (keyboard_layout)
+ vs->kbd_layout = init_keyboard_layout(keyboard_layout);
+ else
+ vs->kbd_layout = init_keyboard_layout("en-us");

- vs->kbd_layout = init_keyboard_layout(keyboard_layout);
if (!vs->kbd_layout)
exit(1);
vs->modifiers_state[0x45] = 1; /* NumLock on - on boot */
++++++ altgr_2.patch ++++++
When access domU from Windows VNC client, spanish keyboard altgr key
doesn't work. According to log info, we found that the keycodes passed
from vncclient to qemu vncserver have something wrong. When altgr and "2"
pressed, keycodes vncserver receives are:
ALT_R down,
CTRL_L down,
CTRL_L up,
ALT_R up,
"2" down,
"2" up,
...
Since when send "2" down, there is no altgr modifier, the char displayed
on screen will be "2" but not "@".

To solve this problem, there is another patch applied by upstream which
sends an additional altgr modifier before "2" down in the above case.
It works well when domU is windows, but on sles10 sp3 domU, sometimes it
display "@" and sometimes it still displays "2", especially when press
altgr+2 continuously.

For the sles10 sp3 domU problem, maybe because there are too many alt_r (same
keycode as altgr on "es") up and down events and the domU OS couldn't handle
it well.

To further address this problem, I wrote this patch: when vncserver
is "es" and receives an alt_r keysym (this is already abnormal since "es" has
no alt_r), then treat the alt_r as alt_l. This can avoid too many altgr
keycodes up and down events and make sure the intentionally added altgr keycode
can take effect.

Signed-off by Chunyan Liu (cyliu@xxxxxxxxxx)

Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vnc.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
@@ -1440,6 +1440,9 @@ static void key_event(VncState *vs, int
int keycode;
int shift = 0;

+ if ( sym == 0xffea && keyboard_layout && !strcmp(keyboard_layout,"es") )
+ sym = 0xffe9;
+
if (sym >= 'A' && sym <= 'Z' && is_graphic_console()) {
sym = sym - 'A' + 'a';
shift = 1;
++++++ baselibs.conf ++++++
xen-libs
++++++ bdrv_default_rwflag.patch ++++++
Subject: modify default read/write flag in bdrv_init.
Signed-off by Chunyan Liu <cyliu@xxxxxxxxxx>

Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vl.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vl.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vl.c
@@ -2627,6 +2627,8 @@ int drive_init(struct drive_opt *arg, in
strncpy(drives_table[nb_drives].serial, serial, sizeof(serial));
nb_drives++;

+ bdrv_flags = BDRV_O_RDWR;
+
switch(type) {
case IF_IDE:
case IF_XEN:
@@ -2640,6 +2642,7 @@ int drive_init(struct drive_opt *arg, in
break;
case MEDIA_CDROM:
bdrv_set_type_hint(bdrv, BDRV_TYPE_CDROM);
+ bdrv_flags &= ~BDRV_O_RDWR;
break;
}
break;
@@ -2660,7 +2663,6 @@ int drive_init(struct drive_opt *arg, in
}
if (!file[0])
return -2;
- bdrv_flags = 0;
if (snapshot) {
bdrv_flags |= BDRV_O_SNAPSHOT;
cache = 2; /* always use write-back with snapshot */
++++++ bdrv_open2_fix_flags.patch ++++++
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/block.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/block.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/block.c
@@ -350,7 +350,7 @@ int bdrv_file_open(BlockDriverState **pb

int bdrv_open(BlockDriverState *bs, const char *filename, int flags)
{
- return bdrv_open2(bs, filename, flags, NULL);
+ return bdrv_open2(bs, filename, flags|BDRV_O_RDWR, NULL);
}

int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
@@ -419,12 +419,13 @@ int bdrv_open2(BlockDriverState *bs, con
}
bs->drv = drv;
bs->opaque = qemu_mallocz(drv->instance_size);
- /* Note: for compatibility, we open disk image files as RDWR, and
- RDONLY as fallback */
if (!(flags & BDRV_O_FILE))
- open_flags = (flags & BDRV_O_ACCESS) | (flags & BDRV_O_CACHE_MASK);
+ open_flags = flags;
else
open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT);
+ if (!(open_flags & BDRV_O_RDWR))
+ bs->read_only = 1;
+
ret = drv->bdrv_open(bs, filename, open_flags);
if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) {
ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR);
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/usb-msd.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/usb-msd.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/usb-msd.c
@@ -551,7 +551,7 @@ USBDevice *usb_msd_init(const char *file
s = qemu_mallocz(sizeof(MSDState));

bdrv = bdrv_new("usb");
- if (bdrv_open2(bdrv, filename, 0, drv) < 0)
+ if (bdrv_open2(bdrv, filename, BDRV_O_RDWR, drv) < 0)
goto fail;
s->bs = bdrv;
*pbs = bdrv;
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/qemu-img.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/qemu-img.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/qemu-img.c
@@ -32,7 +32,7 @@
#endif

/* Default to cache=writeback as data integrity is not important for qemu-tcg.
*/
-#define BRDV_O_FLAGS BDRV_O_CACHE_WB
+#define BDRV_O_FLAGS BDRV_O_CACHE_WB

static void QEMU_NORETURN error(const char *fmt, ...)
{
@@ -185,7 +185,7 @@ static int read_password(char *buf, int
#endif

static BlockDriverState *bdrv_new_open(const char *filename,
- const char *fmt)
+ const char *fmt, int flags)
{
BlockDriverState *bs;
BlockDriver *drv;
@@ -201,7 +201,7 @@ static BlockDriverState *bdrv_new_open(c
} else {
drv = &bdrv_raw;
}
- if (bdrv_open2(bs, filename, BRDV_O_FLAGS, drv) < 0) {
+ if (bdrv_open2(bs, filename, flags, drv) < 0) {
error("Could not open '%s'", filename);
}
if (bdrv_is_encrypted(bs)) {
@@ -253,7 +253,7 @@ static int img_create(int argc, char **a
size = 0;
if (base_filename) {
BlockDriverState *bs;
- bs = bdrv_new_open(base_filename, NULL);
+ bs = bdrv_new_open(base_filename, NULL, BDRV_O_RDWR);
bdrv_get_geometry(bs, &size);
size *= 512;
bdrv_delete(bs);
@@ -332,7 +332,7 @@ static int img_commit(int argc, char **a
} else {
drv = NULL;
}
- if (bdrv_open2(bs, filename, BRDV_O_FLAGS, drv) < 0) {
+ if (bdrv_open2(bs, filename, BDRV_O_RDWR, drv) < 0) {
error("Could not open '%s'", filename);
}
ret = bdrv_commit(bs);
@@ -455,7 +455,8 @@ static int img_convert(int argc, char **

total_sectors = 0;
for (bs_i = 0; bs_i < bs_n; bs_i++) {
- bs[bs_i] = bdrv_new_open(argv[optind + bs_i], fmt);
+ bs[bs_i] = bdrv_new_open(argv[optind + bs_i], fmt,
+ BDRV_O_CACHE_WB|BDRV_O_RDONLY);
if (!bs[bs_i])
error("Could not open '%s'", argv[optind + bs_i]);
bdrv_get_geometry(bs[bs_i], &bs_sectors);
@@ -483,7 +484,7 @@ static int img_convert(int argc, char **
}
}

- out_bs = bdrv_new_open(out_filename, out_fmt);
+ out_bs = bdrv_new_open(out_filename, out_fmt, BDRV_O_CACHE_WB|BDRV_O_RDWR);

bs_i = 0;
bs_offset = 0;
@@ -706,7 +707,7 @@ static int img_info(int argc, char **arg
} else {
drv = NULL;
}
- if (bdrv_open2(bs, filename, BRDV_O_FLAGS, drv) < 0) {
+ if (bdrv_open2(bs, filename, BDRV_O_FLAGS|BDRV_O_RDWR, drv) < 0) {
error("Could not open '%s'", filename);
}
bdrv_get_format(bs, fmt_name, sizeof(fmt_name));
@@ -810,7 +811,7 @@ static void img_snapshot(int argc, char
if (!bs)
error("Not enough memory");

- if (bdrv_open2(bs, filename, 0, NULL) < 0) {
+ if (bdrv_open2(bs, filename, BDRV_O_RDWR, NULL) < 0) {
error("Could not open '%s'", filename);
}

++++++ bdrv_open2_flags_2.patch ++++++
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
@@ -225,6 +225,7 @@ static int open_disk(struct td_state *s,
BlockDriver* drv;
char* devname;
static int devnumber = 0;
+ int flags = readonly ? BDRV_O_RDONLY : BDRV_O_RDWR;
int i;

DPRINTF("Opening %s as blktap%d\n", path, devnumber);
@@ -247,7 +248,7 @@ static int open_disk(struct td_state *s,
DPRINTF("%s driver specified\n", drv ? drv->format_name : "No");

/* Open the image */
- if (bdrv_open2(bs, path, 0, drv) != 0) {
+ if (bdrv_open2(bs, path, flags, drv) != 0) {
fprintf(stderr, "Could not open image file %s\n", path);
return -ENOMEM;
}
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/xenstore.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
@@ -133,7 +133,8 @@ static void insert_media(void *opaque)
else
format = &bdrv_raw;

- bdrv_open2(bs, media_filename[i], 0, format);
+ /* Temporary BDRV_O_RDWR */
+ bdrv_open2(bs, media_filename[i], BDRV_O_RDWR, format);
#ifdef CONFIG_STUBDOM
{
char *buf, *backend, *params_path, *params;
@@ -508,7 +509,8 @@ void xenstore_parse_domain_config(int hv
}

for (i = 0; i < num; i++) {
- format = NULL; /* don't know what the format is yet */
+ flags = 0;
+ format = NULL; /* don't know what the format is yet */
/* read the backend path */
xenstore_get_backend_path(&bpath, "vbd", danger_path, hvm_domid,
e_danger[i]);
if (bpath == NULL)
@@ -594,6 +596,17 @@ void xenstore_parse_domain_config(int hv
format = &bdrv_raw;
}

+ /* read the mode of the device */
+ if (pasprintf(&buf, "%s/mode", bpath) == -1)
+ continue;
+ free(mode);
+ mode = xs_read(xsh, XBT_NULL, buf, &len);
+
+ if (!strcmp(mode, "r") || !strcmp(mode, "ro"))
+ flags |= BDRV_O_RDONLY;
+ if (!strcmp(mode, "w") || !strcmp(mode, "rw"))
+ flags |= BDRV_O_RDWR;
+
#if 0
/* Phantom VBDs are disabled because the use of paths
* from guest-controlled areas in xenstore is unsafe.
@@ -661,7 +674,7 @@ void xenstore_parse_domain_config(int hv
#ifdef CONFIG_STUBDOM
if (pasprintf(&danger_buf, "%s/device/vbd/%s", danger_path,
e_danger[i]) == -1)
continue;
- if (bdrv_open2(bs, danger_buf, BDRV_O_CACHE_WB /* snapshot and
write-back */, &bdrv_raw) == 0) {
+ if (bdrv_open2(bs, danger_buf, flags|BDRV_O_CACHE_WB /* snapshot and
write-back */, &bdrv_raw) == 0) {
if (pasprintf(&buf, "%s/params", bpath) == -1)
continue;
free(params);
++++++ blktap-close-fifos.patch ++++++
Index: xen-4.2.0-testing/tools/blktap/drivers/blktapctrl.c
===================================================================
--- xen-4.2.0-testing.orig/tools/blktap/drivers/blktapctrl.c
+++ xen-4.2.0-testing/tools/blktap/drivers/blktapctrl.c
@@ -282,7 +282,7 @@ static int del_disktype(blkif_t *blkif)
* qemu-dm instance. We may close the file handle only if there is
* no other disk left for this domain.
*/
- if (dtypes[type]->use_ioemu)
+ if (dtypes[type]->use_ioemu && dtypes[type]->idnum != DISK_TYPE_AIO)
return !qemu_instance_has_disks(blkif->tappid);

/* Caller should close() if no single controller, or list is empty. */
++++++ blktap-disable-debug-printf.patch ++++++
Index: xen-4.2.0-testing/tools/blktap/drivers/blktapctrl.c
===================================================================
--- xen-4.2.0-testing.orig/tools/blktap/drivers/blktapctrl.c
+++ xen-4.2.0-testing/tools/blktap/drivers/blktapctrl.c
@@ -61,6 +61,9 @@
#include "list.h"
#include "xs_api.h" /* for xs_fire_next_watch() */

+#undef DPRINTF
+#define DPRINTF(_f, _a...) ((void)0)
+
#define PIDFILE "/var/run/blktapctrl.pid"

#define NUM_POLL_FDS 2
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
@@ -46,7 +46,7 @@
#define BLKTAP_CTRL_DIR "/var/run/tap"

/* If enabled, print debug messages to stderr */
-#if 1
+#if 0
#define DPRINTF(_f, _a...) fprintf(stderr, __FILE__ ":%d: " _f, __LINE__, ##_a)
#else
#define DPRINTF(_f, _a...) ((void)0)
++++++ blktap-pv-cdrom.patch ++++++
++++ 847 lines (skipped)

++++++ blktap.patch ++++++
bug #239173
bug #242953

Index: xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -3315,7 +3315,7 @@ class XendDomainInfo:
(fn, BOOTLOADER_LOOPBACK_DEVICE))

vbd = {
- 'mode': 'RO',
+ 'mode': 'RW',
'device': BOOTLOADER_LOOPBACK_DEVICE,
}

Index: xen-4.2.1-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
===================================================================
--- xen-4.2.1-testing.orig/tools/qemu-xen-traditional-dir-remote/xenstore.c
+++ xen-4.2.1-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
@@ -447,9 +447,9 @@ void xenstore_parse_domain_config(int hv
{
char **e_danger = NULL;
char *buf = NULL;
- char *fpath = NULL, *bpath = NULL,
+ char *fpath = NULL, *bpath = NULL, *btype = NULL,
*dev = NULL, *params = NULL, *drv = NULL;
- int i, ret;
+ int i, ret, is_tap;
unsigned int len, num, hd_index, pci_devid = 0;
BlockDriverState *bs;
BlockDriver *format;
@@ -486,6 +486,14 @@ void xenstore_parse_domain_config(int hv
e_danger[i]);
if (bpath == NULL)
continue;
+ /* check to see if type is tap or not */
+ if (pasprintf(&buf, "%s/type", bpath) == -1)
+ continue;
+ free(btype);
+ btype = xs_read(xsh, XBT_NULL, buf, &len);
+ if (btype == NULL)
+ continue;
+ is_tap = !strncmp(btype, "tap", 3);
/* read the name of the device */
if (pasprintf(&buf, "%s/dev", bpath) == -1)
continue;
@@ -777,6 +785,7 @@ void xenstore_parse_domain_config(int hv
free(mode);
free(params);
free(dev);
+ free(btype);
free(bpath);
free(buf);
free(danger_buf);
++++++ blktapctrl-default-to-ioemu.patch ++++++
Index: xen-4.2.0-testing/tools/blktap/drivers/tapdisk.h
===================================================================
--- xen-4.2.0-testing.orig/tools/blktap/drivers/tapdisk.h
+++ xen-4.2.0-testing/tools/blktap/drivers/tapdisk.h
@@ -168,7 +168,7 @@ static disk_info_t aio_disk = {
"raw image (aio)",
"aio",
0,
- 0,
+ 1,
#ifdef TAPDISK
&tapdisk_aio,
#endif
@@ -179,7 +179,7 @@ static disk_info_t sync_disk = {
"raw image (sync)",
"sync",
0,
- 0,
+ 1,
#ifdef TAPDISK
&tapdisk_sync,
#endif
@@ -190,7 +190,7 @@ static disk_info_t vmdk_disk = {
"vmware image (vmdk)",
"vmdk",
1,
- 0,
+ 1,
#ifdef TAPDISK
&tapdisk_vmdk,
#endif
@@ -212,7 +212,7 @@ static disk_info_t qcow_disk = {
"qcow disk (qcow)",
"qcow",
0,
- 0,
+ 1,
#ifdef TAPDISK
&tapdisk_qcow,
#endif
@@ -223,7 +223,7 @@ static disk_info_t qcow2_disk = {
"qcow2 disk (qcow2)",
"qcow2",
0,
- 0,
+ 1,
#ifdef TAPDISK
&tapdisk_qcow2,
#endif
++++++ block-dmmd ++++++
#! /bin/bash

# Usage: block-dmmd [add args | remove args]
#
# the xm config file should have something like:
# dmmd:md;/dev/md0;md;/dev/md1;lvm;/dev/vg1/lv1
# or
# dmmd:lvm;/dev/vg1/lv1;lvm;/dev/vg1/lv2;md;/dev/md0
# note the last device will be used for VM

# History:
# 2009-06-09, mh@xxxxxxxxxx:
# Emit debugging messages into a temporary file; if no longer needed,
# just comment the exec I/O redirection below
# Make variables used in functions local to avoid global overridings
# Use vgscan and vgchange where required
# Use the C locale to avoid dealing with localized messages
# Assign output from assembling an MD device to a variable to aid
# debugging

# We do not want to deal with localized messages:
# (run_mdadm below matches on the English text of mdadm's output)
LANG=C
LC_MESSAGES=C
export LANG LC_MESSAGES

# Source the shared helper script that lives next to this one.
# NOTE(review): claim_lock, release_lock and write_dev used further down are
# not defined in this file -- presumably they come from block-common.sh.
dir=$(dirname "$0")
. "$dir/block-common.sh"

# Uncomment the exec line to append all output of this run to a
# time-stamped log file under /tmp for debugging.
#exec >> /tmp/block-dmmd-`date +%F_%T.%N`.log 2>&1
#echo shell-flags: $-

# First argument selects the action handled at the bottom: add or remove.
command=$1

# We check for errors ourselves:
set +e

function run_mdadm()
{
    # Run /sbin/mdadm with the argument string given in $1 and map the
    # outcome to a return code:
    #   0  mdadm reported "has been started" or "already active"
    #   2  mdadm reported "is already in use"
    #   otherwise, mdadm's own exit status
    local args=$1
    local output status

    # $args is deliberately left unquoted: callers pass the whole option
    # list as one string and rely on word splitting here.
    output=$(/sbin/mdadm $args 2>&1)
    status=$?

    case "$output" in
        *"has been started"* | *"already active"* )
            status=0
            ;;
        *"is already in use"* )
            # hmm, might be used by another device in this domU
            # leave it to upper layers to detect a real error
            status=2
            ;;
    esac
    return $status
}

function activate_md()
{
    # Assemble the MD device described by $1.
    # $1 is either "<device>" or "<device>(<config file>)"; in the latter
    # form the config file is handed to mdadm via -c.
    # Returns 0 on success (including the case where the device was
    # already active and mdadm reports it as in use), otherwise the code
    # produced by run_mdadm.
    local spec=$1
    local was_active=0 conf_opt device status tail

    if [ ${spec} = ${spec%%(*} ]; then
        # No "(" in the argument: no configuration file specified.
        device=$spec
        conf_opt=
    else
        device=${spec%%(*}
        tail=${spec#*(}
        conf_opt="-c ${tail%%)*}"
    fi

    # Remember whether the array was already running before we touch it.
    /sbin/mdadm -Q -D $device && was_active=1

    run_mdadm "-A $device $conf_opt"
    status=$?

    # "already in use" (2) is fine if the device was active beforehand.
    if [ $was_active -eq 1 ] && [ $status -eq 2 ]; then
        return 0
    fi
    return $status
}

function deactivate_md()
{
    # Stop the MD device described by $1 and return mdadm's exit status.
    # Only the device name is needed here, so any "(<config file>)"
    # suffix is stripped before calling mdadm -S.
    local spec=$1
    local device=${spec%%(*}

    /sbin/mdadm -S $device
}

function activate_lvm()
{
    # Activate the logical volume given in $1.
    # Returns 0 if lvchange succeeded, 1 otherwise.
    #
    # Scanning for PVs and VGs and activating the VG beforehand is
    # currently disabled; the commands would be (errors ignorable):
    # /sbin/pvscan || :
    # /sbin/vgscan --mknodes || :
    # /sbin/vgchange -ay ${1%/*} || :
    /sbin/lvchange -ay $1 || return 1
    return 0
}

# Deactivate a logical volume.
#   $1 - logical volume path handed to "lvchange -an"
# Returns 0 if lvchange succeeded, 1 otherwise.
function deactivate_lvm()
{
    if /sbin/lvchange -an $1; then
        # Optional follow-up steps, currently disabled (errors could be ignored):
        # /sbin/vgchange -an ${1%/*} || :
        # /sbin/vgscan --mknodes || :
        return 0
    fi
    return 1
}

# State for a small stack of "type device" strings, used by push, pop and
# cleanup_stack below.
# BP is the fixed base index; SP equal to BP means the stack is empty.
# SP is the index of the top entry; push decrements it, pop increments it.
BP=100
SP=$BP
# Set by pop: the popped entry, or empty when the stack was empty.
VBD=

declare -a stack
# Push one entry onto the stack.
#   $1 - the string to store; an empty string is silently ignored
function push()
{
    [ -z "$1" ] && return 0

    # The stack grows towards lower indices.
    (( SP -= 1 ))
    stack[$SP]="${1}"
}

# Pop the top entry of the stack into the global VBD.
# VBD is left empty when the stack has no entries.
function pop()
{
    VBD=

    if [ "$SP" -ne "$BP" ]; then
        VBD=${stack[$SP]}
        (( SP += 1 ))
    fi
}

# Activate one device of the dmmd list.
#   $1 - device type: "md" or "lvm" (anything else is a no-op)
#   $2 - device specification passed on to the type-specific helper
# Returns the helper's status, or 0 for an unknown type.
function activate_dmmd()
{
    if [ "$1" = "md" ]; then
        activate_md $2
    elif [ "$1" = "lvm" ]; then
        activate_lvm $2
    fi
}

# Deactivate one device of the dmmd list.
#   $1 - device type: "md" or "lvm" (anything else is a no-op)
#   $2 - device specification passed on to the type-specific helper
# Returns the helper's status, or 0 for an unknown type.
function deactivate_dmmd()
{
    if [ "$1" = "md" ]; then
        deactivate_md $2
    elif [ "$1" = "lvm" ]; then
        deactivate_lvm $2
    fi
}

# Deactivate every device recorded on the stack, most recently pushed first,
# until the stack is empty.
function cleanup_stack()
{
    pop
    while [ -n "$VBD" ]; do
        # VBD holds "type device"; left unquoted so it splits into two words.
        deactivate_dmmd $VBD
        pop
    done
}

# Walk a ';'-separated list of "type;device" pairs.
#   $1 - "activate" to activate each pair as it is read; any other value only
#        records the pairs
#   $2 - the list, e.g. "md;/dev/md0;lvm;/dev/vg1/lv1"
# Each handled pair is pushed as "type device" so that cleanup_stack can
# later deactivate the devices in reverse order.
# Returns 0 when the whole list was consumed, 1 when a type has no device
# following it or when an activation fails (the failing pair is not pushed).
function parse_par()
{
    local action=$1
    # A trailing ';' guarantees that every field is terminated.
    local rest="$2;"
    local kind target

    while :; do
        kind=${rest%%;*}
        [ -n "$kind" ] || return 0
        rest=${rest#*;}

        target=${rest%%;*}
        [ -n "$target" ] || return 1
        rest=${rest#*;}

        if [ "$action" = "activate" ] && ! activate_dmmd $kind $target; then
            return 1
        fi
        push "$kind $target"
    done
}


# Main dispatch on the first script argument.
case "$command" in
add)
# The device list for this backend is read from its xenstore "params" key.
p=`xenstore-read $XENBUS_PATH/params` || true
claim_lock "dmmd"
dmmd=$p
# Activate every listed device in order.
parse_par activate "$dmmd"
rc=$?
if [ $rc -ne 0 ]; then
# Roll back: deactivate the devices recorded before the failure.
cleanup_stack
release_lock "dmmd"
exit 1
fi
# The last list entry is the device used for the VM; an optional
# "(config-file)" suffix is stripped from it.
lastparam=${dmmd##*;}
usedevice=${lastparam%(*}
xenstore-write $XENBUS_PATH/node "$usedevice"
write_dev "$usedevice"
release_lock "dmmd"
exit 0
;;

remove)
p=`xenstore-read $XENBUS_PATH/params` || true
claim_lock "dmmd"
dmmd=$p
# Any action other than "activate" only records the devices on the stack;
# cleanup_stack then deactivates them in reverse list order.
parse_par noactivate "$dmmd"
cleanup_stack
release_lock "dmmd"
exit 0
;;
esac
++++++ block-iscsi ++++++
#!/bin/bash

# Usage: block-iscsi [add tgtname | remove dev]
#
# This assumes you're running a correctly configured
# iscsi target (server) at the other end!
# Note that we assume that the passwords for discovery (if needed)
# are in /etc/iscsid.conf
# and the node session passwords (if required) in the
# open-iscsi database below /var/lib/open-iscsi/node.db
#
# (c) Kurt Garloff <kurt@xxxxxxxxxx>, 2006-09-04, GNU GPL
# Contributors: Jim Fehlig <jfehlig@xxxxxxxxxx>
# Stefan de Konink <skinkie@xxxxxxxxx>

dir=$(dirname "$0")
. "$dir/block-common.sh"

# echo "DBG:xen/scripts/block-iscsi $1 $2 XENBUS_PATH=$XENBUS_PATH $par $node"

# Find the block device that belongs to an iSCSI session.
#   $1 - target name to look for
# Walks the sessions under /sys/class/iscsi_session and compares each
# session's "targetname" file with $1. On the first match the global "dev" is
# set to the base name of the session's block device entry. "dev" is left
# unset when no session matches.
find_sdev()
{
unset dev
for session in /sys/class/iscsi_session/session*; do
if [ "$1" = "`cat $session/targetname 2>/dev/null`" ]; then
dev=`basename $session/device/target*/*:0:*/block*/*`
return
fi
done
}

# Reverse lookup: find the iSCSI target name that provides a block device.
#   $1 - device base name to look for
# On the first session whose block device base name equals $1, the global
# "tgt" is set to that session's target name. "tgt" is left unset when no
# session matches.
# Note: the global "dev" is overwritten with each session's device name while
# searching.
find_sdev_rev()
{
unset tgt
for session in /sys/class/iscsi_session/session*; do
dev=`basename $session/device/target*/*:0:*/block*/*`
if [ "$dev" = "$1" ]; then
tgt=`cat $session/targetname 2>/dev/null`
return
fi
done
}

# Main dispatch. $command is not assigned in this script; presumably it is
# provided by the sourced block-common.sh - confirm.
case "$command" in
    add)
        # load modules and start iscsid
        /etc/init.d/open-iscsi status >/dev/null 2>&1 ||
            { /etc/init.d/open-iscsi start >/dev/null 2>&1; sleep 1; }
        par=`xenstore-read $XENBUS_PATH/params` || true
        # The target name is the params value with every "//" removed.
        TGTID=`echo $par | sed "s/\/\///g"`
        # Each line printed by "iscsiadm -m node" is read as "<rec> <uuid>".
        while read rec uuid; do
            if [ "$uuid" = "$TGTID" ]; then
                find_sdev $TGTID
                if [ -z "$dev" ]; then
                    # Not logged in yet: log in, wait for the device to
                    # show up, then look again.
                    /sbin/iscsiadm -m node -T $uuid -p $rec --login || exit 2
                    sleep 4
                    find_sdev $TGTID
                fi
                # Still no block device: fail rather than publish a bare
                # "/dev/" as the node.
                if [ -z "$dev" ]; then
                    exit 1
                fi
                xenstore-write $XENBUS_PATH/node /dev/$dev
                write_dev /dev/$dev
                exit 0
            fi
        done < <(/sbin/iscsiadm -m node)
        # No configured node record matches the requested target.
        exit 1
        ;;

    remove)
        node=`xenstore-read $XENBUS_PATH/node` || true
        dev=$node; dev=${dev#/dev/}
        find_sdev_rev $dev
        # Flush buffered data before the session goes away.
        if [ -x /sbin/blockdev -a -n "$node" ]; then
            blockdev --flushbufs "$node"
        fi
        test -z "$tgt" && exit 2
        /sbin/iscsiadm -m node -T $tgt --logout
        # Report the result of the logout instead of an unconditional
        # failure; the remove branches of the sibling block-* scripts exit 0
        # on success.
        exit $?
        ;;
esac
++++++ block-nbd ++++++
#!/bin/sh

# Usage: block-nbd [bind server ctl_port |unbind node]
#
# The node argument to unbind is the name of the device node we are to
# unbind.
#
# This assumes you're running a correctly configured server at the other end!

dir=$(dirname "$0")
. "$dir/block-common.sh"

#set -x
# Arguments for nbd-client, read from this device's xenstore "params" key.
par=`xenstore-read $XENBUS_PATH/params` || true
#echo $par

# NOTE(review): $command is not assigned in this script; presumably it is set
# by the sourced block-common.sh - confirm.
case "$command" in
add)
modprobe nbd
# Try the /dev/nbd* nodes in turn and keep the first one nbd-client accepts.
for dev in /dev/nbd*; do
if nbd-client $par $dev; then
xenstore-write $XENBUS_PATH/node $dev
write_dev $dev
exit 0
fi
done
# No nbd device could be connected.
exit 1
;;
remove)
# Disconnect the device node recorded in xenstore by the add branch.
node=`xenstore-read $XENBUS_PATH/node` || true
nbd-client -d $node
exit 0
;;
esac
++++++ block-npiv ++++++
#!/bin/bash

# Usage: block-npiv [add npiv | remove dev]

dir=$(dirname "$0")
. "$dir/block-npiv-common.sh"
. "$dir/block-common.sh"

#set -x
#command=$1

# Main dispatch. $command is not assigned in this script (the assignment
# above is commented out); presumably block-common.sh provides it - confirm.
case "$command" in
    add)
        # Params is one big arg, with fields separated by hyphens:
        # single path:
        # FABRIC-VPWWPN-VPWWNN-TGTWWPN-LUN#
        # multipath:
        # {FABRIC1.FABRIC2}-{VPWWPN1.VPWWPN2.VPWWPN3}-VPWWNN-TGTWWPN-LUN#
        # arg 2 - Fabric Name
        # arg 3 - VPORT's WWPN
        # arg 4 - VPORT's WWNN
        # arg 5 - Target's WWPN
        # arg 6 - LUN # on Target
        # no wwn contains a leading 0x - it is a 16 character hex value
        # You may want to optionally pick a specific adapter ?
        par=`xenstore-read $XENBUS_PATH/params` || true
        #par=$2
        NPIVARGS=$par;
        # Peel the fields off from the right. A field that equals what is
        # left of the argument string means a separator was missing.
        # Operands are quoted so an empty value cannot break the test.
        LUN=${NPIVARGS##*-*-*-*-}; NPIVARGS=${NPIVARGS%-*}
        if test "$LUN" = "$NPIVARGS" ; then exit 1; fi
        TGTWWPN=${NPIVARGS##*-*-*-}; NPIVARGS=${NPIVARGS%-*}
        if test "$TGTWWPN" = "$NPIVARGS" ; then exit 1; fi
        VPORTWWNN=${NPIVARGS##*-*-}; NPIVARGS=${NPIVARGS%-*}
        if test "$VPORTWWNN" = "$NPIVARGS" ; then exit 1; fi
        VPORTWWPNS=${NPIVARGS##*-}; NPIVARGS=${NPIVARGS%-*}
        if test "$VPORTWWPNS" = "$NPIVARGS" ; then exit 1; fi
        FABRICNMS=$NPIVARGS

        # Ensure we compare everything using lower-case hex characters
        # (the multipath "{a.b}" lists become blank-separated words)
        TGTWWPN=`echo $TGTWWPN | tr A-Z a-z`
        VPORTWWPNS=`echo $VPORTWWPNS | tr A-Z a-z |sed 's/[{.}]/ /g'`
        VPORTWWNN=`echo $VPORTWWNN | tr A-Z a-z`
        FABRICNMS=`echo $FABRICNMS | tr A-Z a-z |sed 's/[{.}]/ /g'`


        # Every error exit inside the locked region releases the lock first,
        # as block-dmmd does, so a failed add does not leave it held.
        claim_lock "npiv"
        paths=0
        for FABRICNM in $FABRICNMS; do
            for VPORTWWPN in $VPORTWWPNS; do
                find_vhost $VPORTWWPN $FABRICNM
                if test -z "$vhost" ; then
                    if ! create_vport $FABRICNM $VPORTWWPN $VPORTWWNN ; then
                        release_lock "npiv"
                        exit 2
                    fi
                    # give the new vport time to appear
                    sleep 8
                    find_vhost $VPORTWWPN $FABRICNM
                    if test -z "$vhost" ; then
                        release_lock "npiv"
                        exit 3
                    fi
                fi
                find_sdev $vhost $TGTWWPN $LUN
                if test -z "$dev"; then
                    # trigger a rescan of the host and look again
                    echo "- - -" > /sys/class/scsi_host/$vhost/scan
                    sleep 2
                    find_sdev $vhost $TGTWWPN $LUN
                fi
                if test -z "$dev"; then
                    release_lock "npiv"
                    exit 4
                fi
                paths=$(($paths+1))
            done
        done
        release_lock "npiv"

        if test $paths -gt 1; then
            xenstore-write $XENBUS_PATH/multipath 1
            /etc/init.d/multipathd start
            if test $? -ne 0 ; then exit 4; fi
            dm=`multipath -l /dev/$dev | grep dm | cut -f2 -d' '`
        else
            xenstore-write $XENBUS_PATH/multipath 0
            dm=$dev
        fi

        if test ! -z "$dm"; then
            xenstore-write $XENBUS_PATH/node /dev/$dm
            write_dev /dev/$dm
            exit 0
        fi

        exit 4
        ;;

    remove)
        node=`xenstore-read $XENBUS_PATH/node` || true
        multipath=`xenstore-read $XENBUS_PATH/multipath` || true
        # this is really screwy. the first delete of a lun will
        # terminate the entire vport (all luns)
        # (quoted: the value is empty when the xenstore read failed)
        if test "$multipath" = 1; then
            par=`xenstore-read $XENBUS_PATH/params` || true
            NPIVARGS=$par;
            FABRICNMS=${NPIVARGS%%-*}; NPIVARGS=${NPIVARGS#*-}
            VPORTWWPNS=${NPIVARGS%%-*}
            VPORTWWPNS=`echo $VPORTWWPNS | tr A-Z a-z |sed 's/[{.}]/ /g'`
            FABRICNMS=`echo $FABRICNMS | tr A-Z a-z |sed 's/[{.}]/ /g'`
            for FABRICNM in $FABRICNMS; do
                for VPORTWWPN in $VPORTWWPNS; do
                    find_vhost $VPORTWWPN $FABRICNM
                    if test -z "$vhost" ; then exit 5; fi
                    flush_nodes_on_vhost $vhost
                    delete_vhost $vhost
                done
            done
        else
            dev=$node; dev=${dev#/dev/}
            find_vhost_from_dev $dev
            if test -z "$vhost" ; then exit 5; fi
            flush_nodes_on_vhost $vhost
            delete_vhost $vhost
        fi

        exit 0
        ;;
esac
++++++ block-npiv-common.sh ++++++


# Look for the NPIV vport with the WWPN
# $1 contains the WWPN (assumes it does not contain a leading "0x")
# $2 contains the FABRICNM (assumes it does not contain "0x")
# Result: the global "vhost" holds the "hostN" name of the matching vport.
# Note that vhost is assigned as soon as the WWPN matches, i.e. before the
# fabric name is compared, so it can remain set when no fabric name matched.
find_vhost()
{
    unset vhost

    # look in upstream locations
    for fchost in /sys/class/fc_vports/* ; do
        if test -e $fchost/port_name ; then
            wwpn=`cat $fchost/port_name | sed -e s/^0x//`
            # operands quoted so that an empty value cannot break the test
            if test "$wwpn" = "$1" ; then
                # Note: makes the assumption the vport will always have an
                # scsi_host child
                vhost=`ls -d $fchost/device/host*`
                vhost=`basename $vhost`
                fname=`cat /sys/class/fc_host/$vhost/fabric_name | sed -e s/^0x//`
                if test "$fname" = "$2" ; then
                    return
                fi
            fi
        fi
    done

    # look in vendor-specific locations

    # Emulex - just looks like another scsi_host - so look at fc_hosts...
    for fchost in /sys/class/fc_host/* ; do
        if test -e $fchost/port_name ; then
            wwpn=`cat $fchost/port_name | sed -e s/^0x//`
            if test "$wwpn" = "$1" ; then
                # Note: makes the assumption the vport will always have an
                # scsi_host child
                vhost=`basename $fchost`
                fname=`cat $fchost/fabric_name | sed -e s/^0x//`
                if test "$fname" = "$2" ; then
                    return
                fi
            fi
        fi
    done
}


# Create a NPIV vport on the fabric w/ FABRICNM, with WWPN,WWNN
# $1 contains FABRICNM
# $2 contains the VPORT WWPN
# $3 contains the VPORT WWNN
# (assumes no name contains a leading "0x")
# Returns 0 as soon as one adapter accepted the vport, 1 when none did.
#
# The comparisons below quote their operands because sysfs values may be
# empty or contain blanks. Each write to vport_create is itself the "if"
# condition, so a failed write falls through to the next adapter as the
# comments intend, whatever the shell's errexit setting is.
create_vport()
{
    # find a base adapter with npiv support that is on the right fabric

    # Look via upstream interfaces
    for fchost in /sys/class/fc_host/* ; do
        if test -e $fchost/vport_create ; then
            # is the link up, w/ NPIV support ?
            pstate=`cat $fchost/port_state`
            ptype=`cat $fchost/port_type | cut -c 1-5`
            fname=`cat $fchost/fabric_name | sed -e s/^0x//`
            if [ "$pstate" = "Online" ] && [ "$ptype" = "NPort" ] && [ "$fname" = "$1" ] ; then
                vmax=`cat $fchost/max_npiv_vports`
                vinuse=`cat $fchost/npiv_vports_inuse`
                # shell arithmetic: unlike expr, a zero result is not an error
                avail=$((vmax - vinuse))
                if [ "$avail" -gt 0 ] ; then
                    # create the vport
                    if echo "$2:$3" > $fchost/vport_create ; then
                        return 0
                    fi
                    # failed - so we'll just look for the next adapter
                fi
            fi
        fi
    done

    # Look in vendor-specific locations

    # Emulex: interfaces mirror upstream, but are under adapter scsi_host
    for shost in /sys/class/scsi_host/* ; do
        if [ -e $shost/vport_create ] ; then
            fchost=`ls -d $shost/device/fc_host*`
            # is the link up, w/ NPIV support ?
            if [ -e $fchost/port_state ] ; then
                pstate=`cat $fchost/port_state`
                ptype=`cat $fchost/port_type | cut -c 1-5`
                fname=`cat $fchost/fabric_name | sed -e s/^0x//`
                if [ "$pstate" = "Online" ] && [ "$ptype" = "NPort" ] && [ "$fname" = "$1" ] ; then
                    vmax=`cat $shost/max_npiv_vports`
                    vinuse=`cat $shost/npiv_vports_inuse`
                    avail=$((vmax - vinuse))
                    if [ "$avail" -gt 0 ] ; then
                        # create the vport
                        if echo "$2:$3" > $shost/vport_create ; then
                            return 0
                        fi
                        # failed - so we'll just look for the next adapter
                    fi
                fi
            fi
        fi
    done

    # BFA are under adapter scsi_host
    # (no vport limit is checked for these adapters)
    for shost in /sys/class/scsi_host/* ; do
        if [ -e $shost/vport_create ] ; then
            fchost=`ls -d $shost/device/fc_host/*`
            # is the link up, w/ NPIV support ?
            if [ -e $fchost/port_state ] ; then
                pstate=`cat $fchost/port_state`
                ptype=`cat $fchost/port_type | cut -c 1-5`
                fname=`cat $fchost/fabric_name | sed -e s/^0x//`
                if [ "$pstate" = "Online" ] && [ "$ptype" = "NPort" ] && [ "$fname" = "$1" ] ; then
                    # create the vport
                    if echo "$2:$3" > $shost/vport_create ; then
                        return 0
                    fi
                    # failed - so we'll just look for the next adapter
                fi
            fi
        fi
    done

    return 1
}


# Look for the LUN on the indicated scsi_host (which is an NPIV vport)
# $1 is the scsi_host name (normalized to simply the hostX name)
# $2 is the WWPN of the tgt port the lun is on
# Note: this implies we don't support a multipath'd lun, or we
# are explicitly identifying a "path"
# $3 is the LUN number of the scsi device
# Result: the global "dev" holds the block device name of the matching LUN;
# it is left unset when nothing matches.
find_sdev()
{
    unset dev
    # strip everything up to and including "host" to get the host number
    hostno=${1/*host/}
    for sdev in /sys/class/scsi_device/${hostno}:*:$3 ; do
        if test -e $sdev/device/../fc_trans*/target${hostno}*/port_name ; then
            # the whole pipeline must stay on one line inside the backticks
            tgtwwpn=`cat $sdev/device/../fc_trans*/target${hostno}*/port_name | sed -e s/^0x//`
            if test "$tgtwwpn" = "$2" ; then
                if test -e $sdev/device/block* ; then
                    dev=`ls $sdev/device/block*`
                    dev=${dev##*/}
                    return
                fi
            fi
        fi
    done
}


# Look for the NPIV vhost based on a scsi "sdX" name
# $1 is the "sdX" name
# Result: the global "vhost" is set to "host" followed by the text in front
# of the first ':' in the last path component of the /sys/block/$1/device
# link target. vhost is left unset when that text is empty.
find_vhost_from_dev()
{
unset vhost
hostno=`readlink /sys/block/$1/device`
# keep only the last path component of the link target ...
hostno=${hostno##*/}
# ... and of that only the part in front of the first ':'
hostno=${hostno%%:*}
if test -z "$hostno" ; then return; fi
vhost="host"$hostno
}


# We're about to terminate a vhost based on a scsi device
# Flush all nodes on that vhost as they are about to go away
# $1 is the vhost
# Does nothing when /sbin/blockdev is not executable.
flush_nodes_on_vhost()
{
    if test ! -x /sbin/blockdev ; then return; fi
    hostno=${1/*host/}
    for sdev in /sys/class/scsi_device/${hostno}:* ; do
        if test -e $sdev/device/block* ; then
            dev=`ls $sdev/device/block*`
            # Check for a device name BEFORE prepending "/dev/"; afterwards
            # the string can never be empty and the check would be useless.
            if test -n "$dev"; then
                dev="/dev/"$dev
                blockdev --flushbufs $dev
            fi
        fi
    done
}


# Terminate a NPIV vhost
# $1 is vhost
# Tries the upstream fc_vports interface first, then the vendor-specific
# scsi_host interfaces. Returns after a successful delete (followed by a 4s
# pause). Terminates the whole script with status 6 when a delete request
# fails or when no interface is found.
delete_vhost()
{
    # use upstream interface
    for vport in /sys/class/fc_vports/* ; do
        if test -e $vport/device/$1 ; then
            if test -e $vport/vport_delete ; then
                echo "1" > $vport/vport_delete
                if test $? -ne 0 ; then exit 6; fi
                sleep 4
                return
            fi
        fi
    done

    # use vendor specific interface

    # Emulex
    if test -e /sys/class/fc_host/$1/device/../scsi_host*/lpfc_drvr_version ; then
        # each pipeline must stay on one line inside the backticks
        shost=`ls -1d /sys/class/fc_host/$1/device/../scsi_host* | sed s/.*scsi_host://`
        vportwwpn=`cat /sys/class/fc_host/$1/port_name | sed s/^0x//`
        vportwwnn=`cat /sys/class/fc_host/$1/node_name | sed s/^0x//`
        echo "$vportwwpn:$vportwwnn" > /sys/class/scsi_host/$shost/vport_delete
        if test $? -ne 0 ; then exit 6; fi
        sleep 4
        return
    fi

    # Qlogic
    if test -e /sys/class/fc_host/$1/device/../scsi_host*/driver_version ; then
        shost=`ls -1d /sys/class/fc_host/$1/device/../scsi_host* | sed s/.*scsi_host://`
        vportwwpn=`cat /sys/class/fc_host/$1/port_name | sed s/^0x//`
        vportwwnn=`cat /sys/class/fc_host/$1/node_name | sed s/^0x//`
        echo "$vportwwpn:$vportwwnn" > /sys/class/scsi_host/$shost/vport_delete
        if test $? -ne 0 ; then exit 6; fi
        sleep 4
        return
    fi

    # BFA
    if test -e /sys/class/fc_host/$1/device/../scsi_host/*/driver_name ; then
        shost=`ls -1d /sys/class/fc_host/$1/device/../scsi_host/* | sed s#.*scsi_host/##`
        vportwwpn=`cat /sys/class/fc_host/$1/port_name | sed s/^0x//`
        vportwwnn=`cat /sys/class/fc_host/$1/node_name | sed s/^0x//`
        echo "$vportwwpn:$vportwwnn" > /sys/class/scsi_host/$shost/vport_delete
        if test $? -ne 0 ; then exit 6; fi
        sleep 4
        return
    fi


    exit 6
}


# Print a status report for every adapter that offers a vport_create
# attribute, using vport_status_display. Always returns 0.
vport_status()
{
# Look via upstream interfaces
# (the fc_host directory serves as both arguments of the display helper)
for fchost in /sys/class/fc_host/* ; do
if test -e $fchost/vport_create ; then
vport_status_display $fchost $fchost
fi
done

# Look in vendor-specific locations

# Emulex: interfaces mirror upstream, but are under adapter scsi_host
# (port attributes are read from the fc_host entry, the rest from the
# scsi_host entry)
for shost in /sys/class/scsi_host/* ; do
if [ -e $shost/vport_create ] ; then
fchost=`ls -d $shost/device/fc_host*`
vport_status_display $fchost $shost
fi
done

return 0
}


# Print one adapter's NPIV-related attributes, one "name: value" line each,
# preceded by an empty line.
# $1 is the directory read for port_state, port_type, fabric_name and speed
# $2 is the directory read for max_npiv_vports, npiv_vports_inuse and
#    modeldesc; it is also printed as the "fc_host" label
# Always returns 0.
vport_status_display()
{
echo
echo "fc_host: " $2
echo "port_state: " `cat $1/port_state`
echo "port_type: " `cat $1/port_type`
echo "fabric_name: " `cat $1/fabric_name`
echo "max_npiv_vports: " `cat $2/max_npiv_vports`
echo "npiv_vports_inuse: " `cat $2/npiv_vports_inuse`
echo "modeldesc: " `cat $2/modeldesc`
echo "speed: " `cat $1/speed`

return 0
}

++++++ block-npiv-vport ++++++
#!/bin/bash

# Usage: block-npiv-vport [create npivargs | delete vportwwpn | status]

dir=$(dirname "$0")
. "$dir/block-npiv-common.sh"

#set -x
# $1 selects the action (create, delete or status), $2 carries its argument.
command=$1
params=$2

case "$command" in
create)
# Params is one big arg, with fields separated by hyphens:
# FABRIC-VPWWPN-VPWWNN-TGTWWPN-LUN#
# arg 2 - Fabric Name
# arg 3 - VPORT's WWPN
# arg 4 - VPORT's WWNN
# arg 5 - Target's WWPN
# arg 6 - LUN # on Target
# no wwn contains a leading 0x - it is a 16 character hex value
# You may want to optionally pick a specific adapter ?
NPIVARGS=$params;
# Peel the fields off from the right; a field that equals what is left of
# the argument string means a separator was missing.
LUN=${NPIVARGS##*-*-*-*-}; NPIVARGS=${NPIVARGS%-*}
if test $LUN = $NPIVARGS ; then exit 1; fi
TGTWWPN=${NPIVARGS##*-*-*-}; NPIVARGS=${NPIVARGS%-*}
if test $TGTWWPN = $NPIVARGS ; then exit 1; fi
VPORTWWNN=${NPIVARGS##*-*-}; NPIVARGS=${NPIVARGS%-*}
if test $VPORTWWNN = $NPIVARGS ; then exit 1; fi
VPORTWWPN=${NPIVARGS##*-}; NPIVARGS=${NPIVARGS%-*}
if test $VPORTWWPN = $NPIVARGS ; then exit 1; fi
FABRICNM=$NPIVARGS

# Ensure we compare everything using lower-case hex characters
TGTWWPN=`echo $TGTWWPN | tr A-Z a-z`
VPORTWWPN=`echo $VPORTWWPN | tr A-Z a-z`
VPORTWWNN=`echo $VPORTWWNN | tr A-Z a-z`
FABRICNM=`echo $FABRICNM | tr A-Z a-z`

# Create the vport only if it does not exist yet, then verify it showed up.
find_vhost $VPORTWWPN $FABRICNM
if test -z "$vhost" ; then
create_vport $FABRICNM $VPORTWWPN $VPORTWWNN
if [ $? -ne 0 ] ; then exit 2; fi
sleep 8
find_vhost $VPORTWWPN $FABRICNM
if test -z "$vhost" ; then exit 3; fi
fi

exit 0
;;

delete)
# Params is VPORT's WWPN
# no wwn contains a leading 0x - it is a 16 character hex value
VPORTWWPN=$params

# Ensure we compare everything using lower-case hex characters
VPORTWWPN=`echo $VPORTWWPN | tr A-Z a-z`

# NOTE(review): FABRICNM is not assigned anywhere in this branch, so
# find_vhost is called without a fabric name here - confirm this is intended.
find_vhost $VPORTWWPN $FABRICNM
if test -z "$vhost" ; then exit 4; fi
delete_vhost $vhost

exit 0
;;

status)
vport_status

exit 0
;;

*)
echo "Usage: block-npiv-vport [create npivargs | delete vportwwpn | status]"

exit 1
;;
esac

++++++ boot.local.xenU ++++++
#! /bin/sh
#
# Copyright (c) 1996 SuSE GmbH Nuernberg, Germany. All rights reserved.
#
# Author: Werner Fink <werner@xxxxxxx>, 1996
# Burchard Steinbild <bs@xxxxxxx>, 1996
#
# /etc/init.d/boot.local
#
# script with local commands to be executed from init on system startup
#
#
# Here you should add things, that should happen directly after booting
# before we're going to the first run level.
#

date

# Derive the console device from $REDIRECT:
#   my_REDIRECT - $REDIRECT without a leading "/dev/"
#   my_DEVICE   - my_REDIRECT without a leading "tty"; used as the inittab id
#   my_SPEED    - line speed reported by stty
# echo "$MACHINE: running $0 $*"
my_REDIRECT="$(echo $REDIRECT | sed 's#^/dev/##')"
my_DEVICE="$(echo $my_REDIRECT | sed 's#^tty##')"
my_SPEED="$(stty speed)"
# echo REDIRECT $REDIRECT $my_REDIRECT
# echo my_DEVICE $my_DEVICE
# echo my_SPEED $my_SPEED

# compose a line like that for inittab
# S0:12345:respawn:/sbin/agetty -L 9600 ttyS0 vt102

# Each inittab entry is written as ONE line, terminal type included, as in
# the example above.
case $my_REDIRECT in
    ttyS*)
        echo adding this line to inittab
        echo "$my_DEVICE:12345:respawn:/sbin/agetty -L $my_SPEED $my_REDIRECT vt102"
        echo "$my_DEVICE:12345:respawn:/sbin/agetty -L $my_SPEED $my_REDIRECT vt102" >> /etc/inittab
        echo $my_REDIRECT >> /etc/securetty
        ;;

    hvc*)
        echo adding this line to inittab
        echo "$my_DEVICE:12345:respawn:/sbin/agetty -L $my_SPEED $my_REDIRECT vt320"
        echo "$my_DEVICE:12345:respawn:/sbin/agetty -L $my_SPEED $my_REDIRECT vt320" >> /etc/inittab
        echo $my_REDIRECT >> /etc/securetty
        ;;

    *)
        echo "no modification in inittab needed for: $my_REDIRECT"
        ;;
esac

# make init re-read /etc/inittab
telinit q

# Changes for Xen
# Generate the module dependency file if the running kernel has none yet.
test -f /lib/modules/`uname -r`/modules.dep || depmod -ae

# Network setup is only done when the kernel command line contains "ip=".
CMDLINE=`cat /proc/cmdline | grep 'ip='`
if test ! -z "$CMDLINE"; then
# Split the command line on ':' into the fields named on the read line.
OLDIFS=$IFS
IFS=":"
read ip oth mask gw hostname dev dhcp rest < /proc/cmdline
IFS=$OLDIFS
hostname $hostname
# NOTE(review): the first field is everything in front of the first ':' of
# the whole command line; only the literal "ip=" (and blanks after it) is
# removed here, so text preceding "ip=" would remain - confirm the expected
# command-line layout.
ip=`echo $ip | sed 's/ip= *//'`
if test ! -z "$ip"; then
if test -z "$mask"; then
# No netmask field: use "ip addr", appending /27 when the address carries no
# "/prefix" of its own.
if [ ${ip%/*} = $ip ]; then
ip="$ip/27"
fi
echo "ip addr add $ip dev $dev"
ip addr add $ip dev $dev
ip link set $dev up
else
# Netmask field present: configure through ifconfig instead.
ifconfig add $ip netmask $mask $dev
fi
fi
# A seventh field that starts with "dhcp" requests DHCP on the interface.
if test "${dhcp#dhcp}" != "$dhcp"; then
ifup-dhcp $dev
fi
fi

++++++ boot.xen ++++++
#! /bin/sh
# Copyright (c) 2005-2006 SUSE Linux AG, Nuernberg, Germany.
# All rights reserved.
#
# /etc/init.d/boot.xen
#
# LSB compatible service control script; see http://www.linuxbase.org/spec/
#
### BEGIN INIT INFO
# Provides: Xen
# Required-Start: boot.localfs
# Should-Start: boot.localnet
# Required-Stop: boot.localfs
# Should-Stop:
# Default-Start: B
# Default-Stop:
# Short-Description: Switch on and off TLS depending on whether Xen is running
# Description: Xen gets a major performance hit by the way
# recent glibc (& gcc) set up the TLS offset, as it needs to
# play segmentation tricks. This can be avoided by moving away
# the tls libs.
### END INIT INFO

. /etc/rc.status

# Reset status of this service
rc_reset

# Dispatch on the requested init-script action.
case "$1" in
    start)
        echo -n "Starting Xen setup "
        if test -d /proc/xen; then
            export LD_ASSUME_KERNEL=2.4.21
            echo -n "Xen running "
        fi
        # Under Xen, move /lib/tls out of the way; without Xen, restore a
        # previously saved copy.
        if test -d /proc/xen -a -d /lib/tls; then
            echo -n "move /lib/tls away "
            mv /lib/tls /lib/tls.save
        elif test ! -d /proc/xen -a -d /lib/tls.save; then
            echo -n "move back /lib/tls "
            mv /lib/tls.save /lib/tls
        fi
        rc_status -v
        ;;
    stop)
        # rc_status -v
        ;;
    try-restart|condrestart)
        $0 restart
        # Remember status and be quiet
        rc_status
        ;;
    restart)
        ## Stop the service and regardless of whether it was
        ## running or not, start it again.
        $0 start
        # Remember status and be quiet
        rc_status
        ;;
    force-reload)
        $0 try-restart
        rc_status
        ;;
    reload)
        rc_failed 3
        rc_status -v
        ;;
    status)
        echo -n "Checking for Xen "
        # Return value is slightly different for the status command:
        # 0 - service up and running
        # 1 - service dead, but /var/run/ pid file exists
        # 2 - service dead, but /var/lock/ lock file exists
        # 3 - service not running (unused)
        # 4 - service status unknown :-(
        # 5--199 reserved (5--99 LSB, 100--149 distro, 150--199 appl.)

        if test -d /proc/xen; then
            if test -d /lib/tls; then
                echo -n "Xen running, /lib/tls existing "
                rc_failed 1
            else
                echo -n "Xen running, /lib/tls not existing "
            fi
        else
            if test -d /lib/tls.save; then
                echo -n "Xen not running, /lib/tls existing "
                rc_failed 2
            else
                echo -n "Xen not running, /lib/tls not existing "
                rc_failed 3
            fi
        fi
        rc_status -v
        ;;
    *)
        # the usage message is a single line
        echo "Usage: $0 {start|stop|status|try-restart|restart|force-reload|reload}"
        exit 1
        ;;
esac
rc_exit
++++++ bridge-bonding.diff ++++++
Index: xen-4.2.0-testing/tools/hotplug/Linux/network-bridge
===================================================================
--- xen-4.2.0-testing.orig/tools/hotplug/Linux/network-bridge
+++ xen-4.2.0-testing/tools/hotplug/Linux/network-bridge
@@ -251,6 +251,9 @@ op_start () {

claim_lock "network-bridge"

+ local bonded=""
+ [ -e /sys/class/net/${netdev}/bonding ] && bonded="yes"
+
vlans=$(find_active_vlans "${netdev}")
for vlan in $vlans ; do ifdown $vlan ; done

@@ -268,18 +271,32 @@ op_start () {
ip link set ${netdev} down
ip addr flush ${netdev}
fi
- ip link set ${netdev} name ${pdev}
- ip link set ${tdev} name ${bridge}
-
- setup_physical_bridge_port ${pdev}

- # Restore slaves
- if [ -n "${slaves}" ]; then
- ip link set ${pdev} up
- ifenslave ${pdev} ${slaves}
+ if [ "x${bonded}" = "xyes" ]
+ then
+ ip link set ${tdev} name ${bridge}
+ ln -sf /etc/sysconfig/network/ifcfg-${netdev}
/etc/sysconfig/network/ifcfg-${pdev}
+ ifup ${pdev}
+ local gw=`ip route show dev ${pdev} | fgrep default | sed 's/default
via //'`
+ ip addr flush ${pdev}
+ rm -f /etc/sysconfig/network/ifcfg-${pdev}
+ brctl addif ${bridge} ${pdev}
+ ip link set ${bridge} up
+ [ -n "$gw" ] && ip route add default via ${gw}
+ else
+ ip link set ${netdev} name ${pdev}
+ ip link set ${tdev} name ${bridge}
+
+ _setup_bridge_port ${pdev}
+
+ # Restore slaves
+ if [ -n "${slaves}" ]; then
+ ip link set ${pdev} up
+ ifenslave ${pdev} ${slaves}
+ fi
+ add_to_bridge2 ${bridge} ${pdev}
+ do_ifup ${bridge}
fi
- add_to_bridge2 ${bridge} ${pdev}
- do_ifup ${bridge}

for vlan in $vlans ; do ifup $vlan ; done

++++++ bridge-opensuse.patch ++++++
Index: xen-4.2.0-testing/tools/hotplug/Linux/network-bridge
===================================================================
--- xen-4.2.0-testing.orig/tools/hotplug/Linux/network-bridge
+++ xen-4.2.0-testing/tools/hotplug/Linux/network-bridge
@@ -280,19 +280,19 @@ op_stop () {
transfer_addrs ${bridge} ${pdev}
if ! ifdown ${bridge}; then
get_ip_info ${bridge}
- fi
- ip link set ${pdev} down
- ip addr flush ${bridge}
+ ip link set ${pdev} down
+ ip addr flush ${bridge}

- brctl delif ${bridge} ${pdev}
- ip link set ${bridge} down
+ brctl delif ${bridge} ${pdev}
+ ip link set ${bridge} down

- ip link set ${bridge} name ${tdev}
+ ip link set ${bridge} name ${tdev}
+ brctl delbr ${tdev}
+ fi
+ ip link set ${pdev} down
ip link set ${pdev} name ${netdev}
do_ifup ${netdev}

- brctl delbr ${tdev}
-
release_lock "network-bridge"
}

++++++ bridge-record-creation.patch ++++++
Index: xen-4.2.0-testing/tools/hotplug/Linux/network-bridge
===================================================================
--- xen-4.2.0-testing.orig/tools/hotplug/Linux/network-bridge
+++ xen-4.2.0-testing/tools/hotplug/Linux/network-bridge
@@ -259,6 +259,11 @@ op_start () {

create_bridge ${tdev}

+ # Record creation of bridge in /dev/.sysconfig/network/xenbridges so other
+ # tools, e.g. yast2 lan, know that Xen bridging is active.
+ [ -d /dev/.sysconfig/network/xenbridges ] || mkdir
/dev/.sysconfig/network/xenbridges
+ touch /dev/.sysconfig/network/xenbridges/${bridge}
+
preiftransfer ${netdev}
transfer_addrs ${netdev} ${tdev}
# Remember slaves for bonding interface.
@@ -340,6 +345,13 @@ op_stop () {
ip link set ${pdev} name ${netdev}
do_ifup ${netdev}

+ # Remove record of bridge from /dev/.sysconfig/network/xenbridges ...
+ rm -f /dev/.sysconfig/network/xenbridges/${bridge}
+ # ... and directory itself if empty
+ if [ -z "$(ls -A /dev/.sysconfig/network/xenbridges 2>/dev/null)" ]; then
+ rmdir /dev/.sysconfig/network/xenbridges
+ fi
+
for vlan in $vlans ; do ifup $vlan ; done

release_lock "network-bridge"
++++++ bridge-vlan.diff ++++++
Index: xen-4.2.0-testing/tools/hotplug/Linux/network-bridge
===================================================================
--- xen-4.2.0-testing.orig/tools/hotplug/Linux/network-bridge
+++ xen-4.2.0-testing/tools/hotplug/Linux/network-bridge
@@ -195,6 +195,28 @@ antispoofing () {
iptables -A FORWARD -m physdev --physdev-in ${pdev} -j ACCEPT
}

+find_active_vlans() {
+ local netdev=$1
+ local vlan
+ local vlans
+ vlans=""
+ for vifcfg in /etc/sysconfig/network/ifcfg-vlan* ; do
+ vlan=${vifcfg/*\/ifcfg-}
+ if [ "$vlan" = "vlan*" ]; then
+ continue
+ fi
+ . $vifcfg
+ etherdevice="$ETHERDEVICE"
+ if [ -x /sbin/getcfg-interface ]; then
+ etherdevice=$(/sbin/getcfg-interface "$ETHERDEVICE")
+ fi
+ if [ "$ETHERDEVICE" = "$netdev" ] || [ "$etherdevice" = "$netdev" ] ;
then
+ link_exists "$vlan" && vlans="$vlans $vlan"
+ fi
+ done
+ echo "$vlans"
+}
+
# Usage: show_status dev bridge
# Print ifconfig and routes.
show_status () {
@@ -229,6 +251,9 @@ op_start () {

claim_lock "network-bridge"

+ vlans=$(find_active_vlans "${netdev}")
+ for vlan in $vlans ; do ifdown $vlan ; done
+
create_bridge ${tdev}

preiftransfer ${netdev}
@@ -256,6 +281,8 @@ op_start () {
add_to_bridge2 ${bridge} ${pdev}
do_ifup ${bridge}

+ for vlan in $vlans ; do ifup $vlan ; done
+
if [ ${antispoof} = 'yes' ] ; then
antispoofing
fi
@@ -277,6 +304,9 @@ op_stop () {

claim_lock "network-bridge"

+ vlans=$(find_active_vlans "${netdev}")
+ for vlan in $vlans ; do ifdown $vlan ; done
+
transfer_addrs ${bridge} ${pdev}
if ! ifdown ${bridge}; then
get_ip_info ${bridge}
@@ -293,6 +323,8 @@ op_stop () {
ip link set ${pdev} name ${netdev}
do_ifup ${netdev}

+ for vlan in $vlans ; do ifup $vlan ; done
+
release_lock "network-bridge"
}

++++++ build-tapdisk-ioemu.patch ++++++
From f1ebeae7802a5775422004f62630c42e46dcf664 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@xxxxxxx>
Date: Tue, 10 Mar 2009 16:32:40 +0100
Subject: [PATCH 3/6] ioemu: Build tapdisk-ioemu binary

When changing away from the old ioemu, changes in the Makefiles
resulted in tapdisk-ioemu appearing there, but actually not
being built. This patch re-enables the build of tapdisk-ioemu.

Signed-off-by: Kevin Wolf <kwolf@xxxxxxx>
---
Makefile | 22 +++++++++++++++-------
configure | 2 +-
qemu-tool.c | 2 +-
tapdisk-ioemu.c | 17 -----------------
4 files changed, 17 insertions(+), 26 deletions(-)

Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/Makefile
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/Makefile
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/Makefile
@@ -46,14 +46,6 @@ $(filter %-user,$(SUBDIR_RULES)): libqem

recurse-all: $(SUBDIR_RULES)

-CPPFLAGS += -I$(XEN_ROOT)/tools/libxc
-CPPFLAGS += -I$(XEN_ROOT)/tools/blktap/lib
-CPPFLAGS += -I$(XEN_ROOT)/tools/xenstore
-CPPFLAGS += -I$(XEN_ROOT)/tools/include
-
-tapdisk-ioemu: tapdisk-ioemu.c cutils.c block.c block-raw.c block-cow.c
block-qcow.c aes.c block-vmdk.c block-cloop.c block-dmg.c block-bochs.c
block-vpc.c block-vvfat.c block-qcow2.c hw/xen_blktap.c osdep.c
- $(CC) -DQEMU_TOOL $(CFLAGS) $(CPPFLAGS) $(BASE_CFLAGS) $(LDFLAGS)
$(BASE_LDFLAGS) -o $@ $^ -lz $(LIBS)
-
#######################################################################
# BLOCK_OBJS is code used by both qemu system emulation and qemu-img

@@ -72,6 +64,21 @@ endif
BLOCK_OBJS += block-raw-posix.o
endif

+#######################################################################
+# tapdisk-ioemu
+
+hw/tapdisk-xen_blktap.o: hw/xen_blktap.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) -DQEMU_IMG -DQEMU_TOOL -c -o $@ $<
+tapdisk-ioemu.o: tapdisk-ioemu.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) -DQEMU_IMG -DQEMU_TOOL -c -o $@ $<
+
+tapdisk-ioemu: CPPFLAGS += -I$(XEN_ROOT)/tools/libxc
+tapdisk-ioemu: CPPFLAGS += -I$(XEN_ROOT)/tools/blktap/lib
+tapdisk-ioemu: CPPFLAGS += -I$(XEN_ROOT)/tools/xenstore
+tapdisk-ioemu: CPPFLAGS += -I$(XEN_ROOT)/tools/include
+tapdisk-ioemu: tapdisk-ioemu.o $(BLOCK_OBJS) qemu-tool.o
hw/tapdisk-xen_blktap.o
+ $(CC) $(LDFLAGS) -o $@ $^ -lz $(LIBS)
+
######################################################################
# libqemu_common.a: Target independent part of system emulation. The
# long term path is to suppress *all* target specific code in case of
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/configure
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/configure
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/configure
@@ -1511,7 +1511,7 @@ bsd)
;;
esac

-tools=
+tools="tapdisk-ioemu"
if test `expr "$target_list" : ".*softmmu.*"` != 0 ; then
tools="qemu-img\$(EXESUF) $tools"
if [ "$linux" = "yes" ] ; then
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/qemu-tool.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/qemu-tool.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/qemu-tool.c
@@ -68,7 +68,7 @@ void qemu_bh_delete(QEMUBH *bh)
qemu_free(bh);
}

-int qemu_set_fd_handler2(int fd,
+int __attribute__((weak)) qemu_set_fd_handler2(int fd,
IOCanRWHandler *fd_read_poll,
IOHandler *fd_read,
IOHandler *fd_write,
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/tapdisk-ioemu.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/tapdisk-ioemu.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/tapdisk-ioemu.c
@@ -12,34 +12,12 @@

extern void qemu_aio_init(void);
extern void qemu_aio_poll(void);
-extern void bdrv_init(void);
-
-extern void *qemu_mallocz(size_t size);
-extern void qemu_free(void *ptr);

extern void *fd_start;

int domid = 0;
FILE* logfile;

-void term_printf(const char *fmt, ...)
-{
- va_list ap;
- va_start(ap, fmt);
- vprintf(fmt, ap);
- va_end(ap);
-}
-
-void term_print_filename(const char *filename)
-{
- term_printf(filename);
-}
-
-
-typedef void IOReadHandler(void *opaque, const uint8_t *buf, int size);
-typedef int IOCanRWHandler(void *opaque);
-typedef void IOHandler(void *opaque);
-
typedef struct IOHandlerRecord {
int fd;
IOCanRWHandler *fd_read_poll;
@@ -103,7 +81,6 @@ int main(void)
logfile = stderr;

bdrv_init();
- qemu_aio_init();
init_blktap();

/* Daemonize */
@@ -115,8 +92,6 @@ int main(void)
* completed aio operations.
*/
while (1) {
- qemu_aio_poll();
-
max_fd = -1;
FD_ZERO(&rfds);
for(ioh = first_io_handler; ioh != NULL; ioh = ioh->next)
++++++ capslock_enable.patch ++++++
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vnc.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
@@ -1329,6 +1329,11 @@ static void do_key_event(VncState *vs, i
}
break;
case 0x3a: /* CapsLock */
+ if(!down){
+ vs->modifiers_state[keycode] ^= 1;
+ kbd_put_keycode(keycode | 0x80);
+ }
+ return;
case 0x45: /* NumLock */
if (down) {
kbd_put_keycode(keycode & 0x7f);
++++++ cdrom-removable.patch ++++++
Index: xen-4.2.0-testing/tools/python/xen/xend/server/HalDaemon.py
===================================================================
--- /dev/null
+++ xen-4.2.0-testing/tools/python/xen/xend/server/HalDaemon.py
@@ -0,0 +1,243 @@
+#!/usr/bin/env python
+# -*- mode: python; -*-
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2007 Pat Campbell <plc@xxxxxxxxxx>
+# Copyright (C) 2007 Novell Inc.
+#============================================================================
+
+"""hald (Hardware Abstraction Layer Daemon) watcher for Xen management
+ of removable block device media.
+
+"""
+
+import gobject
+import dbus
+import dbus.glib
+import os
+import types
+import sys
+import signal
+import traceback
+from xen.xend.xenstore.xstransact import xstransact, complete
+from xen.xend.xenstore.xsutil import xshandle
+from xen.xend import PrettyPrint
+from xen.xend import XendLogging
+from xen.xend.XendLogging import log
+
+DEVICE_TYPES = ['vbd', 'tap']
+
+class HalDaemon:
+ """The Hald block device watcher for XEN
+ """
+
+ """Default path to the log file. """
+ logfile_default = "/var/log/xen/hald.log"
+
+ """Default level of information to be logged."""
+ loglevel_default = 'INFO'
+
+
+ def __init__(self):
+
+ XendLogging.init(self.logfile_default, self.loglevel_default)
+ log.debug( "%s", "__init__")
+
+ self.udi_dict = {}
+ self.debug = 0
+ self.dbpath = "/local/domain/0/backend"
+ self.bus = dbus.SystemBus()
+ self.hal_manager_obj = self.bus.get_object('org.freedesktop.Hal',
'/org/freedesktop/Hal/Manager')
+ self.hal_manager = dbus.Interface( self.hal_manager_obj,
'org.freedesktop.Hal.Manager')
+ self.gatherBlockDevices()
+ self.registerDeviceCallbacks()
+
+ def run(self):
+ log.debug( "%s", "In new run" );
+ try:
+ self.mainloop = gobject.MainLoop()
+ self.mainloop.run()
+ except KeyboardInterrupt, ex:
+ log.debug('Keyboard exception handler: %s', ex )
+ self.mainloop.quit()
+ except Exception, ex:
+ log.debug('Generic exception handler: %s', ex )
+ self.mainloop.quit()
+
+ def __del__(self):
+ log.debug( "%s", "In del " );
+ self.unRegisterDeviceCallbacks()
+ self.mainloop.quit()
+
+ def shutdown(self):
+ log.debug( "%s", "In shutdown now " );
+ self.unRegisterDeviceCallbacks()
+ self.mainloop.quit()
+
+ def stop(self):
+ log.debug( "%s", "In stop now " );
+ self.unRegisterDeviceCallbacks()
+ self.mainloop.quit()
+
+ def gatherBlockDevices(self):
+
+ # Get all the current devices from hal and save in a dictionary
+ try:
+ device_names = self.hal_manager.GetAllDevices()
+ i = 0;
+ for name in device_names:
+ #log.debug("device name, device=%s",name)
+ dev_obj = self.bus.get_object ('org.freedesktop.Hal', name)
+ dev = dbus.Interface (dev_obj, 'org.freedesktop.Hal.Device')
+ dev_properties =
dev_obj.GetAllProperties(dbus_interface="org.freedesktop.Hal.Device")
+ if dev_properties.has_key('block.device'):
+ dev_str = dev_properties['block.device']
+ dev_major = dev_properties['block.major']
+ dev_minor = dev_properties['block.minor']
+ udi_info = {}
+ udi_info['device'] = dev_str
+ udi_info['major'] = dev_major
+ udi_info['minor'] = dev_minor
+ udi_info['udi'] = name
+ self.udi_dict[i] = udi_info
+ i = i + 1
+ except Exception, ex:
+ print >>sys.stderr, 'Exception gathering block devices:', ex
+ log.warn("Exception gathering block devices (%s)",ex)
+
+ #
+ def registerDeviceCallbacks(self):
+ # setup the callbacks for when the gdl changes
+ self.hal_manager.connect_to_signal('DeviceAdded',
self.device_added_callback)
+ self.hal_manager.connect_to_signal('DeviceRemoved',
self.device_removed_callback)
+
+ #
+ def unRegisterDeviceCallbacks(self):
+ # setup the callbacks for when the gdl changes
+
self.hal_manager.remove_signal_receiver(self.device_added_callback,'DeviceAdded')
+
self.hal_manager.remove_signal_receiver(self.device_removed_callback,'DeviceRemoved')
+
+ #
+ def device_removed_callback(self,udi):
+ log.debug('UDI %s was removed',udi)
+ self.show_dict(self.udi_dict)
+ for key in self.udi_dict:
+ udi_info = self.udi_dict[key]
+ if udi_info['udi'] == udi:
+ device = udi_info['device']
+ major = udi_info['major']
+ minor = udi_info['minor']
+ self.change_xenstore( "remove", device, major, minor)
+
+ # Adds device to dictionary if not already there
+ def device_added_callback(self,udi):
+ log.debug('UDI %s was added', udi)
+ self.show_dict(self.udi_dict)
+ dev_obj = self.bus.get_object ('org.freedesktop.Hal', udi)
+ dev = dbus.Interface (dev_obj, 'org.freedesktop.Hal.Device')
+ device = dev.GetProperty ('block.device')
+ major = dev.GetProperty ('block.major')
+ minor = dev.GetProperty ('block.minor')
+ udi_info = {}
+ udi_info['device'] = device
+ udi_info['major'] = major
+ udi_info['minor'] = minor
+ udi_info['udi'] = udi
+ already = 0
+ cnt = 0;
+ for key in self.udi_dict:
+ info = self.udi_dict[key]
+ if info['udi'] == udi:
+ already = 1
+ break
+ cnt = cnt + 1
+ if already == 0:
+ self.udi_dict[cnt] = udi_info;
+ log.debug('UDI %s was added, device:%s major:%s minor:%s
index:%d\n', udi, device, major, minor, cnt)
+ self.change_xenstore( "add", device, major, minor)
+
+ # Debug helper, shows dictionary contents
+ def show_dict(self,dict=None):
+ if self.debug == 0 :
+ return
+ if dict == None :
+ dict = self.udi_dict
+ for key in dict:
+ log.debug('udi_info %s udi_info:%s',key,dict[key])
+
+ # Set or clear xenstore media-present depending on the action argument
+ # for every vbd that has this block device
+ def change_xenstore(self,action, device, major, minor):
+ for type in DEVICE_TYPES:
+ path = self.dbpath + '/' + type
+ domains = xstransact.List(path)
+ log.debug('domains: %s', domains)
+ for domain in domains: # for each domain
+ devices = xstransact.List( path + '/' + domain)
+ log.debug('devices: %s',devices)
+ for device in devices: # for each vbd device
+ str = device.split('/')
+ vbd_type = None;
+ vbd_physical_device = None
+ vbd_media = None
+ vbd_device_path = path + '/' + domain + '/' + device
+ listing = xstransact.List(vbd_device_path)
+ for entry in listing: # for each entry
+ item = path + '/' + entry
+ value = xstransact.Read( vbd_device_path + '/' + entry)
+ log.debug('%s=%s',item,value)
+ if item.find('media-present') != -1:
+ vbd_media = item;
+ vbd_media_path = item
+ if item.find('physical-device') != -1:
+ vbd_physical_device = value;
+ if item.find('type') != -1:
+ vbd_type = value;
+ if vbd_type is not None and vbd_physical_device is not None
and vbd_media is not None :
+ inode = vbd_physical_device.split(':')
+ imajor = parse_hex(inode[0])
+ iminor = parse_hex(inode[1])
+ log.debug("action:%s major:%s- minor:%s- imajor:%s-
iminor:%s- inode: %s",
+ action,major,minor, imajor, iminor, inode)
+ if int(imajor) == int(major) and int(iminor) ==
int(minor):
+ if action == "add":
+ xs_dict = {'media': "1"}
+ xstransact.Write(vbd_device_path,
'media-present', "1" )
+ log.debug("wrote xenstore media-present 1
path:%s",vbd_media_path)
+ else:
+ xstransact.Write(vbd_device_path,
'media-present', "0" )
+ log.debug("wrote xenstore media 0
path:%s",vbd_media_path)
+
+def mylog( fmt, *args):
+ f = open('/tmp/haldaemon.log', 'a')
+ print >>f, "HalDaemon ", fmt % args
+ f.close()
+
+
+def parse_hex(val):
+ try:
+ if isinstance(val, types.StringTypes):
+ return int(val, 16)
+ else:
+ return val
+ except ValueError:
+ return None
+
+if __name__ == "__main__":
+ watcher = HalDaemon()
+ watcher.run()
+ print 'Falling off end'
+
+
Index: xen-4.2.0-testing/tools/python/xen/xend/server/Hald.py
===================================================================
--- /dev/null
+++ xen-4.2.0-testing/tools/python/xen/xend/server/Hald.py
@@ -0,0 +1,125 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2007 Pat Campbell <plc@xxxxxxxxxx>
+# Copyright (C) 2007 Novell Inc.
+#============================================================================
+
+import errno
+import types
+import os
+import sys
+import time
+import signal
+from traceback import print_exc
+
+from xen.xend.XendLogging import log
+
+class Hald:
+ def __init__(self):
+ self.ready = False
+ self.running = True
+
+ def run(self):
+ """Starts the HalDaemon process
+ """
+ self.ready = True
+ try:
+ myfile = self.find("xen/xend/server/HalDaemon.py")
+ args = (["python", myfile ])
+ self.pid = self.daemonize("python", args )
+ #log.debug( "%s %s pid:%d", "Hald.py starting ", args, self.pid )
+ except:
+ self.pid = -1
+ log.debug("Unable to start HalDaemon process")
+
+ def shutdown(self):
+ """Shutdown the HalDaemon process
+ """
+ log.debug("%s pid:%d", "Hald.shutdown()", self.pid)
+ self.running = False
+ self.ready = False
+ if self.pid != -1:
+ try:
+ os.kill(self.pid, signal.SIGINT)
+ except:
+ print_exc()
+
+ def daemonize(self,prog, args):
+ """Runs a program as a daemon with the list of arguments. Returns the
PID
+ of the daemonized program, or returns 0 on error.
+ Copied from xm/create.py instead of importing to reduce coupling
+ """
+ r, w = os.pipe()
+ pid = os.fork()
+
+ if pid == 0:
+ os.close(r)
+ w = os.fdopen(w, 'w')
+ os.setsid()
+ try:
+ pid2 = os.fork()
+ except:
+ pid2 = None
+ if pid2 == 0:
+ os.chdir("/")
+ env = os.environ.copy()
+ env['PYTHONPATH'] = self.getpythonpath()
+ for fd in range(0, 256):
+ try:
+ os.close(fd)
+ except:
+ pass
+ os.open("/dev/null", os.O_RDWR)
+ os.dup2(0, 1)
+ os.dup2(0, 2)
+ os.execvpe(prog, args, env)
+ os._exit(1)
+ else:
+ w.write(str(pid2 or 0))
+ w.close()
+ os._exit(0)
+ os.close(w)
+ r = os.fdopen(r)
+ daemon_pid = int(r.read())
+ r.close()
+ os.waitpid(pid, 0)
+ #log.debug( "daemon_pid: %d", daemon_pid )
+ return daemon_pid
+
+ def getpythonpath(self):
+ str = " "
+ for p in sys.path:
+ if str != " ":
+ str = str + ":" + p
+ else:
+ if str != "":
+ str = p
+ return str
+
+ def find(self,path, matchFunc=os.path.isfile):
+ """Find a module in the sys.path
+ From web page:
http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52224
+ """
+ for dirname in sys.path:
+ candidate = os.path.join(dirname, path)
+ if matchFunc(candidate):
+ return candidate
+ raise Error("Can't find file %s" % path)
+
+if __name__ == "__main__":
+ watcher = Hald()
+ watcher.run()
+ time.sleep(10)
+ watcher.shutdown()
Index: xen-4.2.0-testing/tools/python/xen/xend/server/SrvServer.py
===================================================================
--- xen-4.2.0-testing.orig/tools/python/xen/xend/server/SrvServer.py
+++ xen-4.2.0-testing/tools/python/xen/xend/server/SrvServer.py
@@ -57,6 +57,7 @@ from xen.web.SrvDir import SrvDir

from SrvRoot import SrvRoot
from XMLRPCServer import XMLRPCServer
+from xen.xend.server.Hald import Hald

xoptions = XendOptions.instance()

@@ -252,6 +253,8 @@ def _loadConfig(servers, root, reload):
if xoptions.get_xend_unix_xmlrpc_server():
servers.add(XMLRPCServer(XendAPI.AUTH_PAM, False))

+ servers.add(Hald())
+

def create():
root = SrvDir()
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/xenstore.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
@@ -18,6 +18,7 @@
#include "exec-all.h"
#include "sysemu.h"

+#include "console.h"
#include "hw.h"
#include "pci.h"
#include "qemu-timer.h"
@@ -595,6 +596,21 @@ void xenstore_parse_domain_config(int hv
#endif

bs = bdrv_new(dev);
+
+ /* if cdrom physical put a watch on media-present */
+ if (bdrv_get_type_hint(bs) == BDRV_TYPE_CDROM) {
+ if (drv && !strcmp(drv, "phy")) {
+ if (pasprintf(&buf, "%s/media-present", bpath) != -1) {
+ if (bdrv_is_inserted(bs))
+ xs_write(xsh, XBT_NULL, buf, "1", strlen("1"));
+ else {
+ xs_write(xsh, XBT_NULL, buf, "0", strlen("0"));
+ }
+ xs_watch(xsh, buf, "media-present");
+ }
+ }
+ }
+
/* check if it is a cdrom */
if (danger_type && !strcmp(danger_type, "cdrom")) {
bdrv_set_type_hint(bs, BDRV_TYPE_CDROM);
@@ -1030,6 +1046,50 @@ static void xenstore_process_vcpu_set_ev
return;
}

+static void xenstore_process_media_change_event(char **vec)
+{
+ char *media_present = NULL;
+ unsigned int len;
+
+ media_present = xs_read(xsh, XBT_NULL, vec[XS_WATCH_PATH], &len);
+
+ if (media_present) {
+ BlockDriverState *bs;
+ char *buf = NULL, *cp = NULL, *path = NULL, *dev = NULL;
+
+ path = strdup(vec[XS_WATCH_PATH]);
+ cp = strstr(path, "media-present");
+ if (cp){
+ *(cp-1) = '\0';
+ pasprintf(&buf, "%s/dev", path);
+ dev = xs_read(xsh, XBT_NULL, buf, &len);
+ if (dev) {
+ if ( !strncmp(dev, "xvd", 3)) {
+ memmove(dev, dev+1, strlen(dev));
+ dev[0] = 'h';
+ dev[1] = 'd';
+ }
+ bs = bdrv_find(dev);
+ if (!bs) {
+ term_printf("device not found\n");
+ return;
+ }
+ if (strcmp(media_present, "0") == 0 && bs) {
+ bdrv_close(bs);
+ }
+ else if (strcmp(media_present, "1") == 0 &&
+ bs != NULL && bs->drv == NULL) {
+ if (bdrv_open(bs, bs->filename, 0 /* snapshot */) < 0) {
+ fprintf(logfile, "%s() qemu: could not open cdrom disk
'%s'\n",
+ __func__, bs->filename);
+ }
+ bs->media_changed = 1;
+ }
+ }
+ }
+ }
+}
+
void xenstore_process_event(void *opaque)
{
char **vec, *offset, *bpath = NULL, *buf = NULL, *drv = NULL, *image =
NULL;
@@ -1065,6 +1125,11 @@ void xenstore_process_event(void *opaque
xenstore_watch_callbacks[i].cb(vec[XS_WATCH_TOKEN],
xenstore_watch_callbacks[i].opaque);

+ if (!strcmp(vec[XS_WATCH_TOKEN], "media-present")) {
+ xenstore_process_media_change_event(vec);
+ goto out;
+ }
+
hd_index = drive_name_to_index(vec[XS_WATCH_TOKEN]);
if (hd_index == -1) {
fprintf(stderr,"medium change watch on `%s' -"
++++++ change-vnc-passwd.patch ++++++
Add support for changing the VNC password while the VM is running.

Signed-off-by: Chunyan Liu <cyliu@xxxxxxxxxx>

Index: xen-4.2.1-testing/tools/qemu-xen-traditional-dir-remote/vl.c
===================================================================
--- xen-4.2.1-testing.orig/tools/qemu-xen-traditional-dir-remote/vl.c
+++ xen-4.2.1-testing/tools/qemu-xen-traditional-dir-remote/vl.c
@@ -200,7 +200,7 @@ DriveInfo drives_table[MAX_DRIVES+1];
int nb_drives;
enum vga_retrace_method vga_retrace_method = VGA_RETRACE_DUMB;
int vga_ram_size;
-static DisplayState *display_state;
+DisplayState *display_state;
int nographic;
static int curses;
static int sdl;
Index: xen-4.2.1-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
===================================================================
--- xen-4.2.1-testing.orig/tools/qemu-xen-traditional-dir-remote/vnc.c
+++ xen-4.2.1-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
@@ -2627,6 +2627,7 @@ int vnc_display_password(DisplayState *d
if (password && password[0]) {
if (!(vs->password = qemu_strdup(password)))
return -1;
+ vs->auth = VNC_AUTH_VNC;
}

return 0;
Index: xen-4.2.1-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
===================================================================
--- xen-4.2.1-testing.orig/tools/qemu-xen-traditional-dir-remote/xenstore.c
+++ xen-4.2.1-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
@@ -24,6 +24,7 @@
#include "qemu-timer.h"
#include "qemu-xen.h"

+extern DisplayState *display_state;
struct xs_handle *xsh = NULL;
static char *media_filename[MAX_DRIVES+1];
static QEMUTimer *insert_timer = NULL;
@@ -939,6 +940,19 @@ static void xenstore_process_dm_command_
} else if (!strncmp(command, "continue", len)) {
fprintf(logfile, "dm-command: continue after state save\n");
xen_pause_requested = 0;
+ } else if (!strncmp(command, "chgvncpasswd", len)) {
+ fprintf(logfile, "dm-command: change vnc passwd\n");
+ if (pasprintf(&path,
+ "/local/domain/0/backend/vfb/%u/0/vncpasswd", domid) == -1) {
+ fprintf(logfile, "out of memory reading dm command parameter\n");
+ goto out;
+ }
+ par = xs_read(xsh, XBT_NULL, path, &len);
+ if (!par)
+ goto out;
+ if (vnc_display_password(display_state, par) == 0)
+ xenstore_record_dm_state("vncpasswdchged");
+ free(par);
} else if (!strncmp(command, "usb-add", len)) {
fprintf(logfile, "dm-command: usb-add a usb device\n");
if (pasprintf(&path,
Index: xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -1504,6 +1504,20 @@ class XendDomainInfo:
target = max_target
self.setMemoryTarget(target)

+ def chgvncpasswd(self, passwd):
+ if self._stateGet() != DOM_STATE_HALTED:
+ path = '/local/domain/0/backend/vfb/%u/0/' % self.getDomid()
+ xstransact.Write(path, 'vncpasswd', passwd)
+ self.image.signalDeviceModel("chgvncpasswd", "vncpasswdchged")
+
+ for dev_uuid, (dev_type, dev_info) in self.info['devices'].items():
+ if dev_type == 'vfb':
+ dev_info['vncpasswd'] = passwd
+ dev_info['other_config']['vncpasswd'] = passwd
+ self.info.device_update(dev_uuid, cfg_xenapi = dev_info)
+ break
+ xen.xend.XendDomain.instance().managed_config_save(self)
+
def setMemoryTarget(self, target):
"""Set the memory target of this domain.
@param target: In MiB.
Index: xen-4.2.1-testing/tools/python/xen/xend/server/XMLRPCServer.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/server/XMLRPCServer.py
+++ xen-4.2.1-testing/tools/python/xen/xend/server/XMLRPCServer.py
@@ -95,7 +95,7 @@ methods = ['device_create', 'device_conf
'destroyDevice','getDeviceSxprs',
'setMemoryTarget', 'setName', 'setVCpuCount', 'shutdown',
'send_sysrq', 'getVCPUInfo', 'waitForDevices',
- 'getRestartCount', 'getBlockDeviceClass']
+ 'getRestartCount', 'getBlockDeviceClass', 'chgvncpasswd']

exclude = ['domain_create', 'domain_restore']

Index: xen-4.2.1-testing/tools/python/xen/xm/main.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xm/main.py
+++ xen-4.2.1-testing/tools/python/xen/xm/main.py
@@ -21,6 +21,7 @@

"""Grand unified management application for Xen.
"""
+import getpass
import atexit
import cmd
import os
@@ -281,6 +282,9 @@ SUBCOMMAND_HELP = {
'getenforce' : ('', 'Returns the current enforcing mode for the Flask
XSM module (Enforcing,Permissive)'),
'setenforce' : ('[ (Enforcing|1) | (Permissive|0) ]',
'Modifies the current enforcing mode for the Flask XSM
module'),
+ #change vnc password
+ 'change-vnc-passwd' : ('<Domain>',\
+ 'Change vnc password'),
}

SUBCOMMAND_OPTIONS = {
@@ -406,6 +410,7 @@ common_commands = [
"usb-del",
"domstate",
"vcpu-set",
+ "change-vnc-passwd",
]

domain_commands = [
@@ -443,6 +448,7 @@ domain_commands = [
"vcpu-list",
"vcpu-pin",
"vcpu-set",
+ "change-vnc-passwd",
]

host_commands = [
@@ -3806,6 +3812,10 @@ def xm_cpupool_migrate(args):
else:
server.xend.cpu_pool.migrate(domname, poolname)

+def xm_chgvncpasswd(args):
+ arg_check(args, "change-vnc-passwd", 1)
+ pwd = getpass.getpass("Enter new password: ")
+ server.xend.domain.chgvncpasswd(args[0], pwd)

commands = {
"shell": xm_shell,
@@ -3914,6 +3924,8 @@ commands = {
"usb-del": xm_usb_del,
#domstate
"domstate": xm_domstate,
+ #change vnc password:
+ "change-vnc-passwd": xm_chgvncpasswd,
}

## The commands supported by a separate argument parser in xend.xm.
++++++ change_home_server.patch ++++++
Index: xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -3153,6 +3153,11 @@ class XendDomainInfo:
self._cleanup_phantom_devs(paths)
self._cleanupVm()

+ if "change_home_server" in self.info:
+ chs = self.info["change_home_server"]
+ if (type(chs) is str and chs == "False") or \
+ (type(chs) is bool and chs is False):
+ self.setChangeHomeServer(None)
if ("transient" in self.info["other_config"] and \
bool(self.info["other_config"]["transient"])) or \
("change_home_server" in self.info and \
++++++ check_device_status.patch ++++++
Improve check_device_status to handle HA cases

In an HA environment, the xenstore status sometimes changes without ev.wait()
receiving the signal; the wait then runs until the timeout and an incorrect
device status is returned. To fix this, do not depend on the result of
ev.wait(); instead read xenstore directly to get the correct device status.

Index: xen-4.2.0-testing/tools/python/xen/xend/server/DevController.py
===================================================================
--- xen-4.2.0-testing.orig/tools/python/xen/xend/server/DevController.py
+++ xen-4.2.0-testing/tools/python/xen/xend/server/DevController.py
@@ -149,7 +149,10 @@ class DevController:
(status, err) = self.waitForBackend(devid)

if status == Timeout:
- self.destroyDevice(devid, False)
+ #Clean timeout backend resource
+ dev = self.convertToDeviceNumber(devid)
+ self.writeBackend(dev, HOTPLUG_STATUS_NODE, HOTPLUG_STATUS_ERROR)
+ self.destroyDevice(devid, True)
raise VmError("Device %s (%s) could not be connected. "
"Hotplug scripts not working." %
(devid, self.deviceClass))
@@ -554,7 +557,17 @@ class DevController:

xswatch(statusPath, hotplugStatusCallback, ev, result)

- ev.wait(DEVICE_CREATE_TIMEOUT)
+ for i in range(1, 50):
+ ev.wait(DEVICE_CREATE_TIMEOUT/50)
+ status = xstransact.Read(statusPath)
+ if status is not None:
+ if status == HOTPLUG_STATUS_ERROR:
+ result['status'] = Error
+ elif status == HOTPLUG_STATUS_BUSY:
+ result['status'] = Busy
+ else:
+ result['status'] = Connected
+ break

err = xstransact.Read(backpath, HOTPLUG_ERROR_NODE)

@@ -571,7 +584,12 @@ class DevController:

xswatch(statusPath, deviceDestroyCallback, ev, result)

- ev.wait(DEVICE_DESTROY_TIMEOUT)
+ for i in range(1, 50):
+ ev.wait(DEVICE_DESTROY_TIMEOUT/50)
+ status = xstransact.Read(statusPath)
+ if status is None:
+ result['status'] = Disconnected
+ break

return result['status']

++++++ checkpoint-rename.patch ++++++
Index: xen-4.2.1-testing/tools/python/xen/xend/XendCheckpoint.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendCheckpoint.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendCheckpoint.py
@@ -185,7 +185,7 @@ def save(fd, dominfo, network, live, dst
dominfo.destroy()
dominfo.testDeviceComplete()
try:
- dominfo.setName(domain_name, False)
+ dominfo.setName(domain_name)
except VmError:
# Ignore this. The name conflict (hopefully) arises because we
# are doing localhost migration; if we are doing a suspend of a
++++++ del_usb_xend_entry.patch ++++++
Index: xen-4.2.0-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.2.0-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.2.0-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -1298,8 +1298,15 @@ class XendDomainInfo:
frontpath =
self.getDeviceController(deviceClass).frontendPath(dev)
backpath = xstransact.Read(frontpath, "backend")

thread.start_new_thread(self.getDeviceController(deviceClass).finishDeviceCleanup,
(backpath, path))
-
- rc = self.getDeviceController(deviceClass).destroyDevice(devid,
force)
+ if deviceClass =='vusb':
+ dev =
self.getDeviceController(deviceClass).convertToDeviceNumber(devid)
+ state = self.getDeviceController(deviceClass).readBackend(dev,
'state')
+ if state == '1':
+ rc =
self.getDeviceController(deviceClass).destroyDevice(devid, True)
+ else:
+ rc =
self.getDeviceController(deviceClass).destroyDevice(devid, force)
+ else:
+ rc =
self.getDeviceController(deviceClass).destroyDevice(devid, force)
if not force and rm_cfg:
# The backend path, other than the device itself,
# has to be passed because its accompanied frontend
++++++ disable_emulated_device.diff ++++++
Index:
xen-4.2.0-testing/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
===================================================================
---
xen-4.2.0-testing.orig/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
+++ xen-4.2.0-testing/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
@@ -424,6 +424,11 @@ static int __devinit platform_pci_init(s
platform_mmio = mmio_addr;
platform_mmiolen = mmio_len;

+ /*
+ * Disconnect the emulated devices.
+ */
+ outl(1, (ioaddr + 4));
+
ret = init_hypercall_stubs();
if (ret < 0)
goto out;
++++++ domUloader.py ++++++
#!/usr/bin/env python
# domUloader.py
"""Loader for kernel and (optional) ramdisk from domU filesystem

Given a physical disk (or disk image) for a domU and the path of a kernel and
optional ramdisk, copies the kernel and ramdisk from the domU disk to a
temporary location in dom0.

The --entry parameter specifies the location of the kernel (and optional
ramdisk) within the domU filesystem. dev is the disk as seen by domU.
Filenames are relative to that filesystem.

The disk is passed as the last parameter. It must be a block device or raw
disk image. More complex disk images (QCOW, VMDK, etc) must already be
configured via blktap and presented as a block device.

The script writes an sxpr specifying the locations of the copied kernel and
ramdisk into the file specified by --output (default is stdout).

Limitations:
- It is assumed both kernel and ramdisk are on the same filesystem.
- domUs might use LVM; the script currently does not have support for setting
up LVM mappings for domUs; it's not trivial and we might risk namespace
conflicts. If you want to use LVM inside domUs, set up a small non-LVM boot
partition and specify it in bootentry.

The script uses kpartx (multipath-tools) to create mappings for devices that
are exported as whole disk devices that are partitioned.

(c) 01/2006 Novell Inc
License: GNU GPL
Author: Kurt Garloff <garloff@xxxxxxx>
"""

import os, sys, getopt
from stat import *
from xen.xend import sxp
import tempfile
import time
import xnloader

# Global options
# NOTE(review): quiet, dryrun, tmpdir and in_args are not read in the part of
# the script reviewed here; presumably the option parsing further down sets
# and consumes them -- confirm.
quiet = False
# When true, verbose_print() writes its trace messages to stderr.
verbose = False
dryrun = False
tmpdir = '/var/lib/xen/tmp'
in_args = ''
# kpartx, left to its own devices, does not consistently pick the
# same partition separator. Explicitly specify it.
kpartx_args = '-p -part'

# Helper functions

def kpartx_has_opt(opt):
    """Probe whether kpartx accepts the command-line flag ``-<opt>``.

    Runs ``kpartx -<opt>`` through the shell with stderr folded into stdout
    and returns False if any output line contains 'invalid option';
    otherwise returns True.
    """
    probe = os.popen('kpartx -' + opt + ' 2>&1')
    # readlines() drains the pipe before any line is inspected.
    rejected = any('invalid option' in text for text in probe.readlines())
    probe.close()
    return not rejected

def error(s):
    """Write *s* to stderr as a 'domUloader error:' line."""
    sys.stderr.write("domUloader error: %s" % s + "\n")

def verbose_print(s):
    """Write *s* to stderr as a 'domUloader:' line when `verbose` is set."""
    if not verbose:
        return
    sys.stderr.write("domUloader: %s" % s + "\n")

def traildigits(strg):
    """Return the trailing digits, used to split the partition number off.

    Returns the longest all-digit suffix of *strg*: '' when *strg* does not
    end in a digit, and *strg* itself when it consists only of digits.
    An empty string yields ''.
    """
    # Walk back from the end while the preceding character is a digit.
    # Bounding the walk by `cut > 0` replaces the original's dead
    # `if len == 0` guard (it compared the builtin `len` with 0), which let
    # all-digit and empty inputs run off the string and raise IndexError.
    cut = len(strg)
    while cut > 0 and strg[cut - 1].isdigit():
        cut -= 1
    return strg[cut:]

def getWholedisk(part):
    """Return *part* with all of its trailing digits removed."""
    end = len(part)
    while end and part[end - 1].isdigit():
        end -= 1
    return part[:end]

#def isWholedisk(domUname):
# """Determines whether dev is a wholedisk dev"""
# return not domUname[-1:].isdigit()

# Append '-f' to the kpartx arguments when this kpartx accepts it (bnc#613584)
if kpartx_has_opt('f'):
    kpartx_args = kpartx_args + ' -f'

class Wholedisk:
"Class representing a whole disk that may have partitions"
def __init__(self, vdev, pdev):
    """Store the device names and scan *pdev* for partitions.

    *vdev* and *pdev* are kept as self.vdev and self.pdev. The plain
    attribute defaults are assigned first; os.stat(pdev) and
    self.scanpartitions() run afterwards, and exceptions from them are not
    caught here.
    """
    # Plain defaults: none of these assignments can raise.
    self.vdev = vdev
    self.pdev = pdev
    self.ldev = None
    self.lvm = False
    self.mapped = 0
    self.partitions = []
    self.pcount = 0
    # From here on the constructor may raise.
    mode = os.stat(pdev).st_mode
    self.is_blk = S_ISBLK(mode)
    self.pcount = self.scanpartitions()

def physdev(self):
    """Return the device path to use in dom0.

    That is self.ldev when it is set (truthy), otherwise self.pdev.
    """
    return self.ldev or self.pdev

def findPart(self, vdev):
    """Look up the partition whose vdev equals *vdev*.

    A leading "/dev/" is stripped from *vdev* first (only when something
    follows it). If no partition matches, the first entry of
    self.partitions is returned; if the list is empty, None is returned.
    """
    wanted = vdev
    if wanted.startswith("/dev/") and len(wanted) > 5:
        wanted = wanted[5:]
    for candidate in self.partitions:
        if wanted == candidate.vdev:
            return candidate
    # No exact match: fall back to the first known partition, if any.
    if self.partitions:
        return self.partitions[0]
    return None

def loopsetup(self):
    """Attach self.pdev to a loop device and remember it in self.ldev.

    Does nothing when self.is_blk is true or self.ldev is already set.
    Raises RuntimeError when no loop device could be set up.
    """
    if self.is_blk or self.ldev:
        return
    # Walk /dev/loop0, /dev/loop1, ... and simply try to claim each one.
    # Scanning for a free device first and setting it up as a separate
    # step would be too racy.
    number = 0
    while True:
        candidate = '/dev/loop%i' % (number)
        if not os.path.exists(candidate):
            # Ran out of loop device nodes.
            break
        number += 1
        pipe = os.popen("losetup %s '%s' 2> /dev/null" % (candidate,
                                                           self.pdev))
        if pipe.close():
            # losetup reported a failure for this device; try the next.
            continue
        verbose_print("losetup %s '%s'" % (candidate, self.pdev))
        self.ldev = candidate
        break
    if not self.ldev:
        raise RuntimeError("No free loop device found")

def loopclean(self):
    """Detach the loop device recorded in self.ldev, if any.

    Runs "losetup -d" up to ten times; on success self.ldev is reset to
    None. Failure after all retries is not reported to the caller.
    """
    if not self.ldev:
        return
    verbose_print("losetup -d %s" % self.ldev)
    # Even seemingly innocent queries like "losetup /dev/loop0" can
    # temporarily block the loopback and cause transient failures when
    # deleting it, hence the retries.
    attempts_left = 10
    while attempts_left:
        pipe = os.popen("losetup -d %s" % self.ldev)
        if not pipe.close():
            self.ldev = None
            break
        # Mappings may not have been deleted due to a race between udev
        # and dm (bnc#379032), which leaks loop devices. Call kpartx -d
        # again before retrying.
        os.system("kpartx %s -d '%s'" % (kpartx_args, self.physdev()))
        time.sleep(0.1)
        attempts_left -= 1

def scanlvmpartitions(self):
    """Treat self.vdev as an LVM volume group and record its volumes.

    Activates the group with "vgchange -ay". On success, sets self.lvm,
    appends one Partition per line printed by "lvscan | grep", and
    deactivates the group again. Returns the number of partitions added
    (0 when the activation failed).
    """
    found = 0
    verbose_print("vgchange -ay '%s'" % (self.vdev))
    status = os.system("vgchange -ay '%s' > /dev/null 2>&1"
                       % (self.vdev)) >> 8
    if status:
        verbose_print("vgchange -ay %s ... failed: -%d" % (self.vdev, status))
        return found

    self.lvm = True
    verbose_print("lvscan | grep '/dev/%s'" % (self.vdev))
    listing = os.popen("lvscan | grep '/dev/%s'" % (self.vdev))
    for entry in listing.readlines():
        # The volume path is the single-quoted field of the line.
        (before, lvname, after) = entry.strip().split('\'')
        shortname = lvname[lvname.rfind('/')+1:].strip()
        mapper = "/dev/mapper/" + self.vdev + "-" + shortname
        verbose_print("Found partition: vdev %s, pdev %s" % (self.vdev,
                                                             mapper))
        self.partitions.append(Partition(self, self.vdev, mapper))
        found += 1
    listing.close()
    verbose_print("vgchange -an '%s'" % (self.vdev))
    os.system("vgchange -an '%s' > /dev/null 2>&1" % (self.vdev))
    return found

def scanpartitions(self):
    """Populate self.partitions and return the number of partitions found.

    Sets up the loop mapping, then parses the output of "kpartx -l". If
    that yields nothing, falls back to scanlvmpartitions(). If that also
    yields nothing, the whole device is appended as a single Partition and
    0 is returned.
    """
    self.loopsetup()
    # TODO: We could use fdisk -l instead and look at the type of
    # partitions; this way we could also detect LVM and support it.
    listcmd = "kpartx %s -l '%s'" % (kpartx_args, self.physdev())
    verbose_print(listcmd)
    pipe = os.popen(listcmd)
    found = 0
    for entry in pipe.readlines():
        entry = entry.strip()
        verbose_print("kpartx -l: %s" % (entry,))
        (mapname, params) = entry.split(' : ')
        mapname = mapname.strip()
        number = int(traildigits(mapname))
        mapper = "/dev/mapper/" + mapname
        guestdev = '%s%i' % (self.vdev, number)
        verbose_print("Found partition: vdev %s, pdev %s" % (guestdev,
                                                             mapper))
        self.partitions.append(Partition(self, guestdev, mapper))
        found += 1
    pipe.close()

    # Try lvm
    if not found:
        found = self.scanlvmpartitions()

    # Add self to partition table
    if not found:
        owner = self if self.ldev else None
        self.partitions.append(Partition(owner, self.vdev, self.pdev))
    return found

def activatepartitions(self):
    """Set up loop mapping and device-mapper mappings.

    The mappings are created by the first caller only; every call adds
    one to the self.mapped use count.
    """
    verbose_print("activatepartitions")
    first_user = not self.mapped
    if first_user:
        self.loopsetup()
    if first_user and self.pcount:
        kpartx_add = "kpartx %s -a '%s'"
        verbose_print(kpartx_add % (kpartx_args, self.physdev()))
        os.popen(kpartx_add % (kpartx_args, self.physdev())).close()
    if first_user and self.pcount and self.lvm:
        verbose_print("vgchange -ay '%s'" % (self.vdev))
        vg_status = os.system("vgchange -ay '%s' > /dev/null 2>&1" %
                              (self.vdev)) >> 8
        if vg_status == 0:
            # Group is active: make sure its volumes are as well.
            verbose_print("lvchange -ay '%s'" % (self.vdev))
            os.system("lvchange -ay '%s' > /dev/null 2>&1" % (self.vdev))
    self.mapped += 1

def partitionsdeactivated(self):
    "Return True if partition mappings have been removed, False otherwise"
    # A mapping counts as removed once its device node is gone.
    remaining = [part for part in self.partitions
                 if os.access(part.pdev, os.F_OK)]
    return not remaining

def deactivatepartitions(self):
    """Remove device-mapper mappings and loop mapping.

    Drops one use counted by activatepartitions(); the mappings are only
    torn down when the count reaches zero.

    Will never raise.
    """
    verbose_print("deactivatepartitions")
    if not self.mapped:
        return
    self.mapped -= 1
    if self.mapped:
        # Still in use by somebody else.
        return
    if self.pcount:
        retries = 10
        while retries and not self.partitionsdeactivated():
            verbose_print("kpartx %s -d '%s'" % (kpartx_args,
                                                 self.physdev()))
            os.system("kpartx %s -d '%s'" % (kpartx_args,
                                             self.physdev()))
            time.sleep(0.1)
            retries -= 1
        # Judge by the actual state rather than by the retry counter:
        # the last attempt may have been the one that succeeded.
        if not self.partitionsdeactivated():
            error("unable to remove partition mappings with kpartx -d")
    if self.pcount and self.lvm:
        verbose_print("lvchange -an '%s'" % (self.vdev))
        ret = os.system("lvchange -an '%s' > /dev/null 2>&1" %
                        (self.vdev)) >> 8
        if ret:
            # Second attempt after a short pause, this time by /dev path.
            time.sleep(0.3)
            os.system("lvchange -an '/dev/%s' > /dev/null 2>&1" %
                      (self.vdev))
        verbose_print("vgchange -an '%s'" % (self.vdev))
        ret = os.system("vgchange -an '%s' > /dev/null 2>&1" %
                        (self.vdev)) >> 8
        if ret:
            time.sleep(0.3)
            os.system("vgchange -an '%s' > /dev/null 2>&1" %
                      (self.vdev))
    self.loopclean()

def __del__(self):
"d'tor: clean up"
# deactivatepartitions() only tears the mappings down when its use count
# drops to zero, so loopclean() is called explicitly afterwards as well.
self.deactivatepartitions()
self.loopclean()

def __repr__(self):
    "string representation for debugging"
    # Layout: [vdev,pdev,<loop device or empty>,pcount,mapped Nx]
    pieces = ["[", self.vdev, ",", self.pdev, ","]
    if self.ldev:
        pieces.append(self.ldev)
    pieces.append("," + str(self.pcount) + ",mapped %ix]" % self.mapped)
    return "".join(pieces)

class Partition:
"""Class representing a domU filesystem (partition) that can be
mounted in dom0"""
# NOTE(review): this archived copy has lost its indentation; take the
# nesting of the statements below from the packaged domUloader.py.
def __init__(self, whole = None, vdev = None, pdev = None):
"c'tor: setup"
# whole: disk object this partition belongs to, or None. mount() calls
# its activatepartitions(), umount() its deactivatepartitions().
self.wholedisk = whole
# vdev: name used as mount point prefix and in messages.
self.vdev = vdev
# pdev: device path that is passed to mount.
self.pdev = pdev
# Directory the filesystem is mounted on; None while not mounted.
self.mountpoint = None

def __del__(self):
"d'tor: cleanup"
# Only a still-mounted partition needs work here.
if self.mountpoint:
self.umount()
# Not needed: Refcounting will take care of it.
#if self.wholedisk:
# self.wholedisk.deactivatepartitions()

def __repr__(self):
"string representation for debugging"
# Layout: [vdev,pdev,<mount state>,<repr of the owning disk, if any>]
strg = "[" + self.vdev + "," + self.pdev + ","
if self.mountpoint:
strg += "mounted on " + self.mountpoint + ","
else:
strg += "not mounted,"
if self.wholedisk:
return strg + self.wholedisk.__repr__() + "]"
else:
return strg + "]"

def mount(self, fstype = None, options = "ro"):
"mount filesystem, sets self.mountpoint"
# fstype: passed as "-t <fstype>" when given.
# options: passed as "-o <options>" when given (read-only by default).
# Raises RuntimeError when the mount command reports a failure.
# Already mounted: nothing to do.
if self.mountpoint:
return
# NOTE(review): nothing in this method undoes this activation when
# mkdtemp() or the mount command fails.
if self.wholedisk:
self.wholedisk.activatepartitions()
# Fresh private directory below tmpdir serves as the mount point.
mtpt = tempfile.mkdtemp(prefix = "%s." % self.vdev, dir = tmpdir)
mopts = ""
if fstype:
mopts += " -t %s" % fstype
if options:
mopts += " -o %s" % options
verbose_print("mount %s '%s' %s" % (mopts, self.pdev, mtpt))
fd = os.popen("mount %s '%s' %s" % (mopts, self.pdev, mtpt))
# close() on the pipe yields the command's exit status (None on success).
err = fd.close()
if err:
# Do not leave the unused mount point directory behind.
try:
os.rmdir(mtpt)
except:
pass
raise RuntimeError("Error %i from mount %s '%s' on %s" % \
(err, mopts, self.pdev, mtpt))
self.mountpoint = mtpt

def umount(self):
"""umount filesystem at self.mountpoint"""
# Failures are reported through error(); no exception is raised for them.
if not self.mountpoint:
return
verbose_print("umount %s" % self.mountpoint)
fd = os.popen("umount %s" % self.mountpoint)
err = fd.close()
# Best effort: the directory removal is attempted whatever umount said.
try:
os.rmdir(self.mountpoint)
except:
pass
if err:
error("Error %i from umount %s" % (err, self.mountpoint))
else:
self.mountpoint = None
if self.wholedisk:
self.wholedisk.deactivatepartitions()

def parseEntry(entry):
    """Dissect a boot entry and return (vdev, kernel, ramdisk).

    entry has the form "dev:kernel[,ramdisk]".  A leading '/' is added to
    the kernel and ramdisk paths when missing.  ramdisk is None when the
    entry has no ramdisk part and '' when that part is empty.

    Raises RuntimeError("Malformed --entry") for anything else.
    """
    def bad():
        raise RuntimeError("Malformed --entry")
    fsspl = entry.split(':')
    if len(fsspl) != 2:
        bad()
    vdev, files = fsspl
    if vdev == '':
        # Without a device there is nothing the files could be read from.
        bad()
    enspl = files.split(',')
    if len(enspl) not in (1, 2):
        bad()
    # Prepend '/' if missing
    kernel = enspl[0]
    if kernel == '':
        bad()
    if kernel[0] != '/':
        kernel = '/' + kernel
    ramdisk = None
    if len(enspl) > 1:
        ramdisk = enspl[1]
        if ramdisk != '' and ramdisk[0] != '/':
            ramdisk = '/' + ramdisk
    return vdev, kernel, ramdisk

def copyFile(src, dst):
    """Wrapper for shutil.copyfile that refuses files above 16MB.

    Raises RuntimeError when src is larger than 16MB.  Any error from the
    copy itself is re-raised unchanged after the partial dst was removed.
    """
    import shutil

    def remove_quietly(path):
        # Kept in a function of its own: an exception handled in here
        # must not replace the one the caller is about to re-raise.
        try:
            os.unlink(path)
        except OSError:
            pass

    verbose_print("cp %s %s" % (src, dst))
    stat = os.stat(src)
    if stat.st_size > 16*1024*1024:
        raise RuntimeError("Too large file %s (%s larger than 16MB)" \
                           % (src, stat.st_size))
    try:
        shutil.copyfile(src, dst)
    except:
        # Cleanup only; the original error is passed on below.
        remove_quietly(dst)
        raise

def copyKernelAndRamdisk(disk, vdev, kernel, ramdisk):
    """Finds vdev in list of partitions, mounts the partition, copies
    kernel [and ramdisk] off to dom0 files, umounts the partition again,
    and returns sxpr pointing to these copies.

    Raises RuntimeError when the partition does not exist; errors from
    mounting or copying are passed on after the temporary copies have
    been removed and the partition has been unmounted again."""

    def discard(path):
        # Best-effort removal of a temporary copy.  The file may never
        # have been created, or copyFile() may have removed it already.
        if path is None:
            return
        try:
            os.unlink(path)
        except OSError:
            pass

    verbose_print("copyKernelAndRamdisk(%s, %s, %s, %s)" % (disk, vdev, kernel,
                                                            ramdisk))
    if dryrun:
        return "linux (kernel kernel.dummy) (ramdisk ramdisk.dummy)"
    part = disk.findPart(vdev)
    if not part:
        raise RuntimeError("Partition '%s' does not exist" % vdev)
    part.mount()
    # Known to the cleanup code even when mkstemp() itself fails.
    knm = None
    inm = None
    try:
        (fd, knm) = tempfile.mkstemp(prefix = "kernel.", dir = tmpdir)
        os.close(fd)
        copyFile(part.mountpoint + kernel, knm)
        if not quiet:
            print("Copy kernel %s from %s to %s for booting" % (kernel, vdev, knm))
        sxpr = "linux (kernel %s)" % knm
        if ramdisk:
            (fd, inm) = tempfile.mkstemp(prefix = "ramdisk.", dir = tmpdir)
            os.close(fd)
            copyFile(part.mountpoint + ramdisk, inm)
            sxpr += "(ramdisk %s)" % inm
    except:
        # Cleanup only; the error is re-raised unchanged.
        discard(knm)
        discard(inm)
        part.umount()
        raise
    part.umount()
    xnloader.patch_netware_loader(knm)
    return sxpr

def main(argv):
"Main routine: Parses options etc."
global quiet, dryrun, verbose, tmpdir, in_args
def usage():
"Help output (usage info)"
global verbose, quiet, dryrun
print >> sys.stderr, "domUloader usage: domUloader [--output=fd]
[--quiet] [--dryrun] [--verbose]\n" +\
"[--args] [--help] --entry=dev:kernel[,ramdisk]
physdisk [virtdisk]\n" +\
"\n" +\
"dev format: hd[a-p][0-9]*, xvd[a-p][0-9]*,
LVM-vgname-lvname\n"
print >> sys.stderr, __doc__

try:
(optlist, args) = getopt.gnu_getopt(argv, 'qvh', \
('entry=', 'output=', 'tmpdir=', 'args=', 'kernel=', 'ramdisk=',
'help', 'quiet', 'dryrun', 'verbose'))
except:
usage()
sys.exit(1)

entry = None
output = None
pdisk = None
vdisk = None

for (opt, oarg) in optlist:
if opt in ('-h', '--help'):
usage()
sys.exit(1)
elif opt in ('-q', '--quiet'):
quiet = True
elif opt in ('-n', '--dryrun'):
dryrun = True
elif opt in ('-v', '--verbose'):
verbose = True
elif opt == '--output':
output = oarg
elif opt == '--entry':
entry = oarg
elif opt == '--tmpdir':
tmpdir = oarg
elif opt == '--args':
in_args = oarg

verbose_print(str(argv))

if args:
if len(args) == 2:
pdisk = args[1]
elif len(args) == 3:
pdisk = args[1]
vdisk = args[2]

if not entry or not pdisk:
usage()
sys.exit(1)

if output is None or output == "-":
fd = sys.stdout.fileno()
else:
fd = os.open(output, os.O_WRONLY)

if not os.access(tmpdir, os.X_OK):
os.mkdir(tmpdir)
os.chmod(tmpdir, 0750)

vdev, kernel, ramdisk = parseEntry(entry)
if vdev[:vdev.find('-')] == "LVM":
vdev = vdev.split('-')[1]
if not vdisk:
vdisk = getWholedisk(vdev)
verbose_print("vdisk not specified; guessing '%s' based on '%s'" %
(vdisk, vdev))
if not vdev.startswith(vdisk):
error("Virtual disk '%s' does not match entry '%s'" % (vdisk, entry))
sys.exit(1)
disk = Wholedisk(vdisk, pdisk)

r = 0
try:
sxpr = copyKernelAndRamdisk(disk, vdev, kernel, ramdisk)
if in_args:
sxpr += "(args '%s')" % in_args
os.write(fd, sxpr)
except Exception, e:
error(str(e))
r = 1

for part in disk.partitions:
part.wholedisk = None
del disk

return r

# Call main if called (and not imported)
if __name__ == "__main__":
# Default to failure, so that an exception escaping main() still ends
# in a non-zero exit status.
r = 1
try:
r = main(sys.argv)
except Exception, e:
error(str(e))
sys.exit(r)
++++++ domu-usb-controller.patch ++++++
Index: xen-4.2.0-testing/tools/python/xen/xend/XendConfig.py
===================================================================
--- xen-4.2.0-testing.orig/tools/python/xen/xend/XendConfig.py
+++ xen-4.2.0-testing/tools/python/xen/xend/XendConfig.py
@@ -1875,7 +1875,14 @@ class XendConfig(dict):
ports = sxp.child(dev_sxp, 'port')
for port in ports[1:]:
try:
- num, bus = port
+ # When ['port' ['1','']] is saved into sxp file, it will
become (port (1 ))
+ # If using this sxp file, here variable "port" will be port=1,
+ # we should process it, otherwise, it will report error.
+ if len(port) == 1:
+ num = port[0]
+ bus = ""
+ else:
+ num, bus = port
dev_config['port-%i' % int(num)] = str(bus)
except TypeError:
pass
++++++ etc_pam.d_xen-api ++++++
#%PAM-1.0
auth required pam_listfile.so onerr=fail item=user \
sense=allow file=/etc/xen/xenapiusers
auth include common-auth
account include common-account
password include common-password
session include common-session
++++++ hibernate.patch ++++++
Index: xen-4.2.0-testing/tools/firmware/hvmloader/acpi/ssdt_s3.asl
===================================================================
--- xen-4.2.0-testing.orig/tools/firmware/hvmloader/acpi/ssdt_s3.asl
+++ xen-4.2.0-testing/tools/firmware/hvmloader/acpi/ssdt_s3.asl
@@ -20,13 +20,9 @@

DefinitionBlock ("SSDT_S3.aml", "SSDT", 2, "Xen", "HVM", 0)
{
- /* Must match piix emulation */
- Name (\_S3, Package (0x04)
- {
- 0x01, /* PM1a_CNT.SLP_TYP */
- 0x01, /* PM1b_CNT.SLP_TYP */
- 0x0, /* reserved */
- 0x0 /* reserved */
- })
+ /*
+ * Turn off support for s3 sleep state to deal with SVVP tests.
+ * This is what MSFT does on HyperV.
+ */
}

Index: xen-4.2.0-testing/tools/firmware/hvmloader/acpi/ssdt_s4.asl
===================================================================
--- xen-4.2.0-testing.orig/tools/firmware/hvmloader/acpi/ssdt_s4.asl
+++ xen-4.2.0-testing/tools/firmware/hvmloader/acpi/ssdt_s4.asl
@@ -20,13 +20,9 @@

DefinitionBlock ("SSDT_S4.aml", "SSDT", 2, "Xen", "HVM", 0)
{
- /* Must match piix emulation */
- Name (\_S4, Package (0x04)
- {
- 0x00, /* PM1a_CNT.SLP_TYP */
- 0x00, /* PM1b_CNT.SLP_TYP */
- 0x00, /* reserved */
- 0x00 /* reserved */
- })
+ /*
+ * Turn off support for s4 sleep state to deal with SVVP tests.
+ * This is what MSFT does on HyperV.
+ */
}

++++++ hv_extid_compatibility.patch ++++++
Index: xen-4.2.1-testing/tools/python/xen/xend/XendConfig.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendConfig.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendConfig.py
@@ -161,6 +161,7 @@ XENAPI_PLATFORM_CFG_TYPES = {
'nographic': int,
'nomigrate': int,
'pae' : int,
+ 'extid': int,
'rtc_timeoffset': int,
'parallel': str,
'serial': str,
@@ -523,6 +524,8 @@ class XendConfig(dict):
self['platform']['acpi_firmware'] = ""
if 'timer_mode' not in self['platform']:
self['platform']['timer_mode'] = 1
+ if 'extid' in self['platform'] and int(self['platform']['extid'])
== 1:
+ self['platform']['viridian'] = 1
if 'viridian' not in self['platform']:
self['platform']['viridian'] = 0
if 'rtc_timeoffset' not in self['platform']:
Index: xen-4.2.1-testing/tools/python/xen/xend/image.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/image.py
+++ xen-4.2.1-testing/tools/python/xen/xend/image.py
@@ -830,6 +830,7 @@ class HVMImageHandler(ImageHandler):

self.apic = int(vmConfig['platform'].get('apic', 0))
self.acpi = int(vmConfig['platform'].get('acpi', 0))
+ self.extid = int(vmConfig['platform'].get('extid', 0))
self.guest_os_type = vmConfig['platform'].get('guest_os_type')
self.memory_sharing = int(vmConfig['memory_sharing'])
try:
Index: xen-4.2.1-testing/tools/python/xen/xm/create.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xm/create.py
+++ xen-4.2.1-testing/tools/python/xen/xm/create.py
@@ -242,6 +242,10 @@ gopts.var('viridian', val='VIRIDIAN',
use="""Expose Viridian interface to x86 HVM guest?
(Default is 0).""")

+gopts.var('extid', val='EXTID',
+ fn=set_int, default=0,
+ use="Specify extention ID for a HVM domain.")
+
gopts.var('acpi', val='ACPI',
fn=set_int, default=1,
use="Disable or enable ACPI of HVM domain.")
@@ -1120,7 +1124,7 @@ def configure_hvm(config_image, vals):
'timer_mode',
'usb', 'usbdevice',
'vcpus', 'vnc', 'vncconsole', 'vncdisplay', 'vnclisten',
- 'vncunused', 'viridian', 'vpt_align',
+ 'vncunused', 'vpt_align',
'watchdog', 'watchdog_action',
'xauthority', 'xen_extended_power_mgmt', 'xen_platform_pci',
'memory_sharing' ]
@@ -1130,6 +1134,10 @@ def configure_hvm(config_image, vals):
config_image.append([a, vals.__dict__[a]])
if vals.vncpasswd is not None:
config_image.append(['vncpasswd', vals.vncpasswd])
+ if vals.extid and vals.extid == 1:
+ config_image.append(['viridian', vals.extid])
+ elif vals.viridian:
+ config_image.append(['viridian', vals.viridian])


def make_config(vals):
++++++ init.pciback ++++++
#!/bin/bash
#
# Copyright (c) 2001 SuSE GmbH Nuernberg, Germany. All rights reserved.
#
# /etc/init.d/pciback
#
### BEGIN INIT INFO
# Provides: pciback
# Required-Start: $syslog $network
# Should-Start: $null
# Required-Stop: $syslog $network
# Should-Stop: $null
# Default-Start: 3 5
# Default-Stop: 0 1 2 6
# Description: bind PCI devices to pciback
### END INIT INFO

. /etc/rc.status
. /etc/sysconfig/pciback

rc_reset

# Load the pciback module unless lsmod already lists it.
load_pciback() {
    lsmod | grep -qi "pciback" && return 0
    echo "Loading pciback ..."
    modprobe pciback
}

# Remove the pciback module if lsmod lists it.
unload_pciback() {
    lsmod | grep -qi "pciback" || return 0
    echo "Unloading pciback ..."
    modprobe -r pciback
}

# Hand every device in XEN_PCI_HIDE_LIST over to pciback.
# Each list entry has the form <driver>,<pci id>.
bind_dev_to_pciback() {
    local DRV PCIID
    for DEVICE in ${XEN_PCI_HIDE_LIST}
    do
        DRV=$(echo ${DEVICE} | /usr/bin/cut -d "," -f 1)
        PCIID=$(echo ${DEVICE} | /usr/bin/cut -d "," -f 2)

        # Already owned by pciback: nothing to do for this entry.
        ls /sys/bus/pci/drivers/pciback/${PCIID} > /dev/null 2>&1 && continue

        echo "Binding ${PCIID} ..."
        # Take the device away from its current driver first.
        if ls /sys/bus/pci/drivers/${DRV}/${PCIID} > /dev/null 2>&1
        then
            echo -n ${PCIID} > /sys/bus/pci/drivers/${DRV}/unbind
        fi
        echo -n ${PCIID} > /sys/bus/pci/drivers/pciback/new_slot
        echo -n ${PCIID} > /sys/bus/pci/drivers/pciback/bind
    done
}

# Release every device in XEN_PCI_HIDE_LIST that pciback currently owns.
# Each list entry has the form <driver>,<pci id>; only the id is needed here.
unbind_dev_from_pciback() {
    for DEVICE in ${XEN_PCI_HIDE_LIST}
    do
        local PCIID=`echo ${DEVICE} | /usr/bin/cut -d "," -f 2`

        # Silence ls completely, as the bind path does: a device that is
        # not bound is the normal case here, not an error worth printing.
        if ls /sys/bus/pci/drivers/pciback/${PCIID} > /dev/null 2>&1
        then
            echo "Unbinding ${PCIID} ..."
            echo -n ${PCIID} > /sys/bus/pci/drivers/pciback/unbind
        fi
    done
}

# NOTE(review): this guard never fires. test is given the literal string
# "uname -r" and prints nothing, so grep reads empty input and fails.
# Whether the script was meant to stop on xen kernels or on non-xen kernels
# needs to be confirmed before the line is repaired.
test "uname -r" | grep xen && exit 0

# Dispatch on the requested action.
case $1 in
start)
echo "Starting pciback ..."
echo
load_pciback
bind_dev_to_pciback

rc_status -v -r
;;
stop)
echo "Stopping pciback ..."
echo
unbind_dev_from_pciback
unload_pciback

rc_status -v
;;
reload|restart)
# Same steps as stop followed by start, but without the rc_status calls.
echo "Stopping pciback ..."
echo
unbind_dev_from_pciback
unload_pciback
echo "Starting pciback ..."
echo
load_pciback
bind_dev_to_pciback
;;
status)
if lsmod | grep -qi pciback
then
echo
echo "pciback: loaded"
echo
echo "Currently bound devices ..."
echo "-----------------------------"
# Only the entries starting with 0000 are listed.
ls /sys/bus/pci/drivers/pciback | grep ^0000
echo
else
echo "pciback: not loaded"
fi
;;
*)
echo "Usage: $0 [start|stop|restart|reload|status]"
exit 1
;;
esac
++++++ init.xen_loop ++++++
# Increase the number of loopback devices available for vm creation
options loop max_loop=64
++++++ init.xend ++++++
#!/bin/bash
#
# xend Starts and stops the Xen management daemon
#
# chkconfig: 35 98 01
# description: Starts and stops the Xen management daemon
#
### BEGIN INIT INFO
# Provides: xend
# Required-Start: $syslog $network $remote_fs
# Should-Start: iscsi $time
# Required-Stop: $syslog $network $remote_fs
# Should-Stop: iscsi $time
# Default-Start: 3 5
# Default-Stop: 0 1 2 6
# Short-Description: Starts and stops the Xen management daemon
# Description: Starts and stops the Xen management daemon. xend is needed
# to create and manage VMs on Xen.
### END INIT INFO

. /etc/rc.status
rc_reset

XEND=`pidof -x /usr/sbin/xend`

# Poll "xend status" once per second until it succeeds, giving up after
# ten checks. Returns the status of the last check.
await_daemons_up()
{
    local rc
    i=1
    rets=10
    xend status
    rc=$?
    while [ $rc -ne 0 ] && [ $i -lt $rets ]; do
        sleep 1
        echo -n .
        i=$(($i + 1))
        xend status
        rc=$?
    done
    return $rc
}

# Report status $1 for xend through the rc.status helpers and leave via
# rc_exit.
xend_abort()
{
echo -n "xend "
rc_failed $1
rc_status -v
rc_exit
}

# Remove the files in /var/lib/xen/tmp and the /var/lib/xen/xenbl* files.
# Errors are neither shown nor acted upon.
cleanup()
{
rm -f /var/lib/xen/tmp/* 2>/dev/null
rm -f /var/lib/xen/xenbl* 2>/dev/null
}

# Verify the preconditions for action $1; calls xend_abort with a suitable
# status when one of them is not met.
#   status:  /proc/xen/capabilities must exist (else 3)
#   others:  must run as root (else 4) and in a control domain; when it
#            does not, stop and try-restart end with 0, the rest with 6
check()
{
    case "$1" in
    status)
        if [ ! -e /proc/xen/capabilities ]; then
            xend_abort 3
        fi
        ;;
    *)
        if [ `id -u` != 0 ]; then
            xend_abort 4
        fi
        # grep fails for a missing file too, which covers the existence test.
        if ! grep control_d /proc/xen/capabilities >/dev/null 2>&1; then
            case "$1" in
            stop|try-restart)
                xend_abort 0
                ;;
            *)
                xend_abort 6
                ;;
            esac
        fi
        ;;
    esac
}

# Dispatch on the requested action. $XEND holds the xend pid(s) found when
# the script started. Whatever status the selected branch leaves behind is
# reported by the rc_status call after esac.
case "$1" in
start)
check $1
echo -n "Starting xend "
if [ ! -z "$XEND" ]; then
echo -n "(already running pid $XEND) "
else
# Stale files are only removed when no xend is running.
cleanup
fi
xend start
await_daemons_up
;;
stop)
check $1
echo -n "Stopping xend "
if [ -z "$XEND" ]; then
echo -n "(not running) "
xend stop
rc_reset
else
echo -n "(pid $XEND) "
xend stop
cleanup
rc_reset
fi
;;
status)
check $1
echo -n "Checking status of xend "
if [ ! -z "$XEND" ]; then
echo -n "(pid $XEND) "
fi
checkproc /usr/sbin/xend
;;
restart|reload)
check $1
echo -n "Restarting xend "
if [ -z "$XEND" ]; then
echo -n "(not running) "
else
echo -n "(old pid $XEND) "
fi
xend restart
await_daemons_up
;;
try-restart)
check $1
# Restart only if the status action reports success.
$0 status
if [ $? = 0 ]; then
$0 restart
else
rc_reset
fi
;;
*)
echo "Usage: $0 {start|stop|restart|try-restart|reload|status}"
rc_failed 2
rc_exit
esac

rc_status -v
rc_exit
++++++ init.xendomains ++++++
#!/bin/bash
#
# xendomains Starts and stops Xen VMs
#
# chkconfig: 35 99 00
# description: Starts and stops Xen VMs
#
### BEGIN INIT INFO
# Provides: xendomains
# Required-Start: $syslog $remote_fs xenstored xenconsoled
# Should-Start: xend iscsi o2cb ocfs2
# Required-Stop: $syslog $remote_fs xenstored xenconsoled
# Should-Stop: xend iscsi
# Default-Start: 3 5
# Default-Stop: 0 1 2 6
# Short-Description: Starts and stops Xen VMs
# Description: Starts and stops Xen VMs automatically when the
# host starts and stops.
### END INIT INFO

. /etc/rc.status
rc_reset

# File through which the backgrounded xm/xl command reports its exit
# status (written by the *_with_watchdog helpers, read by get_return_code).
# NOTE(review): the name is predictable (/tmp plus the pid of this script).
RETCODE_FILE=/tmp/xendomains.rc.$$
# Placeholder; check() replaces it with "xm" or "xl -f".
xm_cmd=echo

# See docs/misc/distro_mapping.txt
if [ -d /var/lock/subsys ]; then
LOCKFILE=/var/lock/subsys/xendomains
else
LOCKFILE=/var/lock/xendomains
fi

if [ -d /etc/sysconfig ]; then
XENDOM_CONFIG=/etc/sysconfig/xendomains
else
XENDOM_CONFIG=/etc/default/xendomains
fi

# Without a readable configuration "stop" ends with 0, anything else with 6.
test -r $XENDOM_CONFIG || { echo "$XENDOM_CONFIG not existing";
if [ "$1" = "stop" ]; then exit 0;
else exit 6; fi; }

. "$XENDOM_CONFIG"

# dotglob: globs also match names starting with a dot.
# nullglob: globs without a match expand to nothing.
# dir_contains_something relies on both.
shopt -s dotglob nullglob

# When $esc is empty, simple replacements for rc_timer_on/rc_timer_off are
# defined here and smart_term is set to 0.
smart_term=1
if [ -z "$esc" ]; then
smart_term=0
# Start a background sleep of $1 seconds and remember its pid.
rc_timer_on()
{
(trap "exit 0" TERM; sleep $1) & _rc_timer_pid=$!
}
# Stop the background sleep started by rc_timer_on, if any.
rc_timer_off()
{
if [ -n "$_rc_timer_pid" ]; then
kill -TERM $_rc_timer_pid > /dev/null 2>&1
fi
unset _rc_timer_pid
}
fi

# Report status $1 for xendomains through the rc.status helpers and leave
# via rc_exit.
xendomains_abort()
{
echo -n "xendomains "
rc_failed $1
rc_status -v
rc_exit
}

# Pick the toolstack command and verify the preconditions for action $1.
# Sets xm_cmd to "xm" when a xend process is found and to "xl -f" otherwise.
# Calls xendomains_abort with a suitable status when a precondition fails.
check()
{
    XEND=`pidof -x /usr/sbin/xend`
    if [ -z "$XEND" ]; then
        xm_cmd="xl -f"
        XEND="xl"
    else
        xm_cmd="xm"
    fi
    if [ "$1" = status ]; then
        # The "||" must end a line, not start one: a line that begins
        # with "||" is a syntax error.
        if [ ! -e /proc/xen/capabilities ] || [ ! -r "$XENDOM_CONFIG" ] ||
           [ -z "$XEND" ]; then
            xendomains_abort 3
        fi
    else
        if [ `id -u` != 0 ]; then
            xendomains_abort 4
        fi
        if [ ! -e /proc/xen/capabilities ] || [ -z "$XEND" ] ||
           ! grep control_d /proc/xen/capabilities >/dev/null 2>&1;
        then
            if [ "$1" = stop ] ||
               [ "$1" = restart ]; then
                xendomains_abort 0
            else
                xendomains_abort 6
            fi
        fi
        if [ ! -r "$XENDOM_CONFIG" ]; then
            xendomains_abort 6
        fi
    fi
}

# Succeed if $1 is a directory with at least one entry.
# Relies on "shopt -s dotglob nullglob" being in effect.
dir_contains_something()
{
    if [ ! -d "$1" ]; then
        return 1
    fi
    local entries=( "$1"/* )
    [ ${#entries[@]} != 0 ]
}

# Extract the domain name from config file $1 into NM.
# Two flavours are understood: a 'name = "..."' assignment at the start of
# a line, and an sxp "(name ...)" element. NM is left empty when neither
# is found.
get_name_from_cfg()
{
    # Start clean, so that a file without a name cannot inherit the name
    # found in the file examined before it.
    NM=
    # The guard uses the same expression as the extraction below; a line
    # that merely starts with "name" (e.g. "nameserver") does not count.
    if grep -q '^name[[:space:]]*=' "$1"; then
        NM=$(grep '^name[[:space:]]*=' "$1" |
             sed -e 's/^name[[:space:]]*=[[:space:]]*['\''"]\([^'\''"]*\)['\''"].*$/\1/')
    elif grep -q "(name " "$1"; then
        NM=$(grep '(name ' "$1" | sed -e 's/^ *//' | cut -d " " -f 2 |
             sed -e 's/)//')
    fi
}

# Fill the AUTONAMES array with the domain names of all config files in
# $XENDOMAINS_AUTO. The array is empty when the directory has no entries.
running_auto_names()
{
    # An unquoted "unset AUTONAMES[@]" is a glob pattern; with nullglob in
    # effect it expands to nothing and leaves the array untouched. Assign
    # an empty array instead.
    AUTONAMES=()
    if ! dir_contains_something "$XENDOMAINS_AUTO"; then
        return
    fi
    for dom in "$XENDOMAINS_AUTO"/*; do
        get_name_from_cfg "$dom"
        AUTONAMES+=("$NM")
    done
}

# Split one line of the domain list ($1) into the variables name, id, mem,
# vcpu, state and tm. The last 36 characters of the line are treated as
# fixed-width columns; everything before them is the name.
parseln()
{
# Everything but the last 36 characters, cut at the first blank.
name=${1:0:$((${#1}-36))}
name=${name%% *}
# The space before -36 is required to select "the last 36 characters".
rest="${1: -36}"
# The unquoted echo strips the padding blanks from each column.
id=${rest:0:4}
id=`echo $id`
mem=${rest:4:6}
mem=`echo $mem`
vcpu=${rest:10:6}
vcpu=`echo $vcpu`
state=${rest:16:11}
state=`echo $state`
tm=${rest:27}
tm=`echo $tm`
}

# Print the domain list of the selected toolstack without its header line.
# TERM is forced to vt100 for the call.
xm_list()
{
TERM=vt100 ${xm_cmd} list | grep -v '^Name *ID'
}

# Succeed if the domain named in config file $1 is listed with a state.
is_cfg_running()
{
    get_name_from_cfg "$1"
    # The list walk is exactly what check_domain_up does for a given name.
    check_domain_up "$NM"
}

# Restore the saved domains (when XENDOMAINS_RESTORE is "true") and create
# the domains configured in $XENDOMAINS_AUTO. Does nothing but report when
# the lock file already exists; reports status 6 when there was nothing to
# restore or start.
start()
{
    if [ -f "$LOCKFILE" ]; then
        echo -n "xendomains already running (lockfile exists)"
        rc_reset
        rc_status -v
        return 0
    fi

    local printed=0

    if [ "$XENDOMAINS_RESTORE" = "true" ] &&
       dir_contains_something "$XENDOMAINS_SAVE"; then
        mkdir -p "$(dirname "$LOCKFILE")"
        touch "$LOCKFILE"
        echo "Restoring saved Xen domains"
        printed=1
        for dom in "$XENDOMAINS_SAVE"/*; do
            echo -n " ${dom##*/}: "
            ${xm_cmd} restore "$dom" >/dev/null 2>&1
            if [ $? -ne 0 ]; then
                rc_failed
            else
                rc_reset
                # The image is only removed once it was restored.
                rm -f "$dom"
            fi
            rc_status -v
        done
    fi

    if dir_contains_something "$XENDOMAINS_AUTO"; then
        touch "$LOCKFILE"
        echo "Starting auto Xen domains"
        printed=1
        for dom in "$XENDOMAINS_AUTO"/*; do
            echo -n " ${dom##*/}: "
            if is_cfg_running "$dom"; then
                rc_status -s
            else
                # The config file belongs on the same line as the command;
                # on a line of its own it would be run as a command itself.
                if grep -q "^name" "$dom";then
                    ${xm_cmd} create --quiet --defconfig "$dom"
                elif grep -q "(name .*" "$dom";then
                    ${xm_cmd} create --quiet --config "$dom"
                else
                    # Neither config flavour recognised: nothing was
                    # started, so do not let this pass as a success.
                    false
                fi
                if [ $? -ne 0 ]; then
                    rc_failed
                else
                    usleep $XENDOMAINS_CREATE_USLEEP
                    rc_reset
                fi
                rc_status -v
            fi
        done
    fi

    if [ $printed -eq 0 ]; then
        echo -n "Starting xendomains"
        rc_failed 6 # not configured
        rc_status -v
    fi
}

# Succeed if state string $1 is one of the two zombie states.
is_zombie_state()
{
    case "$1" in
        "-b---d"|"-----d")
            return 0
            ;;
        *)
            return 1
            ;;
    esac
}

# Succeed if the domain list contains at least one domain other than id 0
# that has a state and is not a zombie.
any_non_zombies()
{
while read LN; do
parseln "$LN"
# Skip id 0 and lines without a state column.
[ "$id" = 0 ] && continue
[ -z "$state" ] && continue
is_zombie_state "$state" || return 0
done < <(xm_list)
return 1
}

# Run "${xm_cmd} <args>" in the background with all output discarded, have
# it store its exit status in $RETCODE_FILE, and supervise it with
# watchdog_xm. Returns what watchdog_xm returns.
run_with_watchdog()
{
    # Redirection and "&" have to stay on the line of the subshell; on a
    # line of their own the command would run in the foreground and $!
    # would not be its pid.
    (${xm_cmd} "$@" ; echo $? > "$RETCODE_FILE") >/dev/null 2>&1 &
    watchdog_xm $!
}

# Migrate a domain; the arguments are passed on to "migrate".
migrate_with_watchdog()
{
    run_with_watchdog migrate "$@"
}

# Save a domain; the arguments are passed on to "save".
save_with_watchdog()
{
    run_with_watchdog save "$@"
}

# Shut down domains and wait for them; the arguments are passed on to
# "shutdown -w".
shutdown_with_watchdog()
{
    run_with_watchdog shutdown -w "$@"
}

# Return the exit status stored in $RETCODE_FILE and remove the file.
# Returns 127 when the file is missing, unreadable or does not hold a
# number.
get_return_code()
{
    local RC=127
    [ -r "$RETCODE_FILE" ] && RC=`head -c10 "$RETCODE_FILE"`
    rm -f "$RETCODE_FILE"
    # "return" with an empty argument would hand back the status of the
    # rm above, i.e. success.
    case "$RC" in
        ''|*[!0-9]*)
            RC=127
            ;;
    esac
    return $RC
}

# Wait for background job $1 and return the exit status it stored.
# With XENDOMAINS_STOP_MAXWAIT unset or 0 the wait is unlimited; otherwise
# the job is killed once the timer started by rc_timer_on has run out.
# $1: The PID to wait on.
watchdog_xm()
{
    local col=$((COLUMNS-11))
    # The whole test has to sit on one line; split after "=" it is a
    # syntax error.
    if [ -z "$XENDOMAINS_STOP_MAXWAIT" ] || [ "$XENDOMAINS_STOP_MAXWAIT" = "0" ]; then
        wait $1 >/dev/null 2>&1
        get_return_code
        return
    fi

    rc_timer_on $XENDOMAINS_STOP_MAXWAIT $col
    while true; do
        # Prefer "jobs" over "ps": faster and no false positives
        pid=`jobs -l | grep " $1 Running"`
        if [ -z "$pid" ]; then
            break
        fi
        # Timer job gone: time is up.
        pid=`jobs -l | grep " $_rc_timer_pid Running"`
        if [ -z "$pid" ]; then
            disown $1 # To avoid the "Terminated..." message
            kill $1 >/dev/null 2>&1
        fi
        sleep 1
    done
    rc_timer_off
    if [ $smart_term -ne 0 ]; then
        echo -en "\015${esc}[${col}C "
    fi
    get_return_code
}

# Shut down the running domains and remove the lock file.
# For every listed domain (id 0 and lines without a state are skipped) the
# configured steps are tried in this order: sysrq, destroy if zombie,
# migrate, save, shutdown. A successful migrate or save ends the handling
# of that domain. With XENDOMAINS_AUTO_ONLY=true only the domains named in
# $XENDOMAINS_AUTO are touched.
stop()
{
echo "Shutting down Xen domains"
if [ "$XENDOMAINS_AUTO_ONLY" = "true" ]; then
running_auto_names
fi
local printed=0
while read LN; do
parseln "$LN"
[ "$id" = 0 ] && continue
[ -z "$state" ] && continue
printed=1
# Leave domains alone that are not in the auto list.
if [ "$XENDOMAINS_AUTO_ONLY" = "true" ]; then
is_auto_domain=0
for n in "${AUTONAMES[@]}"; do
if [ "$name" = "$n" ]; then
is_auto_domain=1
break
fi
done
if [ $is_auto_domain -eq 0 ]; then
echo -n " $name: "
rc_status -s
continue
fi
fi
# Step 1: send every configured sysrq.
if [ -n "$XENDOMAINS_SYSRQ" ]; then
for sysrq in $XENDOMAINS_SYSRQ; do
echo -n " $name: "
echo -n "sending sysrq '$sysrq'... "
${xm_cmd} sysrq $id $sysrq
if [ $? -ne 0 ]; then
rc_failed
else
rc_reset
fi
rc_status -v
# usleep just ignores empty arg
usleep $XENDOMAINS_USLEEP
done
fi
# Step 2: zombies are destroyed and not processed any further.
if is_zombie_state "$state"; then
echo -n " $name: "
echo -n "destroying zombie... "
${xm_cmd} destroy $id
rc_reset
rc_status -v
continue
fi
# Step 3: migrate; on failure fall through to the next step.
if [ -n "$XENDOMAINS_MIGRATE" ]; then
echo -n " $name: "
echo -n "migrating... "
migrate_with_watchdog $id "$XENDOMAINS_MIGRATE"
if [ $? -ne 0 ]; then
rc_failed
rc_status -v
else
rc_reset
rc_status -v
continue
fi
fi
# Step 4: save; a failed save leaves no image file behind.
if [ -n "$XENDOMAINS_SAVE" ]; then
echo -n " $name: "
echo -n "saving... "
save_with_watchdog $id "$XENDOMAINS_SAVE/$name"
if [ $? -ne 0 ]; then
rm -f "$XENDOMAINS_SAVE/$name"
rc_failed
rc_status -v
else
rc_reset
rc_status -v
continue
fi
fi
# Step 5: shut down with the options from XENDOMAINS_SHUTDOWN.
if [ -n "$XENDOMAINS_SHUTDOWN" ]; then
echo -n " $name: "
echo -n "shutting down... "
shutdown_with_watchdog $id $XENDOMAINS_SHUTDOWN
if [ $? -ne 0 ]; then
rc_failed
else
rc_reset
fi
rc_status -v
fi
done < <(xm_list)

# Final sweep over whatever is still running and not a zombie.
if [ -n "$XENDOMAINS_SHUTDOWN_ALL" ] && any_non_zombies ; then
echo -n " others: shutting down... "
shutdown_with_watchdog $XENDOMAINS_SHUTDOWN_ALL
if [ $? -ne 0 ]; then
rc_failed
else
rc_reset
fi
rc_status -v
fi

if [ $printed -eq 0 ]; then
echo -e "${rc_done_up}"
fi

# Unconditionally delete lock file
rm -f "$LOCKFILE"
}

# Succeed if a domain called $1 (other than id 0) is listed with a state.
check_domain_up()
{
    while read LN; do
        parseln "$LN"
        [ "$id" = 0 ] && continue
        [ "$name" = "$1" ] || continue
        # First match decides: up only if it has a state.
        if [ -z "$state" ]; then
            return 1
        fi
        return 0
    done < <(xm_list)
    return 1
}

# Report the state of every domain configured in $XENDOMAINS_AUTO (status 2
# if it is not up) and of every image in $XENDOMAINS_SAVE (always status 3).
# With both directories empty a single success is reported.
check_all_domains_up()
{
any_auto=0
any_save=0
dir_contains_something "$XENDOMAINS_AUTO" && any_auto=1
dir_contains_something "$XENDOMAINS_SAVE" && any_save=1
if [ $any_auto -eq 0 ] && [ $any_save -eq 0 ]; then
rc_reset
rc_status -v
return
fi
echo
if [ $any_auto -ne 0 ]; then
for nm in "$XENDOMAINS_AUTO"/*; do
# nm is the config file, NM the domain name found in it.
get_name_from_cfg "$nm"
echo -n " $nm: "
if check_domain_up "$NM"; then
rc_reset
else
rc_failed 2
fi
rc_status -v
done
fi
if [ $any_save -ne 0 ]; then
# An image still present in the save directory is reported as status 3.
for nm in "$XENDOMAINS_SAVE"/*; do
echo -n " $nm: "
rc_failed 3
rc_status -v
done
fi
}

# This does NOT necessarily restart all running domains: instead it
# stops all running domains and then boots all the domains specified in
# AUTODIR. If other domains have been started manually then they will
# not get restarted.
# The stop part runs as a separate invocation of this script, the start
# part in the current shell.
restart()
{
"$0" stop
start
}

# Dispatch on the requested action; every valid action runs check first.
case "$1" in
start)
check $1
start
;;

stop)
check $1
stop
;;

restart|reload)
check $1
restart
;;

try-restart)
check $1
# Restart only if the status action reports success.
"$0" status
if [ $? = 0 ]; then
"$0" restart
else
rc_reset
rc_status -v
fi
;;

status)
check $1
echo -n "Checking status of Xen domains"
# Without the lock file status 3 is reported.
if [ ! -f "$LOCKFILE" ]; then
rc_failed 3
rc_status -v
else
check_all_domains_up
fi
;;

*)
echo "Usage: $0 {start|stop|restart|try-restart|reload|status}"
rc_failed 2
;;
esac

rc_exit
++++++ ioemu-7615-qcow2-fix-alloc_cluster_link_l2.patch ++++++
qcow2 corruption: Fix alloc_cluster_link_l2 (Kevin Wolf)

This patch fixes a qcow2 corruption bug introduced in SVN Rev 5861. L2 tables
are big endian, so entries must be converted before being passed to functions.

This bug is easy to trigger. The following script will create and destroy a
qcow2 image (the header is gone after three loop iterations):

#!/bin/bash
qemu-img create -f qcow2 test.qcow 1M
for i in $(seq 1 10); do
qemu-system-x86_64 -hda test.qcow -monitor stdio > /dev/null 2>&1 <<EOF
savevm test-$i
quit
EOF
done

Signed-off-by: Kevin Wolf <kwolf@xxxxxxxxxx>

Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/block-qcow2.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/block-qcow2.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/block-qcow2.c
@@ -916,7 +916,7 @@ static int alloc_cluster_link_l2(BlockDr
goto err;

for (i = 0; i < j; i++)
- free_any_clusters(bs, old_cluster[i], 1);
+ free_any_clusters(bs, be64_to_cpu(old_cluster[i]) &
~QCOW_OFLAG_COPIED, 1);

ret = 0;
err:
++++++ ioemu-bdrv-open-CACHE_WB.patch ++++++
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
@@ -247,8 +247,11 @@ static int open_disk(struct td_state *s,
drv = blktap_drivers[i].drv;
DPRINTF("%s driver specified\n", drv ? drv->format_name : "No");

- /* Open the image */
- if (bdrv_open2(bs, path, flags, drv) != 0) {
+ /* Open the image
+ * Use BDRV_O_CACHE_WB for write-back caching,
+ * no flags for write-through caching
+ */
+ if (bdrv_open2(bs, path, flags|BDRV_O_CACHE_WB, drv) != 0) {
fprintf(stderr, "Could not open image file %s\n", path);
return -ENOMEM;
}
++++++ ioemu-blktap-barriers.patch ++++++
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
@@ -360,6 +360,15 @@ static void qemu_send_responses(void* op
}

/**
+ * Callback function for AIO flush
+ */
+static void qemu_flush_response(void* opaque, int ret) {
+ if (ret != 0) {
+ DPRINTF("aio_flush: ret = %d (%s)\n", ret, strerror(-ret));
+ }
+}
+
+/**
* Callback function for the IO message pipe. Reads requests from the ring
* and processes them (call qemu read/write functions).
*
@@ -378,6 +387,7 @@ static void handle_blktap_iomsg(void* pr
blkif_t *blkif = s->blkif;
tapdev_info_t *info = s->ring_info;
int page_size = getpagesize();
+ int sync;

struct aiocb_info *aiocb_info;

@@ -410,7 +420,7 @@ static void handle_blktap_iomsg(void* pr

/* Don't allow writes on readonly devices */
if ((s->flags & TD_RDONLY) &&
- (req->operation == BLKIF_OP_WRITE)) {
+ (req->operation != BLKIF_OP_READ)) {
blkif->pending_list[idx].status = BLKIF_RSP_ERROR;
goto send_response;
}
@@ -431,7 +441,7 @@ static void handle_blktap_iomsg(void* pr
DPRINTF("Sector request failed:\n");
DPRINTF("%s request, idx [%d,%d] size [%llu], "
"sector [%llu,%llu]\n",
- (req->operation == BLKIF_OP_WRITE ?
+ (req->operation != BLKIF_OP_READ ?
"WRITE" : "READ"),
idx,i,
(long long unsigned)
@@ -444,8 +454,14 @@ static void handle_blktap_iomsg(void* pr

blkif->pending_list[idx].secs_pending += nsects;

- switch (req->operation)
+ sync = 0;
+ switch (req->operation)
{
+ case BLKIF_OP_WRITE_BARRIER:
+ sync = 1;
+ bdrv_aio_flush(s->bs, qemu_flush_response,
NULL);
+ /* fall through */
+
case BLKIF_OP_WRITE:
aiocb_info = malloc(sizeof(*aiocb_info));

@@ -465,6 +481,10 @@ static void handle_blktap_iomsg(void* pr
DPRINTF("ERROR: bdrv_write() ==
NULL\n");
goto send_response;
}
+
+ if (sync)
+ bdrv_aio_flush(s->bs,
qemu_flush_response, NULL);
+
break;

case BLKIF_OP_READ:
++++++ ioemu-blktap-fv-init.patch ++++++
Index:
xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_machine_fv.c
===================================================================
---
xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_machine_fv.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_machine_fv.c
@@ -270,6 +270,7 @@ void qemu_invalidate_entry(uint8_t *buff

#endif /* defined(MAPCACHE) */

+extern void init_blktap(void);

static void xen_init_fv(ram_addr_t ram_size, int vga_ram_size,
const char *boot_device,
@@ -295,6 +296,11 @@ static void xen_init_fv(ram_addr_t ram_s
}
#endif

+#ifndef CONFIG_STUBDOM
+ /* Initialize tapdisk client */
+ init_blktap();
+#endif
+
#ifdef CONFIG_STUBDOM /* the hvmop is not supported on older hypervisors */
xc_set_hvm_param(xc_handle, domid, HVM_PARAM_DM_DOMAIN, DOMID_SELF);
#endif
++++++ ioemu-blktap-image-format.patch ++++++
From 5ac882a6d7499e4a36103db071203bf4d1ddfe1f Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@xxxxxxx>
Date: Tue, 10 Mar 2009 16:26:45 +0100
Subject: [PATCH 2/6] ioemu: Use the image format sent by blktapctrl

Currently the blktap backend in ioemu lets qemu guess which format an
image is in. This was a security problem and the blktap backend
doesn't work any more since this was fixed in qemu.

This patch changes ioemu to respect the format it gets from blktapctrl.

Signed-off-by: Kevin Wolf <kwolf@xxxxxxx>
---
hw/xen_blktap.c | 22 +++++++++++++++++++---
hw/xen_blktap.h | 14 ++++++++++++++
2 files changed, 33 insertions(+), 3 deletions(-)

Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
@@ -218,9 +218,10 @@ static int map_new_dev(struct td_state *
return -1;
}

-static int open_disk(struct td_state *s, char *path, int readonly)
+static int open_disk(struct td_state *s, char *path, int driver, int readonly)
{
BlockDriverState* bs;
+ BlockDriver* drv;
char* devname;
static int devnumber = 0;
int i;
@@ -230,7 +231,22 @@ static int open_disk(struct td_state *s,
bs = bdrv_new(devname);
free(devname);

- if (bdrv_open(bs, path, 0) != 0) {
+ /* Search for disk driver */
+ for (i = 0; blktap_drivers[i].idnum >= 0; i++) {
+ if (blktap_drivers[i].idnum == driver)
+ break;
+ }
+
+ if (blktap_drivers[i].idnum < 0) {
+ fprintf(stderr, "Could not find image format id %d\n", driver);
+ return -ENOMEM;
+ }
+
+ drv = blktap_drivers[i].drv;
+ DPRINTF("%s driver specified\n", drv ? drv->format_name : "No");
+
+ /* Open the image */
+ if (bdrv_open2(bs, path, 0, drv) != 0) {
fprintf(stderr, "Could not open image file %s\n", path);
return -ENOMEM;
}
@@ -521,7 +537,7 @@ static void handle_blktap_ctrlmsg(void*
s = state_init();

/*Open file*/
- if (s == NULL || open_disk(s, path, msg->readonly)) {
+ if (s == NULL || open_disk(s, path, msg->drivertype,
msg->readonly)) {
msglen = sizeof(msg_hdr_t);
msg->type = CTLMSG_IMG_FAIL;
msg->len = msglen;
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.h
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.h
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.h
@@ -52,4 +52,18 @@ typedef struct fd_list_entry {

int init_blktap(void);

+typedef struct disk_info {
+ int idnum;
+ struct BlockDriver *drv;
+} disk_info_t;
+
+static disk_info_t blktap_drivers[] = {
+ { DISK_TYPE_AIO, &bdrv_raw },
+ { DISK_TYPE_SYNC, &bdrv_raw },
+ { DISK_TYPE_VMDK, &bdrv_vmdk },
+ { DISK_TYPE_QCOW, &bdrv_qcow },
+ { DISK_TYPE_QCOW2, &bdrv_qcow2 },
+ { -1, NULL }
+};
+
#endif /*XEN_BLKTAP_H_*/
++++++ ioemu-blktap-zero-size.patch ++++++
From cb982fd919a52ff86f01025d0f92225bc7b2a956 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@xxxxxxx>
Date: Tue, 10 Mar 2009 16:44:31 +0100
Subject: [PATCH 5/6] ioemu: Fail on too small blktap disks

The blktap infrastructure doesn't seem to be able to cope with images
that are smaller than a sector; it produced hangs for me. Such an
image isn't really useful anyway, so just fail gracefully.

Signed-off-by: Kevin Wolf <kwolf@xxxxxxx>
---
hw/xen_blktap.c | 6 ++++++
1 files changed, 6 insertions(+), 0 deletions(-)

Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
@@ -256,6 +256,12 @@ static int open_disk(struct td_state *s,
s->size = bs->total_sectors;
s->sector_size = 512;

+ if (s->size == 0) {
+ fprintf(stderr, "Error: Disk image %s is too small\n",
+ path);
+ return -ENOMEM;
+ }
+
s->info = ((s->flags & TD_RDONLY) ? VDISK_READONLY : 0);

#ifndef QEMU_TOOL
Index: xen-4.2.0-testing/tools/python/xen/xend/server/DevController.py
===================================================================
--- xen-4.2.0-testing.orig/tools/python/xen/xend/server/DevController.py
+++ xen-4.2.0-testing/tools/python/xen/xend/server/DevController.py
@@ -155,7 +155,7 @@ class DevController:
(devid, self.deviceClass))

elif status == Error:
- self.destroyDevice(devid, False)
+ self.destroyDevice(devid, True)
if err is None:
raise VmError("Device %s (%s) could not be connected. "
"Backend device not found." %
++++++ ioemu-debuginfo.patch ++++++
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/Makefile
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/Makefile
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/Makefile
@@ -243,7 +243,7 @@ endif
install: all $(if $(BUILD_DOCS),install-doc)
mkdir -p "$(DESTDIR)$(bindir)"
ifneq ($(TOOLS),)
- $(INSTALL) -m 755 -s $(TOOLS) "$(DESTDIR)$(bindir)"
+ $(INSTALL) -m 755 $(TOOLS) "$(DESTDIR)$(bindir)"
endif
ifneq ($(BLOBS),)
mkdir -p "$(DESTDIR)$(datadir)"
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/Makefile.target
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/Makefile.target
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/Makefile.target
@@ -755,7 +755,7 @@ clean:

install: all install-hook
ifneq ($(PROGS),)
- $(INSTALL) -m 755 -s $(PROGS) "$(DESTDIR)$(bindir)"
+ $(INSTALL) -m 755 $(PROGS) "$(DESTDIR)$(bindir)"
endif

# Include automatically generated dependency files
++++++ ioemu-disable-emulated-ide-if-pv.patch ++++++
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/qemu-xen.h
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/qemu-xen.h
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/qemu-xen.h
@@ -1,6 +1,8 @@
#ifndef QEMU_XEN_H
#define QEMU_XEN_H

+#include "hw/boards.h"
+
/* vl.c */
extern int restore;
extern int vga_ram_size;
@@ -65,7 +67,7 @@ void handle_buffered_pio(void);
/* xenstore.c */
void xenstore_init(void);
uint32_t xenstore_read_target(void);
-void xenstore_parse_domain_config(int domid);
+void xenstore_parse_domain_config(int domid, QEMUMachine *machine);
int xenstore_parse_disable_pf_config(void);
int xenstore_fd(void);
void xenstore_process_event(void *opaque);
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vl.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vl.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vl.c
@@ -5862,9 +5862,9 @@ int main(int argc, char **argv, char **e
if ((msg = xenbus_read(XBT_NIL, "domid", &domid_s)))
fprintf(stderr,"Can not read our own domid: %s\n", msg);
else
- xenstore_parse_domain_config(atoi(domid_s));
+ xenstore_parse_domain_config(atoi(domid_s), machine);
#else
- xenstore_parse_domain_config(domid);
+ xenstore_parse_domain_config(domid, machine);
#endif /* CONFIG_STUBDOM */
}

Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/xenstore.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
@@ -445,7 +445,7 @@ void xenstore_init(void)
}
}

-void xenstore_parse_domain_config(int hvm_domid)
+void xenstore_parse_domain_config(int hvm_domid, QEMUMachine *machine)
{
char **e_danger = NULL;
char *buf = NULL;
@@ -739,11 +739,19 @@ void xenstore_parse_domain_config(int hv

#endif

- drives_table[nb_drives].bdrv = bs;
- drives_table[nb_drives].used = 1;
- media_filename[nb_drives] = strdup(bs->filename);
- nb_drives++;
-
+ if (machine == &xenfv_machine) {
+ drives_table[nb_drives].bdrv = bs;
+ drives_table[nb_drives].used = 1;
+#ifdef CONFIG_STUBDOM
+ media_filename[nb_drives] = strdup(danger_buf);
+#else
+ media_filename[nb_drives] = strdup(bs->filename);
+#endif
+ nb_drives++;
+ } else {
+ qemu_aio_flush();
+ bdrv_close(bs);
+ }
}

#ifdef CONFIG_STUBDOM
++++++ ioemu-disable-scsi.patch ++++++
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
===================================================================
---
xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
@@ -359,6 +359,8 @@ static void platform_ioport_write(void *
case 4:
fprintf(logfile, "Disconnect IDE hard disk...\n");
ide_unplug_harddisks();
+ fprintf(logfile, "Disconnect SCSI hard disk...\n");
+ pci_unplug_scsi();
fprintf(logfile, "Disconnect netifs...\n");
pci_unplug_netifs();
fprintf(logfile, "Shutdown taps...\n");
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/qemu-xen.h
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/qemu-xen.h
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/qemu-xen.h
@@ -47,6 +47,7 @@ void unset_vram_mapping(void *opaque);
#endif

void pci_unplug_netifs(void);
+void pci_unplug_scsi(void);
void destroy_hvm_domain(void);
void unregister_iomem(target_phys_addr_t start);

Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/pci.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/pci.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/pci.c
@@ -871,6 +871,50 @@ void pci_unplug_netifs(void)
}
}

+void pci_unplug_scsi(void)
+{
+ PCIBus *bus;
+ PCIDevice *dev;
+ PCIIORegion *region;
+ int x;
+ int i;
+
+ /* We only support one PCI bus */
+ for (bus = first_bus; bus; bus = NULL) {
+ for (x = 0; x < 256; x++) {
+ dev = bus->devices[x];
+ if (dev &&
+ dev->config[0xa] == 0 &&
+ dev->config[0xb] == 1
+#ifdef CONFIG_PASSTHROUGH
+ && test_pci_devfn(x) != 1
+#endif
+ ) {
+ /* Found a scsi disk. Remove it from the bus. Note that
+ we don't free it here, since there could still be
+ references to it floating around. There are only
+ ever one or two structures leaked, and it's not
+ worth finding them all. */
+ bus->devices[x] = NULL;
+ for (i = 0; i < PCI_NUM_REGIONS; i++) {
+ region = &dev->io_regions[i];
+ if (region->addr == (uint32_t)-1 ||
+ region->size == 0)
+ continue;
+ fprintf(logfile, "region type %d at [%x,%x).\n",
+ region->type, region->addr,
+ region->addr+region->size);
+ if (region->type == PCI_ADDRESS_SPACE_IO) {
+ isa_unassign_ioport(region->addr, region->size);
+ } else if (region->type == PCI_ADDRESS_SPACE_MEM) {
+ unregister_iomem(region->addr);
+ }
+ }
+ }
+ }
+ }
+}
+
typedef struct {
PCIDevice dev;
PCIBus *bus;
++++++ ioemu-vnc-resize.patch ++++++
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vnc.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
@@ -1751,6 +1751,25 @@ static int protocol_client_msg(VncState
}

set_encodings(vs, (int32_t *)(data + 4), limit);
+
+ /*
+ * The initialization of a VNC connection can race with xenfb changing
+ * the resolution. This happens when the VNC connection is already
+ * established, but the client has not yet advertised has_resize, so it
+ * won't get notified of the switch.
+ *
+ * Therefore we resend the resolution as soon as the client has sent its
+ * encodings.
+ */
+ if (vs->has_resize) {
+ /* Resize the VNC window */
+ vnc_write_u8(vs, 0); /* msg id */
+ vnc_write_u8(vs, 0);
+ vnc_write_u16(vs, 1); /* number of rects */
+ vnc_framebuffer_update(vs, 0, 0, vs->serverds.width,
vs->serverds.height, -223);
+
+ vnc_flush(vs);
+ }
break;
case 3:
if (len == 1)
++++++ ioemu-watchdog-ib700-timer.patch ++++++

Subject: qdev: convert watchdogs
From: Markus Armbruster armbru@xxxxxxxxxx Fri Aug 21 10:31:34 2009 +0200
Date: Thu Aug 27 20:35:24 2009 -0500:
Git: 09aaa1602f9381c0e0fb539390b1793e51bdfc7b

* THIS IS ONLY THE BUG FIX PART OF THE UPSTREAM PATCH *

Fixes ib700 not to use vm_clock before it is initialized: in
wdt_ib700_init(), called from register_watchdogs(), which runs before
init_timers(). The bug made ib700_write_enable_reg() crash in
qemu_del_timer().

Signed-off-by: Markus Armbruster <armbru@xxxxxxxxxx>
Signed-off-by: Anthony Liguori <aliguori@xxxxxxxxxx>

Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/wdt_ib700.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/wdt_ib700.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/wdt_ib700.c
@@ -93,6 +93,7 @@ static int ib700_load(QEMUFile *f, void
/* Create and initialize a virtual IB700 during PC creation. */
static void ib700_pc_init(PCIBus *unused)
{
+ timer = qemu_new_timer(vm_clock, ib700_timer_expired, NULL);
register_savevm("ib700_wdt", -1, 0, ib700_save, ib700_load, NULL);

register_ioport_write(0x441, 2, 1, ib700_write_disable_reg, NULL);
@@ -108,5 +109,4 @@ static WatchdogTimerModel model = {
void wdt_ib700_init(void)
{
watchdog_add_model(&model);
- timer = qemu_new_timer(vm_clock, ib700_timer_expired, NULL);
}
++++++ ioemu-watchdog-linkage.patch ++++++

Subject: Move watchdog, watchdog_action, give them internal linkage
From: Markus Armbruster armbru@xxxxxxxxxx Fri Aug 21 10:31:32 2009 +0200
Date: Thu Aug 27 20:30:23 2009 -0500:
Git: 88b3be201acf64e0bd19782bebd533901c951c87

Signed-off-by: Markus Armbruster <armbru@xxxxxxxxxx>
Signed-off-by: Anthony Liguori <aliguori@xxxxxxxxxx>

Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/watchdog.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/watchdog.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/watchdog.c
@@ -26,6 +26,16 @@
#include "sysemu.h"
#include "hw/watchdog.h"

+/* Possible values for action parameter. */
+#define WDT_RESET 1 /* Hard reset. */
+#define WDT_SHUTDOWN 2 /* Shutdown. */
+#define WDT_POWEROFF 3 /* Quit. */
+#define WDT_PAUSE 4 /* Pause. */
+#define WDT_DEBUG 5 /* Prints a message and continues running. */
+#define WDT_NONE 6 /* Do nothing. */
+
+static WatchdogTimerModel *watchdog;
+static int watchdog_action = WDT_RESET;
static LIST_HEAD(watchdog_list, WatchdogTimerModel) watchdog_list;

void watchdog_add_model(WatchdogTimerModel *model)
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/watchdog.h
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/watchdog.h
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/watchdog.h
@@ -27,13 +27,6 @@
extern void wdt_i6300esb_init(void);
extern void wdt_ib700_init(void);

-/* Possible values for action parameter. */
-#define WDT_RESET 1 /* Hard reset. */
-#define WDT_SHUTDOWN 2 /* Shutdown. */
-#define WDT_POWEROFF 3 /* Quit. */
-#define WDT_PAUSE 4 /* Pause. */
-#define WDT_DEBUG 5 /* Prints a message and continues running. */
-#define WDT_NONE 6 /* Do nothing. */

struct WatchdogTimerModel {
LIST_ENTRY(WatchdogTimerModel) entry;
@@ -50,10 +43,6 @@ struct WatchdogTimerModel {
};
typedef struct WatchdogTimerModel WatchdogTimerModel;

-/* in vl.c */
-extern WatchdogTimerModel *watchdog;
-extern int watchdog_action;
-
/* in hw/watchdog.c */
extern int select_watchdog(const char *p);
extern int select_watchdog_action(const char *action);
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vl.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vl.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vl.c
@@ -250,8 +250,6 @@ int no_shutdown = 0;
int cursor_hide = 1;
int graphic_rotate = 0;
int daemonize = 0;
-WatchdogTimerModel *watchdog = NULL;
-int watchdog_action = WDT_RESET;
const char *option_rom[MAX_OPTION_ROMS];
int nb_option_roms;
int semihosting_enabled = 0;
++++++ ioemu-watchdog-support.patch ++++++
++++ 963 lines (skipped)

++++++ ipxe-enable-nics.patch ++++++
Index: xen-4.2.0-testing/tools/firmware/etherboot/Config
===================================================================
--- xen-4.2.0-testing.orig/tools/firmware/etherboot/Config
+++ xen-4.2.0-testing/tools/firmware/etherboot/Config
@@ -1,3 +1,4 @@
+NICS = rtl8139 8086100e eepro100 e1000 pcnet32 10ec8029

CFLAGS += -UPXE_DHCP_STRICT
CFLAGS += -DPXE_DHCP_STRICT
++++++ kernel-boot-hvm.patch ++++++
Direct kernel boot of HVM guests regressed between xen-3.3 and xen-4.0.
Forward-port this feature to the latest qemu-xen. Make a fake boot sector with
the given kernel and initrd, which can be accessed by hvmloader.

Signed-off-by: Chunyan Liu <cyliu@xxxxxxxxxx>

Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/block.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/block.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/block.c
@@ -596,6 +596,16 @@ int bdrv_read(BlockDriverState *bs, int6

if (bdrv_check_request(bs, sector_num, nb_sectors))
return -EIO;
+
+ if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) {
+ memcpy(buf, bs->boot_sector_data, 512);
+ sector_num++;
+ nb_sectors--;
+ buf += 512;
+ if (nb_sectors == 0)
+ return 0;
+ }
+
if (drv->bdrv_pread) {
int ret, len;
len = nb_sectors * 512;
@@ -631,6 +641,10 @@ int bdrv_write(BlockDriverState *bs, int
if (bdrv_check_request(bs, sector_num, nb_sectors))
return -EIO;

+ if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) {
+ memcpy(bs->boot_sector_data, buf, 512);
+ }
+
if (drv->bdrv_pwrite) {
int ret, len, count = 0;
len = nb_sectors * 512;
@@ -934,6 +948,16 @@ void bdrv_guess_geometry(BlockDriverStat
}
}

+/* force a given boot sector. */
+void bdrv_set_boot_sector(BlockDriverState *bs, const uint8_t *data, int size)
+{
+ bs->boot_sector_enabled = 1;
+ if (size > 512)
+ size = 512;
+ memcpy(bs->boot_sector_data, data, size);
+ memset(bs->boot_sector_data + size, 0, 512 - size);
+}
+
void bdrv_set_geometry_hint(BlockDriverState *bs,
int cyls, int heads, int secs)
{
@@ -1464,6 +1488,14 @@ BlockDriverAIOCB *bdrv_aio_read(BlockDri
if (bdrv_check_request(bs, sector_num, nb_sectors))
return NULL;

+ /* XXX: we assume that nb_sectors == 0 is suppored by the async read */
+ if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) {
+ memcpy(buf, bs->boot_sector_data, 512);
+ sector_num++;
+ nb_sectors--;
+ buf += 512;
+ }
+
ret = drv->bdrv_aio_read(bs, sector_num, buf, nb_sectors, cb, opaque);

if (ret) {
@@ -1489,6 +1521,10 @@ BlockDriverAIOCB *bdrv_aio_write(BlockDr
if (bdrv_check_request(bs, sector_num, nb_sectors))
return NULL;

+ if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) {
+ memcpy(bs->boot_sector_data, buf, 512);
+ }
+
ret = drv->bdrv_aio_write(bs, sector_num, buf, nb_sectors, cb, opaque);

if (ret) {
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/block_int.h
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/block_int.h
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/block_int.h
@@ -122,6 +122,9 @@ struct BlockDriverState {
BlockDriver *drv; /* NULL means no media */
void *opaque;

+ int boot_sector_enabled;
+ uint8_t boot_sector_data[512];
+
char filename[1024];
char backing_file[1024]; /* if non zero, the image is a diff of
this file image */
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/pc.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/pc.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/pc.c
@@ -474,45 +474,28 @@ static void bochs_bios_init(void)

/* Generate an initial boot sector which sets state and jump to
a specified vector */
-static void generate_bootsect(uint8_t *option_rom,
- uint32_t gpr[8], uint16_t segs[6], uint16_t ip)
+static void generate_bootsect(uint32_t gpr[8], uint16_t segs[6], uint16_t ip)
{
- uint8_t rom[512], *p, *reloc;
- uint8_t sum;
+ uint8_t bootsect[512], *p;
int i;
+ int hda;
+
+ hda = drive_get_index(IF_IDE, 0, 0);
+ if (hda == -1) {
+ fprintf(stderr, "A disk image must be given for 'hda' when booting "
+ "a Linux kernel\n(if you really don't want it, use
/dev/zero)\n");
+ exit(1);
+ }
+ memset(bootsect, 0, sizeof(bootsect));

- memset(rom, 0, sizeof(rom));
-
- p = rom;
- /* Make sure we have an option rom signature */
- *p++ = 0x55;
- *p++ = 0xaa;
-
- /* ROM size in sectors*/
- *p++ = 1;
-
- /* Hook int19 */
-
- *p++ = 0x50; /* push ax */
- *p++ = 0x1e; /* push ds */
- *p++ = 0x31; *p++ = 0xc0; /* xor ax, ax */
- *p++ = 0x8e; *p++ = 0xd8; /* mov ax, ds */
-
- *p++ = 0xc7; *p++ = 0x06; /* movvw _start,0x64 */
- *p++ = 0x64; *p++ = 0x00;
- reloc = p;
- *p++ = 0x00; *p++ = 0x00;
-
- *p++ = 0x8c; *p++ = 0x0e; /* mov cs,0x66 */
- *p++ = 0x66; *p++ = 0x00;
-
- *p++ = 0x1f; /* pop ds */
- *p++ = 0x58; /* pop ax */
- *p++ = 0xcb; /* lret */
-
- /* Actual code */
- *reloc = (p - rom);
+ /* Copy the MSDOS partition table if possible */
+ bdrv_read(drives_table[hda].bdrv, 0, bootsect, 1);
+ /* Make sure we have a partition signature */
+ bootsect[510] = 0x55;
+ bootsect[511] = 0xaa;

+ /* Actual code */
+ p = bootsect;
*p++ = 0xfa; /* CLI */
*p++ = 0xfc; /* CLD */

@@ -542,13 +525,7 @@ static void generate_bootsect(uint8_t *o
*p++ = segs[1]; /* CS */
*p++ = segs[1] >> 8;

- /* sign rom */
- sum = 0;
- for (i = 0; i < (sizeof(rom) - 1); i++)
- sum += rom[i];
- rom[sizeof(rom) - 1] = -sum;
-
- memcpy(option_rom, rom, sizeof(rom));
+ bdrv_set_boot_sector(drives_table[hda].bdrv, bootsect, sizeof(bootsect));
}

static long get_file_size(FILE *f)
@@ -565,8 +542,7 @@ static long get_file_size(FILE *f)
return size;
}

-static void load_linux(uint8_t *option_rom,
- const char *kernel_filename,
+static void load_linux(const char *kernel_filename,
const char *initrd_filename,
const char *kernel_cmdline)
{
@@ -632,7 +608,9 @@ static void load_linux(uint8_t *option_r

/* Special pages are placed at end of low RAM: pick an arbitrary one and
* subtract a suitably large amount of padding (64kB) to skip BIOS data. */
- xc_get_hvm_param(xc_handle, domid, HVM_PARAM_BUFIOREQ_PFN, &end_low_ram);
+ //xc_get_hvm_param(xc_handle, domid, HVM_PARAM_BUFIOREQ_PFN, &end_low_ram);
+ /* BUFIO Page beyond last_pfn, use 0x7ffc instead. Fix ME. */
+ end_low_ram = 0x7ffc;
end_low_ram = (end_low_ram << 12) - (64*1024);

/* highest address for loading the initrd */
@@ -721,7 +699,7 @@ static void load_linux(uint8_t *option_r
memset(gpr, 0, sizeof gpr);
gpr[4] = cmdline_addr-real_addr-16; /* SP (-16 is paranoia) */

- generate_bootsect(option_rom, gpr, seg, 0);
+ generate_bootsect(gpr, seg, 0);
#endif
}

@@ -932,14 +910,6 @@ vga_bios_error:
int size, offset;

offset = 0;
- if (linux_boot) {
- option_rom_offset = qemu_ram_alloc(TARGET_PAGE_SIZE);
- load_linux(phys_ram_base + option_rom_offset,
- kernel_filename, initrd_filename, kernel_cmdline);
- cpu_register_physical_memory(0xd0000, TARGET_PAGE_SIZE,
- option_rom_offset | IO_MEM_ROM);
- offset = TARGET_PAGE_SIZE;
- }

for (i = 0; i < nb_option_roms; i++) {
size = get_image_size(option_rom[i]);
@@ -973,6 +943,9 @@ vga_bios_error:

bochs_bios_init();

+ if (linux_boot)
+ load_linux(kernel_filename, initrd_filename, kernel_cmdline);
+
cpu_irq = qemu_allocate_irqs(pic_irq_request, NULL, 1);
i8259 = i8259_init(cpu_irq[0]);
ferr_irq = i8259[13];
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/block.h
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/block.h
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/block.h
@@ -82,6 +82,7 @@ int64_t bdrv_getlength(BlockDriverState
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int
*psecs);
int bdrv_commit(BlockDriverState *bs);
+void bdrv_set_boot_sector(BlockDriverState *bs, const uint8_t *data, int size);
/* async block I/O */
typedef struct BlockDriverAIOCB BlockDriverAIOCB;
typedef void BlockDriverCompletionFunc(void *opaque, int ret);
++++++ kmp_filelist ++++++
%defattr (-,root,root)
/lib/modules/%2-%1
/etc/modprobe.d/xen_pvdrivers-%1.conf
++++++ libxen_permissive.patch ++++++
Index: xen-4.2.0-testing/tools/libxen/src/xen_common.c
===================================================================
--- xen-4.2.0-testing.orig/tools/libxen/src/xen_common.c
+++ xen-4.2.0-testing/tools/libxen/src/xen_common.c
@@ -904,8 +904,15 @@ static void parse_into(xen_session *s, x
0 != strcmp((char *)value_node->children->name, "struct") ||
value_node->children->children == NULL)
{
+#if PERMISSIVE
+ fprintf(stderr,
+ "Expected Map from the server, but didn't get one\n");
+ ((arbitrary_map **)value)[slot] = NULL;
+#else
+
server_error(s,
"Expected Map from the server, but didn't get it");
+#endif
}
else
{
++++++ log-guest-console.patch ++++++
Add code to support logging the xen-domU console, as xenconsoled does. Log
output will be saved in /var/log/xen/console/guest-domUname.log.

Signed-off-by: Chunyan Liu <cyliu@xxxxxxxxxx>
---
hw/xen_console.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 71 insertions(+), 0 deletions(-)

Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_console.c
===================================================================
---
xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_console.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_console.c
@@ -38,6 +38,8 @@
#include "qemu-char.h"
#include "xen_backend.h"

+static int log_guest = 0;
+
struct buffer {
uint8_t *data;
size_t consumed;
@@ -54,8 +56,24 @@ struct XenConsole {
void *sring;
CharDriverState *chr;
int backlog;
+ int log_fd;
};

+static int write_all(int fd, const char* buf, size_t len)
+{
+ while (len) {
+ ssize_t ret = write(fd, buf, len);
+ if (ret == -1 && errno == EINTR)
+ continue;
+ if (ret < 0)
+ return -1;
+ len -= ret;
+ buf += ret;
+ }
+
+ return 0;
+}
+
static void buffer_append(struct XenConsole *con)
{
struct buffer *buffer = &con->buffer;
@@ -83,6 +101,15 @@ static void buffer_append(struct XenCons
intf->out_cons = cons;
xen_be_send_notify(&con->xendev);

+ if (con->log_fd != -1) {
+ int logret;
+ logret = write_all(con->log_fd, buffer->data + buffer->size - size,
size);
+ if (logret < 0) {
+ xen_be_printf(&con->xendev, 1, "Write to log failed on domain %d:
%d (%s)\n",
+ con->xendev.dom, errno, strerror(errno));
+ }
+ }
+
if (buffer->max_capacity &&
buffer->size > buffer->max_capacity) {
/* Discard the middle of the data. */
@@ -176,6 +203,37 @@ static void xencons_send(struct XenConso
}
}

+static int create_domain_log(struct XenConsole *con)
+{
+ char *logfile;
+ char *path, *domname;
+ int fd;
+ const char *logdir = "/var/log/xen/console";
+
+ path = xs_get_domain_path(xenstore, con->xendev.dom);
+ domname = xenstore_read_str(path, "name");
+ free(path);
+ if (!domname)
+ return -1;
+
+ if (mkdir(logdir, 0755) && errno != EEXIST)
+ {
+ xen_be_printf(&con->xendev, 1, "Directory %s does not exist and fail
to create it!", logdir);
+ return -1;
+ }
+
+ if (asprintf(&logfile, "%s/guest-%s.log", logdir, domname) < 0)
+ return -1;
+ qemu_free(domname);
+
+ fd = open(logfile, O_WRONLY|O_CREAT|O_APPEND, 0644);
+ free(logfile);
+ if (fd == -1)
+ xen_be_printf(&con->xendev, 1, "Failed to open log %s: %d (%s)",
logfile, errno, strerror(errno));
+
+ return fd;
+}
+
/* -------------------------------------------------------------------- */

static int con_init(struct XenDevice *xendev)
@@ -183,6 +241,7 @@ static int con_init(struct XenDevice *xe
struct XenConsole *con = container_of(xendev, struct XenConsole, xendev);
char *type, *dom, label[32];
const char *output;
+ char *logenv = NULL;

/* setup */
dom = xs_get_domain_path(xenstore, con->xendev.dom);
@@ -209,6 +268,10 @@ static int con_init(struct XenDevice *xe
con->chr = qemu_chr_open(label, output, NULL);
xenstore_store_pv_console_info(con->xendev.dev, con->chr, output);

+ logenv = getenv("XENCONSOLED_TRACE");
+ if (logenv != NULL && strlen(logenv) == strlen("guest") && !strcmp(logenv,
"guest")) {
+ log_guest = 1;
+ }
return 0;
}

@@ -246,6 +309,9 @@ static int con_initialise(struct XenDevi
con->xendev.remote_port,
con->xendev.local_port,
con->buffer.max_capacity);
+ con->log_fd = -1;
+ if (log_guest)
+ con->log_fd = create_domain_log(con);
return 0;
}

@@ -266,6 +332,12 @@ static void con_disconnect(struct XenDev
xc_gnttab_munmap(xendev->gnttabdev, con->sring, 1);
con->sring = NULL;
}
+
+ if (con->log_fd != -1) {
+ close(con->log_fd);
+ con->log_fd = -1;
+ }
+
}

static void con_event(struct XenDevice *xendev)
++++++ logrotate.conf ++++++
compress
missingok

/var/log/xen/xend*.log {
rotate 5
size 1M
notifempty
copytruncate
}

/var/log/xen/domain-builder-ng.log /var/log/xen/xen-hotplug.log {
rotate 2
size 100k
notifempty
copytruncate
}

/var/log/xen/qemu-dm.*.log {
rotate 0
monthly
}
++++++ magic_ioport_compat.patch ++++++
Make our PV drivers work with older hosts that do not recognize the new PV
driver protocol.

Signed-off-by: K. Y. Srinivasan <ksrinivasan@xxxxxxxxxx>

Index:
xen-4.2.0-testing/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
===================================================================
---
xen-4.2.0-testing.orig/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
+++ xen-4.2.0-testing/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
@@ -321,7 +321,10 @@ static int check_platform_magic(struct d

if (magic != XEN_IOPORT_MAGIC_VAL) {
err = "unrecognised magic value";
- goto no_dev;
+ /*
+ * Older backend; just return 0 to be compatible.
+ */
+ return 0;
}

protocol = inb(XEN_IOPORT_PROTOVER);
++++++ minios-fixups.patch ++++++
Index: xen-4.2.0-testing/extras/mini-os/lib/math.c
===================================================================
--- xen-4.2.0-testing.orig/extras/mini-os/lib/math.c
+++ xen-4.2.0-testing/extras/mini-os/lib/math.c
@@ -190,6 +190,7 @@ __qdivrem(u_quad_t uq, u_quad_t vq, u_qu
* and thus
* m = 4 - n <= 2
*/
+ tmp.ul[H] = tmp.ul[L] = 0;
tmp.uq = uq;
u[0] = 0;
u[1] = HHALF(tmp.ul[H]);
++++++ multi-xvdp.patch ++++++
Allow multiple bootloader loopback devices

Starting several domains concurrently can fail due to using a single
bootloader loopback device. This patch creates a list of bootloader
loopback devices so more than one instance of bootloader can be run
concurrently.

Index: xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -74,7 +74,7 @@ from xen.xend.XendPSCSI import XendPSCSI
from xen.xend.XendDSCSI import XendDSCSI, XendDSCSI_HBA

MIGRATE_TIMEOUT = 30.0
-BOOTLOADER_LOOPBACK_DEVICE = '/dev/xvdp'
+BOOTLOADER_LOOPBACK_DEVICES = ['/dev/xvd' + chr(x) for x in range(ord('z'),
ord('d'), -1)]

xc = xen.lowlevel.xc.xc()
xoptions = XendOptions.instance()
@@ -3322,33 +3322,38 @@ class XendDomainInfo:
# This is a file, not a device. pygrub can cope with a
# file if it's raw, but if it's QCOW or other such formats
# used through blktap, then we need to mount it first.
-
- log.info("Mounting %s on %s." %
- (fn, BOOTLOADER_LOOPBACK_DEVICE))
-
- vbd = {
- 'mode': 'RW',
- 'device': BOOTLOADER_LOOPBACK_DEVICE,
- }
-
- from xen.xend import XendDomain
- dom0 = XendDomain.instance().privilegedDomain()
- mounted_vbd_uuid = dom0.create_vbd(vbd, disk);
- vbd_uuid = dom0.create_vbd(vbd, disk)
- dom0._waitForDeviceFrontUUID(vbd_uuid)
- fn = BOOTLOADER_LOOPBACK_DEVICE
-
+ # Try all possible loopback_devices
+ for loopback_device in BOOTLOADER_LOOPBACK_DEVICES:
+ log.info("Mounting %s on %s." % (fn, loopback_device))
+ vbd = { 'mode' : 'RW', 'device' : loopback_device, }
+ try:
+ from xen.xend import XendDomain
+ dom0 = XendDomain.instance().privilegedDomain()
+ mounted_vbd_uuid = dom0.create_vbd(vbd, disk)
+ dom0._waitForDeviceFrontUUID(mounted_vbd_uuid)
+ fn = loopback_device
+ break
+ except VmError, e:
+ if str(e).find('already connected.') != -1:
+ continue
+ elif str(e).find('isn\'t accessible') != -1:
+ dom0.destroyDevice('vbd', loopback_device, force =
True, rm_cfg = True)
+ continue
+ else:
+ raise
+ else:
+ raise
try:
blcfg = bootloader(blexec, fn, self, False,
bootloader_args, kernel, ramdisk, args)
finally:
if mounted:
log.info("Unmounting %s from %s." %
- (fn, BOOTLOADER_LOOPBACK_DEVICE))
+ (fn, loopback_device))
if devtype in ['tap', 'tap2']:
- dom0.destroyDevice('tap', BOOTLOADER_LOOPBACK_DEVICE,
rm_cfg = True)
+ dom0.destroyDevice('tap', loopback_device, rm_cfg =
True)
else:
- dom0.destroyDevice('vbd', BOOTLOADER_LOOPBACK_DEVICE,
rm_cfg = True)
+ dom0.destroyDevice('vbd', loopback_device, rm_cfg =
True)
if blcfg is None:
msg = "Had a bootloader specified, but can't find disk"
log.error(msg)
++++++ network-nat-open-SuSEfirewall2-FORWARD.patch ++++++
Open the SuSEfirewall2 FORWARD rule when using Xen NAT

Index: xen-4.2.0-testing/tools/hotplug/Linux/network-nat
===================================================================
--- xen-4.2.0-testing.orig/tools/hotplug/Linux/network-nat
+++ xen-4.2.0-testing/tools/hotplug/Linux/network-nat
@@ -83,6 +83,7 @@ function dhcp_stop()
op_start() {
echo 1 >/proc/sys/net/ipv4/ip_forward
iptables -t nat -A POSTROUTING -o ${netdev} -j MASQUERADE
+ iptables -P FORWARD ACCEPT
[ "$dhcp" != 'no' ] && dhcp_start
}

@@ -90,6 +91,7 @@ op_start() {
op_stop() {
[ "$dhcp" != 'no' ] && dhcp_stop
iptables -t nat -D POSTROUTING -o ${netdev} -j MASQUERADE
+ iptables -P FORWARD DROP
}


++++++ pvdrv-import-shared-info.patch ++++++
Index: xen-4.2.0-testing/unmodified_drivers/linux-2.6/platform-pci/evtchn.c
===================================================================
--- xen-4.2.0-testing.orig/unmodified_drivers/linux-2.6/platform-pci/evtchn.c
+++ xen-4.2.0-testing/unmodified_drivers/linux-2.6/platform-pci/evtchn.c
@@ -40,7 +40,9 @@
#include <xen/platform-compat.h>
#endif

+#ifndef shared_info_area
void *shared_info_area;
+#endif

#define is_valid_evtchn(x) ((x) != 0)
#define evtchn_from_irq(x) (irq_evtchn[irq].evtchn)
Index:
xen-4.2.0-testing/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
===================================================================
---
xen-4.2.0-testing.orig/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
+++ xen-4.2.0-testing/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
@@ -76,7 +76,6 @@ static uint64_t callback_via;
static int __devinit init_xen_info(void)
{
struct xen_add_to_physmap xatp;
- extern void *shared_info_area;

#ifdef __ia64__
xencomm_initialize();
@@ -84,6 +83,7 @@ static int __devinit init_xen_info(void)

setup_xen_features();

+#ifndef shared_info_area
shared_info_frame = alloc_xen_mmio(PAGE_SIZE) >> PAGE_SHIFT;
xatp.domid = DOMID_SELF;
xatp.idx = 0;
@@ -96,6 +96,11 @@ static int __devinit init_xen_info(void)
ioremap(shared_info_frame << PAGE_SHIFT, PAGE_SIZE);
if (shared_info_area == NULL)
panic("can't map shared info\n");
+#else
+ shared_info_frame = __pa(shared_info_area) >> PAGE_SHIFT;
+ printk(KERN_INFO "Using kernel provided shared info (pfn=%lx)\n",
+ shared_info_frame);
+#endif

return 0;
}
Index:
xen-4.2.0-testing/unmodified_drivers/linux-2.6/platform-pci/platform-pci.h
===================================================================
---
xen-4.2.0-testing.orig/unmodified_drivers/linux-2.6/platform-pci/platform-pci.h
+++ xen-4.2.0-testing/unmodified_drivers/linux-2.6/platform-pci/platform-pci.h
@@ -27,6 +27,11 @@
unsigned long alloc_xen_mmio(unsigned long len);
void platform_pci_resume(void);

+#ifdef CONFIG_ENLIGHTEN_SPINLOCKS
+#define shared_info_area xen_shared_info
+#endif
+extern void *shared_info_area;
+
extern struct pci_dev *xen_platform_pdev;

#endif /* _XEN_PLATFORM_PCI_H */
++++++ pvdrv_emulation_control.patch ++++++
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
===================================================================
---
xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
@@ -365,6 +365,19 @@ static void platform_ioport_write(void *
net_tap_shutdown_all();
fprintf(logfile, "Done.\n");
break;
+ case 8:
+ if (val ==1 ) {
+ fprintf(logfile, "Disconnect IDE hard disk...\n");
+ ide_unplug_harddisks();
+ fprintf(logfile, "Done.\n");
+ } else if (val == 2) {
+ fprintf(logfile, "Disconnect netifs...\n");
+ pci_unplug_netifs();
+ fprintf(logfile, "Shutdown taps...\n");
+ net_tap_shutdown_all();
+ fprintf(logfile, "Done.\n");
+ }
+ break;
default:
fprintf(logfile, "Write to bad port %x (base %x) on evtchn device.\n",
addr, ioport_base);
++++++ pygrub-netware-xnloader.patch ++++++
Index: xen-4.2.1-testing/tools/pygrub/src/pygrub
===================================================================
--- xen-4.2.1-testing.orig/tools/pygrub/src/pygrub
+++ xen-4.2.1-testing/tools/pygrub/src/pygrub
@@ -26,6 +26,7 @@ import fsimage
import grub.GrubConf
import grub.LiloConf
import grub.ExtLinuxConf
+import xnloader

PYGRUB_VER = 0.6
FS_READ_MAX = 1024 * 1024
@@ -734,6 +735,8 @@ if __name__ == "__main__":
if len(data) == 0:
os.close(tfd)
del datafile
+ if file_to_read == "/nwserver/xnloader.sys":
+ xnloader.patch_netware_loader(ret)
return ret
try:
os.write(tfd, data)
++++++ qemu-dm-segfault.patch ++++++
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/ide.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/ide.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/ide.c
@@ -935,8 +935,9 @@ static inline void ide_dma_submit_check(

static inline void ide_set_irq(IDEState *s)
{
- BMDMAState *bm = s->bmdma;
- if (!s->bs) return; /* ouch! (see ide_flush_cb) */
+ BMDMAState *bm;
+ if (!s || !s->bs) return; /* ouch! (see ide_flush_cb) */
+ bm = s->bmdma;
if (!(s->cmd & IDE_CMD_DISABLE_IRQ)) {
if (bm) {
bm->status |= BM_STATUS_INT;
@@ -1224,14 +1225,14 @@ static void ide_read_dma_cb(void *opaque
int n;
int64_t sector_num;

+ if (!s || !s->bs) return; /* ouch! (see ide_flush_cb) */
+
if (ret < 0) {
dma_buf_commit(s, 1);
ide_dma_error(s);
return;
}

- if (!s->bs) return; /* ouch! (see ide_flush_cb) */
-
n = s->io_buffer_size >> 9;
sector_num = ide_get_sector(s);
if (n > 0) {
@@ -1335,6 +1336,8 @@ static void ide_write_flush_cb(void *opa
BMDMAState *bm = opaque;
IDEState *s = bm->ide_if;

+ if (!s) return; /* yikes */
+
if (ret != 0) {
ide_dma_error(s);
return;
@@ -1366,13 +1369,13 @@ static void ide_write_dma_cb(void *opaqu
int n;
int64_t sector_num;

+ if (!s || !s->bs) return; /* ouch! (see ide_flush_cb) */
+
if (ret < 0) {
if (ide_handle_write_error(s, -ret, BM_STATUS_DMA_RETRY))
return;
}

- if (!s->bs) return; /* ouch! (see ide_flush_cb) */
-
n = s->io_buffer_size >> 9;
sector_num = ide_get_sector(s);
if (n > 0) {
@@ -1429,7 +1432,7 @@ static void ide_flush_cb(void *opaque, i
{
IDEState *s = opaque;

- if (!s->bs) return; /* ouch! (see below) */
+ if (!s || !s->bs) return; /* ouch! (see below) */

if (ret) {
/* We are completely doomed. The IDE spec does not permit us
@@ -1686,7 +1689,7 @@ static void ide_atapi_cmd_read_dma_cb(vo
IDEState *s = bm->ide_if;
int data_offset, n;

- if (!s->bs) return; /* ouch! (see ide_flush_cb) */
+ if (!s || !s->bs) return; /* ouch! (see ide_flush_cb) */

if (ret < 0) {
ide_atapi_io_error(s, ret);
@@ -2365,7 +2368,7 @@ static void cdrom_change_cb(void *opaque
IDEState *s = opaque;
uint64_t nb_sectors;

- if (!s->bs) return; /* ouch! (see ide_flush_cb) */
+ if (!s || !s->bs) return; /* ouch! (see ide_flush_cb) */

bdrv_get_geometry(s->bs, &nb_sectors);
s->nb_sectors = nb_sectors;
++++++ qemu-security-etch1.diff ++++++
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/ne2000.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/ne2000.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/ne2000.c
@@ -218,7 +218,7 @@ static int ne2000_can_receive(void *opaq
NE2000State *s = opaque;

if (s->cmd & E8390_STOP)
- return 1;
+ return 0;
return !ne2000_buffer_full(s);
}

Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/pc.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/pc.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/pc.c
@@ -413,7 +413,8 @@ static void bochs_bios_write(void *opaqu
case 0x400:
case 0x401:
fprintf(stderr, "BIOS panic at rombios.c, line %d\n", val);
- exit(1);
+ /* according to documentation, these can be safely ignored */
+ break;
case 0x402:
case 0x403:
#ifdef DEBUG_BIOS
@@ -436,8 +437,9 @@ static void bochs_bios_write(void *opaqu
/* LGPL'ed VGA BIOS messages */
case 0x501:
case 0x502:
+ /* according to documentation, these can be safely ignored */
fprintf(stderr, "VGA BIOS panic, line %d\n", val);
- exit(1);
+ break;
case 0x500:
case 0x503:
#ifdef DEBUG_BIOS
++++++ serial-split.patch ++++++
Index: xen-4.2.0-testing/tools/misc/serial-split/Makefile
===================================================================
--- /dev/null
+++ xen-4.2.0-testing/tools/misc/serial-split/Makefile
@@ -0,0 +1,20 @@
+CC ?= gcc
+CFLAGS ?= -Wall -Os
+CFILES = $(wildcard *.c)
+OBJS = $(patsubst %.c,%.o,$(wildcard *.c))
+TARGET = serial-split
+
+all: $(TARGET)
+
+install: all
+ install -d $(DESTDIR)/usr/bin
+ install -s $(TARGET) $(DESTDIR)/usr/bin/
+
+clean:
+ rm *.o $(TARGET) *~
+
+$(TARGET): $(OBJS)
+ $(CC) $(CFLAGS) -o $@ $^
+
+%.o: %.c Makefile
+ $(CC) $(CFLAGS) -c -o $@ $<
Index: xen-4.2.0-testing/tools/misc/serial-split/serial-split.c
===================================================================
--- /dev/null
+++ xen-4.2.0-testing/tools/misc/serial-split/serial-split.c
@@ -0,0 +1,422 @@
+/*
+ * serial-split.c
+ * pdb / console splitter
+ *
+ * Copyright 2005 Charles Coffing <ccoffing@xxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+/*
+ * Typical setup:
+ *
+ * Development box Xen box
+ * ...-----+ +-----...
+ * +---------+ | |
+ * | gdb | | |
+ * | |\ high | |
+ * +---------+ \ | |
+ * \+-----------+ | serial | +------------------+
+ * | splitter |------------| Xen |
+ * /+-----------+ | | | - pdb (com1H)|
+ * +---------+ / | | | - printk (com1) |
+ * | console |/ low | | +------------------+
+ * | viewer | | |
+ * +---------+ | |
+ * ...-----+ +-----...
+ */
+
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <termios.h>
+#include <sys/signal.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <netinet/in.h>
+
+const unsigned int DefaultLowPort = 12010;
+const unsigned int DefaultBaud = 115200;
+const char DefaultSerialDevice[] = "/dev/ttyS0";
+
+#define DEBUG 0
+#define MAX(a,b) ((a)<(b)?(b):(a))
+
+
+static int cook_baud(int baud)
+{
+ int cooked_baud = 0;
+ switch (baud)
+ {
+ case 50: cooked_baud = B50; break;
+ case 75: cooked_baud = B75; break;
+ case 110: cooked_baud = B110; break;
+ case 134: cooked_baud = B134; break;
+ case 150: cooked_baud = B150; break;
+ case 200: cooked_baud = B200; break;
+ case 300: cooked_baud = B300; break;
+ case 600: cooked_baud = B600; break;
+ case 1200: cooked_baud = B1200; break;
+ case 1800: cooked_baud = B1800; break;
+ case 2400: cooked_baud = B2400; break;
+ case 4800: cooked_baud = B4800; break;
+ case 9600: cooked_baud = B9600; break;
+ case 19200: cooked_baud = B19200; break;
+ case 38400: cooked_baud = B38400; break;
+ case 57600: cooked_baud = B57600; break;
+ case 115200: cooked_baud = B115200; break;
+ }
+ return cooked_baud;
+}
+
+
+static int start_listener(unsigned short port)
+{
+ int fd;
+ struct sockaddr_in sin;
+ int on = 1;
+
+ if ((fd = socket (AF_INET, SOCK_STREAM, 0)) < 0)
+ {
+ perror("socket");
+ goto out1;
+ }
+
+ setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof (on));
+
+ memset(&sin, 0, sizeof(sin));
+ sin.sin_family = AF_INET;
+ sin.sin_port = htons (port);
+ sin.sin_addr.s_addr = INADDR_ANY;
+ if (bind(fd, (struct sockaddr *)&sin, sizeof(sin)) < 0)
+ {
+ perror("bind");
+ goto out2;
+ }
+
+ if (listen(fd, 1) < 0)
+ {
+ perror("listen");
+ goto out2;
+ }
+
+ fprintf(stderr, "Listening on port %d\n", port);
+
+ return fd;
+
+out2:
+ close(fd);
+out1:
+ return -1;
+}
+
+
+static int accept_conn(int fd)
+{
+ int on = 1;
+ int new_fd;
+ struct sockaddr_in from;
+ socklen_t fromlen = sizeof(from);
+
+ new_fd = accept(fd, (struct sockaddr *)&from, &fromlen);
+ if (new_fd < 0)
+ perror("accept");
+ ioctl(new_fd, FIONBIO, &on);
+
+ fprintf(stderr, "Accepted connection on %d\n", new_fd);
+
+ return new_fd;
+}
+
+
+static void close_conn(int * fd)
+{
+ shutdown(*fd, 2);
+ close(*fd);
+ *fd = -1;
+}
+
+
+static int receive_data(int * fd, char * buf, ssize_t max_bytes, int * poll)
+{
+ ssize_t bytes;
+ if ((bytes = read(*fd, buf, max_bytes)) < 0)
+ {
+ perror("read");
+ *poll = 1;
+ return 0;
+ }
+ else if (bytes == 0)
+ {
+ close_conn(fd);
+ *poll = 0;
+ return 0;
+ }
+ else
+ {
+ if (bytes == max_bytes)
+ *poll = 1;
+ else
+ *poll = 0;
+#if DEBUG
+ {
+ ssize_t i;
+ fprintf(stderr, "Received %d bytes on %d:\n", bytes, *fd);
+ for (i = 0; i < bytes; ++ i)
+ {
+ if ((i & 0xf) == 0)
+ printf(" ");
+ printf("%02x", buf[i] & 0xff);
+ if (((i+1) & 0xf) == 0 || i + 1 == bytes)
+ printf("\n");
+ else
+ printf(" ");
+ }
+ }
+#endif
+ return bytes;
+ }
+}
+
+
+static void set_high_bit(char * buf, size_t bytes)
+{
+ size_t i;
+ for(i = 0; i < bytes; ++ i)
+ buf[i] |= 0x80;
+}
+
+
+static void clear_high_bit(char * buf, size_t bytes)
+{
+ size_t i;
+ for(i = 0; i < bytes; ++ i)
+ buf[i] &= 0x7f;
+}
+
+
+static int open_serial(char const * serial_dev, int baud)
+{
+ struct termios newsertio;
+ int serial_fd;
+ memset(&newsertio, 0, sizeof(newsertio));
+
+ if ((serial_fd = open(serial_dev, O_RDWR | O_NOCTTY | O_NONBLOCK)) < 0)
+ {
+ perror(serial_dev);
+ return -1;
+ }
+
+ newsertio.c_cflag = baud | CS8 | CLOCAL | CREAD;
+ newsertio.c_iflag = IGNBRK | IGNPAR; /* raw input */
+ newsertio.c_oflag = 0; /* raw output */
+ newsertio.c_lflag = 0; /* no echo, no signals */
+ newsertio.c_cc[VMIN] = 1;
+ newsertio.c_cc[VTIME] = 0;
+ tcflush(serial_fd, TCIFLUSH);
+ tcsetattr(serial_fd, TCSANOW, &newsertio);
+
+ fprintf(stderr, "Listening on %s\n", serial_dev);
+
+ return serial_fd;
+}
+
+
+static void main_loop(int serial_fd, int low_listener, int high_listener)
+{
+ fd_set rdfds;
+ int low_poll = 0, high_poll = 0, serial_poll = 0;
+ int low_fd = -1, high_fd = -1;
+
+ while(1)
+ {
+ char buf[1024];
+ ssize_t bytes;
+ int max;
+
+ FD_ZERO(&rdfds);
+ FD_SET(low_fd < 0 ? low_listener : low_fd, &rdfds);
+ FD_SET(high_fd < 0 ? high_listener : high_fd, &rdfds);
+ FD_SET(serial_fd, &rdfds);
+
+ max = MAX(low_fd, low_listener);
+ max = MAX(max, high_fd);
+ max = MAX(max, high_listener);
+ max = MAX(max, serial_fd);
+
+ if (select(max + 1, &rdfds, NULL, NULL, NULL) < 0)
+ {
+ perror("select");
+ continue;
+ }
+
+ if (FD_ISSET(low_listener, &rdfds))
+ {
+ assert(low_fd < 0);
+ low_fd = accept_conn(low_listener);
+ }
+
+ if (FD_ISSET(high_listener, &rdfds))
+ {
+ assert(high_fd < 0);
+ high_fd = accept_conn(high_listener);
+ }
+
+ if (low_poll || (low_fd >= 0 && FD_ISSET(low_fd, &rdfds)))
+ {
+ if ((bytes = receive_data(&low_fd, &buf[0], sizeof(buf),
+ &low_poll)) > 0)
+ {
+ clear_high_bit(&buf[0], bytes);
+ if (write(serial_fd, &buf[0], bytes) < 0)
+ perror("write");
+ }
+ }
+
+ if (high_poll || (high_fd >= 0 && FD_ISSET(high_fd, &rdfds)))
+ {
+ if ((bytes = receive_data(&high_fd, &buf[0], sizeof(buf),
+ &high_poll)) > 0)
+ {
+ set_high_bit(&buf[0], bytes);
+ if (write(serial_fd, &buf[0], bytes) < 0)
+ perror("write");
+ }
+ }
+
+ if (serial_poll || FD_ISSET(serial_fd, &rdfds))
+ {
+ if ((bytes = receive_data(&serial_fd, &buf[0], sizeof(buf),
+ &serial_poll)) > 0)
+ {
+ ssize_t i;
+ for (i = 0; i < bytes; ++ i)
+ {
+ if (buf[i] & 0x80)
+ {
+ if (high_fd >= 0)
+ {
+ buf[i] &= 0x7f;
+ if ((write(high_fd, &buf[i], 1)) < 0)
+ {
+ perror("write");
+ close_conn(&high_fd);
+ high_poll = 0;
+ }
+ }
+ }
+ else
+ {
+ if (low_fd >= 0)
+ {
+ if ((write(low_fd, &buf[i], 1)) < 0)
+ {
+ perror("write");
+ close_conn(&low_fd);
+ low_poll = 0;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+
+static void usage()
+{
+ printf(
+"Description:\n"
+" Splits the serial port between two TCP ports. Bytes read from the\n"
+" serial port will be delivered to one of the two TCP ports (high or\n"
+" low) depending on whether the high bit is set. Bytes written to
the\n"
+" TCP ports will be forwarded to the serial port; the high bit will
be\n"
+" set or cleared to denote the source.\n"
+"Usage:\n"
+" serial-split [-d<serial-device>] [-b<baud>]\n"
+" [-l<low-port>] [-h<high-port>]\n"
+"Parameters:\n"
+" -d<serial-device> Defaults to %s.\n"
+" -b<baud> Baud rate of the serial port. Defaults to %d.\n"
+" Also assumes 8N1.\n"
+" -l<low-port> Low TCP port. Defaults to %d, or one less than\n"
+" the high port.\n"
+" -h<high-port> High TCP port. Defaults to %d, or one more than\n"
+" the low port.\n",
+DefaultSerialDevice, DefaultBaud, DefaultLowPort, DefaultLowPort + 1);
+
+ exit(1);
+}
+
+
+int main(int argc, char **argv)
+{
+ int cooked_baud = cook_baud(DefaultBaud);
+ char const * serial_dev = DefaultSerialDevice;
+ int low_port = -1, high_port = -1;
+ int serial_fd, low_listener, high_listener;
+
+ while ( --argc != 0 )
+ {
+ char *p = argv[argc];
+ if ( *(p++) != '-' )
+ usage();
+ switch (*(p++))
+ {
+ case 'b':
+ if ( (cooked_baud = cook_baud(atoi(p))) == 0 )
+ {
+ fprintf(stderr, "Bad baud rate\n");
+ exit(1);
+ }
+ break;
+ case 'd':
+ serial_dev = p;
+ break;
+ case 'l':
+ if ((low_port = atoi(p)) <= 0)
+ usage();
+ break;
+ case 'h':
+ if ((high_port = atoi(p)) <= 0)
+ usage();
+ break;
+ default:
+ usage();
+ }
+ }
+
+ if (low_port == -1 && high_port == -1)
+ low_port = DefaultLowPort;
+ if (low_port == -1)
+ low_port = high_port - 1;
+ if (high_port == -1)
+ high_port = low_port + 1;
+
+ if ((serial_fd = open_serial(serial_dev, cooked_baud)) < 0 ||
+ (low_listener = start_listener(low_port)) < 0 ||
+ (high_listener = start_listener(high_port)) < 0)
+ exit(1);
+
+ main_loop(serial_fd, low_listener, high_listener);
+
+ return 0;
+}
+
++++++ stdvga-cache.patch ++++++
Index: xen-4.2.0-testing/xen/arch/x86/hvm/stdvga.c
===================================================================
--- xen-4.2.0-testing.orig/xen/arch/x86/hvm/stdvga.c
+++ xen-4.2.0-testing/xen/arch/x86/hvm/stdvga.c
@@ -135,7 +135,10 @@ static int stdvga_outb(uint64_t addr, ui

/* When in standard vga mode, emulate here all writes to the vram buffer
* so we can immediately satisfy reads without waiting for qemu. */
- s->stdvga = (s->sr[7] == 0x00);
+ s->stdvga =
+ (s->sr[7] == 0x00) && /* standard vga mode */
+ (s->gr[6] == 0x05); /* misc graphics register w/ MemoryMapSelect=1
+ * 0xa0000-0xaffff (64k region), AlphaDis=1 */

if ( !prev_stdvga && s->stdvga )
{
++++++ supported_module.diff ++++++
Make our PV drivers "Novell supported modules"

Signed-off-by: K. Y. Srinivasan <ksrinivasan@xxxxxxxxxx>

Index: xen-4.2.0-testing/unmodified_drivers/linux-2.6/Module.supported
===================================================================
--- /dev/null
+++ xen-4.2.0-testing/unmodified_drivers/linux-2.6/Module.supported
@@ -0,0 +1,6 @@
+xen-vbd
+xen-platform-pci
+xen-vnif
+xenbus
+xen-balloon
+xen-scsi
++++++ suspend_evtchn_lock.patch ++++++
Fix the problem that the suspend event channel lock file might become obsolete
for some reason, such as a segmentation fault or other abnormal exit; once an
obsolete lock file exists, it might affect later save operations.
This has been discussed with upstream but, for some reason, was not accepted.
http://xen.1045712.n5.nabble.com/Re-PATCH-improve-suspend-evtchn-lock-processing-td3395229.html

Signed-off-by: Chunyan Liu <cyliu@xxxxxxxx>

Index: xen-4.2.0-testing/tools/libxc/xc_suspend.c
===================================================================
--- xen-4.2.0-testing.orig/tools/libxc/xc_suspend.c
+++ xen-4.2.0-testing/tools/libxc/xc_suspend.c
@@ -16,8 +16,43 @@

#include "xc_private.h"
#include "xenguest.h"
+#include <signal.h>
+#ifdef __MINIOS__
+extern int kill (__pid_t __pid, int __sig);
+#endif

#define SUSPEND_LOCK_FILE "/var/lib/xen/suspend_evtchn"
+/* cleanup obsolete suspend lock file which is unlinked for any reason,
+so that current process can get lock */
+static void clean_obsolete_lock(int domid)
+{
+ int fd, pid, n;
+ char buf[128];
+ char suspend_file[256];
+
+ snprintf(suspend_file, sizeof(suspend_file), "%s_%d_lock.d",
+ SUSPEND_LOCK_FILE, domid);
+ fd = open(suspend_file, O_RDWR);
+
+ if (fd < 0)
+ return;
+
+ n = read(fd, buf, 127);
+
+ close(fd);
+
+ if (n > 0)
+ {
+ sscanf(buf, "%d", &pid);
+ /* pid does not exist, this lock file is obsolete, just delete it */
+ if ( kill(pid,0) )
+ {
+ unlink(suspend_file);
+ return;
+ }
+ }
+}
+
static int lock_suspend_event(xc_interface *xch, int domid)
{
int fd, rc;
@@ -27,6 +62,7 @@ static int lock_suspend_event(xc_interfa

snprintf(suspend_file, sizeof(suspend_file), "%s_%d_lock.d",
SUSPEND_LOCK_FILE, domid);
+ clean_obsolete_lock(domid);
mask = umask(022);
fd = open(suspend_file, O_CREAT | O_EXCL | O_RDWR, 0666);
if (fd < 0)
@@ -41,6 +77,9 @@ static int lock_suspend_event(xc_interfa
rc = write_exact(fd, buf, strlen(buf));
close(fd);

+ if(rc)
+ unlink(suspend_file);
+
return rc;
}

@@ -127,8 +166,7 @@ int xc_suspend_evtchn_init(xc_interface
return suspend_evtchn;

cleanup:
- if (suspend_evtchn != -1)
- xc_suspend_evtchn_release(xch, xce, domid, suspend_evtchn);
+ xc_suspend_evtchn_release(xch, xce, domid, suspend_evtchn);

return -1;
}
++++++ sysconfig.pciback ++++++
## Path: System/Virtualization
## Type: string
## Default: ""
#
# Space delimited list of PCI devices to late bind to pciback
# Format: <driver>,<PCI ID>
#
#XEN_PCI_HIDE_LIST="e1000,0000:0b:00.0 e1000,0000:0b:00.1"
XEN_PCI_HIDE_LIST=""
++++++ tapdisk-ioemu-logfile.patch ++++++
From 903a145f3eace5e3ae914f0335ab6c4e33635d2f Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@xxxxxxx>
Date: Tue, 10 Mar 2009 16:36:23 +0100
Subject: [PATCH 4/6] tapdisk-ioemu: Write messages to a logfile

Typically, tapdisk-ioemu runs as a daemon and messages to stderr are
simply lost. Write them to a logfile instead.

Signed-off-by: Kevin Wolf <kwolf@xxxxxxx>
---
tapdisk-ioemu.c | 19 +++++++++++++------
1 files changed, 13 insertions(+), 6 deletions(-)

Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/tapdisk-ioemu.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/tapdisk-ioemu.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/tapdisk-ioemu.c
@@ -78,15 +78,22 @@ int main(void)
struct timeval tv;
void *old_fd_start = NULL;

- logfile = stderr;
-
+ /* Daemonize */
+ if (fork() != 0)
+ exit(0);
+
bdrv_init();
init_blktap();

- /* Daemonize */
- if (fork() != 0)
- exit(0);
-
+ logfile = fopen("/var/log/xen/tapdisk-ioemu.log", "a");
+ if (logfile) {
+ setbuf(logfile, NULL);
+ fclose(stderr);
+ stderr = logfile;
+ } else {
+ logfile = stderr;
+ }
+
/*
* Main loop: Pass events to the corrsponding handlers and check for
* completed aio operations.
++++++ tapdisk-ioemu-shutdown-fix.patch ++++++
From 9062564d79cb45029403cc998b48410e42ead924 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@xxxxxxx>
Date: Tue, 10 Mar 2009 16:45:44 +0100
Subject: [PATCH 6/6] tapdisk-ioemu: Fix shutdown condition

Even when opening the only image a tapdisk-ioemu instance is
responsible for fails, it can't immediately shut down. blktapctrl
still wants to communicate with tapdisk-ioemu and close the disk.

This patch changes tapdisk-ioemu to count the connections to
blktapctrl rather than the number of opened disk images.

Signed-off-by: Kevin Wolf <kwolf@xxxxxxx>
---
hw/xen_blktap.c | 5 ++++-
tapdisk-ioemu.c | 13 ++++++++++---
2 files changed, 14 insertions(+), 4 deletions(-)

Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
@@ -65,6 +65,7 @@ int read_fd;
int write_fd;

static pid_t process;
+int connected_disks = 0;
fd_list_entry_t *fd_start = NULL;

static void handle_blktap_iomsg(void* private);
@@ -541,6 +542,7 @@ static void handle_blktap_ctrlmsg(void*

/* Allocate the disk structs */
s = state_init();
+ connected_disks++;

/*Open file*/
if (s == NULL || open_disk(s, path, msg->drivertype,
msg->readonly)) {
@@ -591,7 +593,8 @@ static void handle_blktap_ctrlmsg(void*
case CTLMSG_CLOSE:
s = get_state(msg->cookie);
if (s) unmap_disk(s);
- break;
+ connected_disks--;
+ break;

case CTLMSG_PID:
memset(buf, 0x00, MSG_SIZE);
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/tapdisk-ioemu.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/tapdisk-ioemu.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/tapdisk-ioemu.c
@@ -14,6 +14,7 @@ extern void qemu_aio_init(void);
extern void qemu_aio_poll(void);

extern void *fd_start;
+extern int connected_disks;

int domid = 0;
FILE* logfile;
@@ -76,7 +77,7 @@ int main(void)
int max_fd;
fd_set rfds;
struct timeval tv;
- void *old_fd_start = NULL;
+ int old_connected_disks = 0;

/* Daemonize */
if (fork() != 0)
@@ -128,11 +129,17 @@ int main(void)
pioh = &ioh->next;
}

+ if (old_connected_disks != connected_disks)
+ fprintf(stderr, "connected disks: %d => %d\n",
+ old_connected_disks, connected_disks);
+
/* Exit when the last image has been closed */
- if (old_fd_start != NULL && fd_start == NULL)
+ if (old_connected_disks != 0 && connected_disks == 0) {
+ fprintf(stderr, "Last image is closed, exiting.\n");
exit(0);
+ }

- old_fd_start = fd_start;
+ old_connected_disks = connected_disks;
}
return 0;
}
++++++ tmp-initscript-modprobe.patch ++++++
Index: xen-4.2.1-testing/tools/hotplug/Linux/init.d/xencommons
===================================================================
--- xen-4.2.1-testing.orig/tools/hotplug/Linux/init.d/xencommons
+++ xen-4.2.1-testing/tools/hotplug/Linux/init.d/xencommons
@@ -54,21 +54,27 @@ do_start () {
local time=0
local timeout=30

- modprobe xen-evtchn 2>/dev/null
- modprobe xen-gntdev 2>/dev/null
- modprobe xen-gntalloc 2>/dev/null
- modprobe xen-blkback 2>/dev/null
- modprobe xen-netback 2>/dev/null
- modprobe xen-pciback 2>/dev/null
- modprobe evtchn 2>/dev/null
- modprobe gntdev 2>/dev/null
- modprobe netbk 2>/dev/null
- modprobe blkbk 2>/dev/null
- modprobe xen-scsibk 2>/dev/null
- modprobe usbbk 2>/dev/null
- modprobe pciback 2>/dev/null
- modprobe xen-acpi-processor 2>/dev/null
- modprobe blktap2 2>/dev/null || modprobe blktap 2>/dev/null
+ #modprobe xen-evtchn 2>/dev/null
+ #modprobe xen-gntdev 2>/dev/null
+ #modprobe xen-gntalloc 2>/dev/null
+ #modprobe xen-blkback 2>/dev/null
+ #modprobe xen-netback 2>/dev/null
+ #modprobe xen-pciback 2>/dev/null
+ modprobe evtchn 2>/dev/null || true
+ modprobe gntdev 2>/dev/null || true
+ modprobe gntalloc 2>/dev/null || true
+ modprobe netbk 2>/dev/null || true
+ modprobe blkbk 2>/dev/null || true
+ modprobe xen-scsibk 2>/dev/null || true
+ modprobe usbbk 2>/dev/null || true
+ modprobe pciback 2>/dev/null || true
+ modprobe xen-acpi-processor 2>/dev/null || true
+ modprobe blktap2 2>/dev/null || true
+ modprobe blktap 2>/dev/null || true
+ # xenblk (frontend module) is needed in dom0, allowing it to use vbds
+ modprobe xenblk 2>/dev/null || true
+ # support xl create pv guest with qcow/qcow2 disk image
+ modprobe nbd max_part=8 2>/dev/null || true
mkdir -p /var/run/xen

if ! `xenstore-read -s / >/dev/null 2>&1`
++++++ tmp_build.patch ++++++
Index: xen-4.2.0-testing/tools/xenstore/Makefile
===================================================================
--- xen-4.2.0-testing.orig/tools/xenstore/Makefile
+++ xen-4.2.0-testing/tools/xenstore/Makefile
@@ -72,6 +72,7 @@ $(CLIENTS_DOMU): xenstore

xenstore: xenstore_client.o $(LIBXENSTORE)
$(CC) $(LDFLAGS) $< $(LDLIBS_libxenstore) $(SOCKET_LIBS) -o $@
$(APPEND_LDFLAGS)
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,--build-id=uuid $< -L. -lxenstore
$(SOCKET_LIBS) -o domu-$@

xenstore-control: xenstore_control.o $(LIBXENSTORE)
$(CC) $(LDFLAGS) $< $(LDLIBS_libxenstore) $(SOCKET_LIBS) -o $@
$(APPEND_LDFLAGS)
@@ -121,10 +122,11 @@ install: all
$(INSTALL_PROG) xenstore-control $(DESTDIR)$(BINDIR)
$(INSTALL_PROG) xenstore $(DESTDIR)$(BINDIR)
set -e ; for c in $(CLIENTS) ; do \
- ln -f $(DESTDIR)$(BINDIR)/xenstore $(DESTDIR)$(BINDIR)/$${c} ; \
+ ln -fs /usr/bin/xenstore $(DESTDIR)/usr/bin/$${c} ; \
done
+ $(INSTALL_PROG) domu-xenstore $(DESTDIR)/bin
for client in $(CLIENTS_DOMU); do \
- $(INSTALL_PROG) $$client $(DESTDIR)/bin/$${client/domu-}; \
+ ln -fs /bin/domu-xenstore $(DESTDIR)/bin/$${client/domu-}; \
done
$(INSTALL_DIR) $(DESTDIR)$(LIBDIR)
$(INSTALL_PROG) libxenstore.so.$(MAJOR).$(MINOR) $(DESTDIR)$(LIBDIR)
++++++ tools-watchdog-support.patch ++++++
Index: xen-4.2.1-testing/tools/python/xen/xm/create.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xm/create.py
+++ xen-4.2.1-testing/tools/python/xen/xm/create.py
@@ -535,6 +535,21 @@ gopts.var('usbdevice', val='NAME',
fn=set_value, default='',
use="Name of USB device to add?")

+gopts.var('watchdog', val='NAME',
+ fn=set_value, default='',
+ use="Watchdog device to use. May be ib700 or i6300esb")
+
+gopts.var('watchdog_action', val='reset|shutdown|poweroff|pause|none|dump',
+ fn=set_value, default="reset",
+ use="""Action when watchdog timer expires:
+ - reset: Default, forcefully reset the guest;
+ - shutdown: Gracefully shutdown the guest (not recommended);
+ - poweroff: Forcefully power off the guest;
+ - pause: Pause the guest;
+ - none: Do nothing;
+ - dump: Automatically dump the guest;
+ """)
+
gopts.var('description', val='NAME',
fn=set_value, default='',
use="Description of a domain")
@@ -1097,6 +1112,7 @@ def configure_hvm(config_image, vals):
'usb', 'usbdevice',
'vcpus', 'vnc', 'vncconsole', 'vncdisplay', 'vnclisten',
'vncunused', 'viridian', 'vpt_align',
+ 'watchdog', 'watchdog_action',
'xauthority', 'xen_extended_power_mgmt', 'xen_platform_pci',
'memory_sharing' ]

Index: xen-4.2.1-testing/tools/python/xen/xm/xenapi_create.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xm/xenapi_create.py
+++ xen-4.2.1-testing/tools/python/xen/xm/xenapi_create.py
@@ -1113,7 +1113,9 @@ class sxp2xml:
'xen_platform_pci',
'tsc_mode'
'description',
- 'nomigrate'
+ 'nomigrate',
+ 'watchdog',
+ 'watchdog_action'
]

platform_configs = []
Index: xen-4.2.1-testing/tools/python/xen/xend/image.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/image.py
+++ xen-4.2.1-testing/tools/python/xen/xend/image.py
@@ -855,7 +855,8 @@ class HVMImageHandler(ImageHandler):

dmargs = [ 'boot', 'fda', 'fdb', 'soundhw',
'localtime', 'serial', 'stdvga', 'isa',
- 'acpi', 'usb', 'usbdevice', 'gfx_passthru' ]
+ 'acpi', 'usb', 'usbdevice', 'gfx_passthru',
+ 'watchdog', 'watchdog_action' ]

for a in dmargs:
v = vmConfig['platform'].get(a)
@@ -863,6 +864,7 @@ class HVMImageHandler(ImageHandler):
# python doesn't allow '-' in variable names
if a == 'stdvga': a = 'std-vga'
if a == 'keymap': a = 'k'
+ if a == 'watchdog_action': a = 'watchdog-action'

# Handle booleans gracefully
if a in ['localtime', 'std-vga', 'isa', 'usb', 'acpi']:
Index: xen-4.2.1-testing/tools/python/xen/xend/XendConfig.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendConfig.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendConfig.py
@@ -192,6 +192,8 @@ XENAPI_PLATFORM_CFG_TYPES = {
'xen_platform_pci': int,
"gfx_passthru": int,
'oos' : int,
+ 'watchdog': str,
+ 'watchdog_action': str,
}

# Xen API console 'other_config' keys.
Index: xen-4.2.1-testing/tools/libxl/libxl_dm.c
===================================================================
--- xen-4.2.1-testing.orig/tools/libxl/libxl_dm.c
+++ xen-4.2.1-testing/tools/libxl/libxl_dm.c
@@ -196,6 +196,12 @@ static char ** libxl__build_device_model
"-usbdevice", b_info->u.hvm.usbdevice, NULL);
}
}
+ if (b_info->u.hvm.watchdog || b_info->u.hvm.watchdog_action) {
+ flexarray_append(dm_args, "-watchdog");
+ if (b_info->u.hvm.watchdog_action) {
+ flexarray_vappend(dm_args, "-watchdog-action",
b_info->u.hvm.watchdog_action, NULL);
+ }
+ }
if (b_info->u.hvm.soundhw) {
flexarray_vappend(dm_args, "-soundhw", b_info->u.hvm.soundhw,
NULL);
}
@@ -449,6 +455,12 @@ static char ** libxl__build_device_model
"-usbdevice", b_info->u.hvm.usbdevice, NULL);
}
}
+ if (b_info->u.hvm.watchdog || b_info->u.hvm.watchdog_action) {
+ flexarray_append(dm_args, "-watchdog");
+ if (b_info->u.hvm.watchdog_action) {
+ flexarray_vappend(dm_args, "-watchdog-action",
b_info->u.hvm.watchdog_action, NULL);
+ }
+ }
if (b_info->u.hvm.soundhw) {
flexarray_vappend(dm_args, "-soundhw", b_info->u.hvm.soundhw,
NULL);
}
Index: xen-4.2.1-testing/tools/libxl/libxl_types.idl
===================================================================
--- xen-4.2.1-testing.orig/tools/libxl/libxl_types.idl
+++ xen-4.2.1-testing/tools/libxl/libxl_types.idl
@@ -322,6 +322,8 @@ libxl_domain_build_info = Struct("domain
("usbdevice", string),
("soundhw", string),
("xen_platform_pci", libxl_defbool),
+ ("watchdog", string),
+ ("watchdog_action", string),
])),
("pv", Struct(None, [("kernel", string),
("slack_memkb", MemKB),
Index: xen-4.2.1-testing/tools/libxl/xl_cmdimpl.c
===================================================================
--- xen-4.2.1-testing.orig/tools/libxl/xl_cmdimpl.c
+++ xen-4.2.1-testing/tools/libxl/xl_cmdimpl.c
@@ -1417,6 +1417,8 @@ skip_vfb:
xlu_cfg_replace_string (config, "soundhw", &b_info->u.hvm.soundhw, 0);
xlu_cfg_get_defbool(config, "xen_platform_pci",
&b_info->u.hvm.xen_platform_pci, 0);
+ xlu_cfg_replace_string (config, "watchdog", &b_info->u.hvm.watchdog,
0);
+ xlu_cfg_replace_string (config, "watchdog_action",
&b_info->u.hvm.watchdog_action, 0);
}

xlu_cfg_destroy(config);
++++++ udev-rules.patch ++++++
Index: xen-4.2.0-testing/tools/hotplug/Linux/xen-backend.rules
===================================================================
--- xen-4.2.0-testing.orig/tools/hotplug/Linux/xen-backend.rules
+++ xen-4.2.0-testing/tools/hotplug/Linux/xen-backend.rules
@@ -13,4 +13,5 @@ KERNEL=="blktap-control", NAME="xen/blkt
KERNEL=="gntdev", NAME="xen/%k", MODE="0600"
KERNEL=="pci_iomul", NAME="xen/%k", MODE="0600"
KERNEL=="tapdev[a-z]*", NAME="xen/blktap-2/tapdev%m", MODE="0600"
-SUBSYSTEM=="net", KERNEL=="vif*-emu", ACTION=="add", ENV{UDEV_CALL}="1",
RUN+="/etc/xen/scripts/vif-setup $env{ACTION} type_if=tap"
+SUBSYSTEM=="net", KERNEL=="vif*-emu", ACTION=="add", ENV{UDEV_CALL}="1",
TEST=="/proc/xen" RUN+="/etc/xen/scripts/vif-setup $env{ACTION} type_if=tap"
+KERNELS=="xen", KERNEL=="xvd*", SUBSYSTEM=="block", OPTIONS+="last_rule"
++++++ usb-list.patch ++++++
"usb-hc-create" does not check usb-ver parameter. It allows
2/2.0/2.0usb/2.0aaa. When the low-level
driver creates the host controller, it extracts an integer from the usb-ver
string with vsscanf %d, so there is no problem at that point.
However, the unvalidated string 2/2.0/2.0usb/2.0aaa is saved into the VM config.

After that, "usb-list" cannot handle "2.0/2.0usb/2.0aaa" and fails with the
following error:
Idx BE state usb-ver BE-path
Error: Invalid argument.
Usage: xm usb-list <Domain>

This patch makes "usb-list" handle all usb-ver values the same way the
low-level driver does, so that it no longer fails with an error.

I have previously submitted two patches for this problem upstream, but received
no response. For details, see:

http://www.gossamer-threads.com/lists/xen/devel/178406?search_string=usb-list;#178406

http://www.gossamer-threads.com/lists/xen/devel/181021?search_string=usb-list;#181021


Index: xen-4.2.0-testing/tools/python/xen/xm/main.py
===================================================================
--- xen-4.2.0-testing.orig/tools/python/xen/xm/main.py
+++ xen-4.2.0-testing/tools/python/xen/xm/main.py
@@ -2629,10 +2629,22 @@ def xm_usb_list(args):
ni = parse_dev_info(x[1])
ni['idx'] = int(x[0])
usbver = sxp.child_value(x[1], 'usb-ver')
+
+ substr = re.search("^\d{1,}", usbver)
+ if substr:
+ usbver = substr.group()
+ else:
+ print "Unknown usb-ver"
+ continue
+
if int(usbver) == 1:
ni['usb-ver'] = 'USB1.1'
- else:
+ elif int(usbver) == 2:
ni['usb-ver'] = 'USB2.0'
+ else:
+ print "Unknown usb-ver"
+ continue
+
print "%(idx)-3d %(backend-id)-3d %(state)-5d %(usb-ver)-7s
%(be-path)-30s " % ni

ports = sxp.child(x[1], 'port')
++++++ vif-bridge-no-iptables.patch ++++++
Index: xen-4.2.0-testing/tools/hotplug/Linux/vif-bridge
===================================================================
--- xen-4.2.0-testing.orig/tools/hotplug/Linux/vif-bridge
+++ xen-4.2.0-testing/tools/hotplug/Linux/vif-bridge
@@ -101,9 +101,9 @@ case "$command" in
;;
esac

-if [ "$type_if" = vif ]; then
- handle_iptable
-fi
+#if [ "$type_if" = vif ]; then
+# handle_iptable
+#fi

call_hooks vif post

++++++ vif-bridge-tap-fix.patch ++++++
# HG changeset patch
# User Jim Fehlig <jfehlig@xxxxxxxx>
# Date 1319581952 21600
# Node ID 74da2a3a1db1476d627f42e4a99e9e720cc6774d
# Parent 6c583d35d76dda2236c81d9437ff9d57ab02c006
Prevent vif-bridge from adding user-created tap interfaces to a bridge

Exit vif-bridge script if there is no device info in xenstore, preventing
it from adding user-created taps to bridges.

Signed-off-by: Jim Fehlig <jfehlig@xxxxxxxx>

Index: xen-4.2.0-testing/tools/hotplug/Linux/vif-bridge
===================================================================
--- xen-4.2.0-testing.orig/tools/hotplug/Linux/vif-bridge
+++ xen-4.2.0-testing/tools/hotplug/Linux/vif-bridge
@@ -32,6 +32,13 @@
dir=$(dirname "$0")
. "$dir/vif-common.sh"

+mac=$(xenstore_read_default "$XENBUS_PATH/mac" "")
+if [ -z "$mac" ]
+then
+ log debug "No device details in $XENBUS_PATH, exiting."
+ exit 0
+fi
+
bridge=${bridge:-}
bridge=$(xenstore_read_default "$XENBUS_PATH/bridge" "$bridge")

++++++ vif-route-ifup.patch ++++++
---
tools/examples/xend-config.sxp | 20 ++++++++++++++++++++
tools/hotplug/Linux/Makefile | 2 +-
tools/hotplug/Linux/vif-route-ifup | 34 ++++++++++++++++++++++++++++++++++
3 files changed, 55 insertions(+), 1 deletion(-)

Index: xen-4.2.0-testing/tools/examples/xend-config.sxp
===================================================================
--- xen-4.2.0-testing.orig/tools/examples/xend-config.sxp
+++ xen-4.2.0-testing/tools/examples/xend-config.sxp
@@ -200,6 +200,26 @@
#(network-script network-route)
#(vif-script vif-route)

+# SuSE users note:
+# If using a routed network configuration it is advised to NOT use
+# network-route and vif-route scripts but instead use sysconfig scripts
+# in dom0 and vif-route-ifup script to "connect" the domU vif to dom0.
+# Since this configuration requires a vif sysconfig script in dom0, a static
+# vif name must be used. E.g. in dom0 the vif sysconfig script
+# (/etc/sysconfig/network/ifcfg-xen1.0) may contain
+#
+# NAME='XEN vm 1 virtual interface 0'
+# BOOTPROTO='static'
+# STARTMODE='hotplug'
+# ...
+#
+# The corresponding domain vif configuration would contain e.g.
+# vif=[ 'mac=00:16:3e:aa:bb:cc,script=vif-route-ifup,vifname=xen1.0', ]
+#
+# If the vif-route-ifup script will be used for all domains, it can be
+# set here as the default vif script, alleviating the need for
+# 'script=' in domain vif configuration.
+#(vif-script vif-route-ifup)

## Use the following if network traffic is routed with NAT, as an alternative
# to the settings for bridged networking given above.
Index: xen-4.2.0-testing/tools/hotplug/Linux/Makefile
===================================================================
--- xen-4.2.0-testing.orig/tools/hotplug/Linux/Makefile
+++ xen-4.2.0-testing/tools/hotplug/Linux/Makefile
@@ -11,7 +11,7 @@ XENCOMMONS_SYSCONFIG = init.d/sysconfig.

# Xen script dir and scripts to go there.
XEN_SCRIPTS = network-bridge vif-bridge
-XEN_SCRIPTS += network-route vif-route
+XEN_SCRIPTS += network-route vif-route vif-route-ifup
XEN_SCRIPTS += network-nat vif-nat
XEN_SCRIPTS += vif2
XEN_SCRIPTS += vif-setup
Index: xen-4.2.0-testing/tools/hotplug/Linux/vif-route-ifup
===================================================================
--- /dev/null
+++ xen-4.2.0-testing/tools/hotplug/Linux/vif-route-ifup
@@ -0,0 +1,34 @@
+#!/bin/bash
+#============================================================================
+# /etc/xen/vif-route-ifup
+#
+# Script for configuring a vif in routed mode.
+# The hotplugging system will call this script if it is specified either in
+# the device configuration given to Xend, or the default Xend configuration
+# in /etc/xen/xend-config.sxp. If the script is specified in neither of those
+# places, then vif-bridge is the default.
+#
+# Usage:
+# vif-route-ifup (add|remove|online|offline)
+#
+# Environment vars:
+# dev vif interface name (required).
+#============================================================================
+
+dir=$(dirname "$0")
+. "$dir/vif-common.sh"
+
+case "$command" in
+ online)
+ ifup ${dev}
+ ;;
+ offline)
+ do_without_error ifdown ${dev}
+ ;;
+esac
+
+log debug "Successful vif-route-ifup $command for ${dev}."
+if [ "$command" = "online" ]
+then
+ success
+fi
++++++ x86-cpufreq-report.patch ++++++
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -25,7 +25,7 @@
#include <xen/irq.h>
#include <asm/current.h>
#include <public/platform.h>
-#include <acpi/cpufreq/processor_perf.h>
+#include <acpi/cpufreq/cpufreq.h>
#include <asm/edd.h>
#include <asm/mtrr.h>
#include <asm/io_apic.h>
@@ -639,6 +639,41 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
}
break;

+ case XENPF_get_cpu_freq:
+ case XENPF_get_cpu_freq_min:
+ case XENPF_get_cpu_freq_max:
+ {
+ struct vcpu *v;
+ const struct cpufreq_policy *policy;
+
+ if ( op->u.get_cpu_freq.vcpu >= current->domain->max_vcpus ||
+ !(v = current->domain->vcpu[op->u.get_cpu_freq.vcpu]) )
+ {
+ ret = -EINVAL;
+ break;
+ }
+
+ policy = per_cpu(cpufreq_cpu_policy, v->processor);
+ switch ( op->cmd & -!!policy )
+ {
+ case XENPF_get_cpu_freq:
+ op->u.get_cpu_freq.freq = policy->cur;
+ break;
+ case XENPF_get_cpu_freq_min:
+ op->u.get_cpu_freq.freq = policy->min;
+ break;
+ case XENPF_get_cpu_freq_max:
+ op->u.get_cpu_freq.freq = policy->max;
+ break;
+ default:
+ op->u.get_cpu_freq.freq = 0;
+ break;
+ }
+ if ( copy_field_to_guest(u_xenpf_op, op, u.get_cpu_freq.freq) )
+ ret = -EFAULT;
+ }
+ break;
+
default:
ret = -ENOSYS;
break;
--- a/xen/include/public/platform.h
+++ b/xen/include/public/platform.h
@@ -504,6 +504,16 @@ struct xenpf_core_parking {
typedef struct xenpf_core_parking xenpf_core_parking_t;
DEFINE_XEN_GUEST_HANDLE(xenpf_core_parking_t);

+#define XENPF_get_cpu_freq ('N' << 24)
+#define XENPF_get_cpu_freq_min (XENPF_get_cpu_freq + 1)
+#define XENPF_get_cpu_freq_max (XENPF_get_cpu_freq_min + 1)
+struct xenpf_get_cpu_freq {
+ /* IN variables */
+ uint32_t vcpu;
+ /* OUT variables */
+ uint32_t freq; /* in kHz */
+};
+
/*
* ` enum neg_errnoval
* ` HYPERVISOR_platform_op(const struct xen_platform_op*);
@@ -530,6 +540,7 @@ struct xen_platform_op {
struct xenpf_cpu_hotadd cpu_add;
struct xenpf_mem_hotadd mem_add;
struct xenpf_core_parking core_parking;
+ struct xenpf_get_cpu_freq get_cpu_freq;
uint8_t pad[128];
} u;
};
++++++ x86-dom-print.patch ++++++
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -154,15 +154,30 @@ void dump_pageframe_info(struct domain *

printk("Memory pages belonging to domain %u:\n", d->domain_id);

- if ( d->tot_pages >= 10 )
+ if ( d->tot_pages >= 10 && d->is_dying < DOMDYING_dead )
{
printk(" DomPage list too long to display\n");
}
else
{
+ unsigned long total[PGT_type_mask
+ / (PGT_type_mask & -PGT_type_mask) + 1] = {};
+
spin_lock(&d->page_alloc_lock);
page_list_for_each ( page, &d->page_list )
{
+ unsigned int index = (page->u.inuse.type_info & PGT_type_mask)
+ / (PGT_type_mask & -PGT_type_mask);
+
+ if ( ++total[index] > 16 )
+ {
+ switch ( page->u.inuse.type_info & PGT_type_mask )
+ {
+ case PGT_none:
+ case PGT_writable_page:
+ continue;
+ }
+ }
printk(" DomPage %p: caf=%08lx, taf=%" PRtype_info "\n",
_p(page_to_mfn(page)),
page->count_info, page->u.inuse.type_info);
++++++ x86-extra-trap-info.patch ++++++
--- a/xen/arch/x86/x86_32/entry.S
+++ b/xen/arch/x86/x86_32/entry.S
@@ -410,8 +410,10 @@ UNLIKELY_END(bounce_vm86_3)
_ASM_EXTABLE(.Lft24, domain_crash_synchronous)
_ASM_EXTABLE(.Lft25, domain_crash_synchronous)

+.section .rodata, "a", @progbits
domain_crash_synchronous_string:
.asciz "domain_crash_sync called from entry.S (%lx)\n"
+.previous

domain_crash_synchronous:
pushl $domain_crash_synchronous_string
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -428,22 +428,35 @@ UNLIKELY_END(bounce_failsafe)
jz domain_crash_synchronous
movq %rax,UREGS_rip+8(%rsp)
ret
- _ASM_EXTABLE(.Lft2, domain_crash_synchronous)
- _ASM_EXTABLE(.Lft3, domain_crash_synchronous)
- _ASM_EXTABLE(.Lft4, domain_crash_synchronous)
- _ASM_EXTABLE(.Lft5, domain_crash_synchronous)
- _ASM_EXTABLE(.Lft6, domain_crash_synchronous)
- _ASM_EXTABLE(.Lft7, domain_crash_synchronous)
- _ASM_EXTABLE(.Lft8, domain_crash_synchronous)
- _ASM_EXTABLE(.Lft9, domain_crash_synchronous)
- _ASM_EXTABLE(.Lft10, domain_crash_synchronous)
- _ASM_EXTABLE(.Lft11, domain_crash_synchronous)
- _ASM_EXTABLE(.Lft12, domain_crash_synchronous)
- _ASM_EXTABLE(.Lft13, domain_crash_synchronous)
+ _ASM_EXTABLE(.Lft2, domain_crash_page_fault_32)
+ _ASM_EXTABLE(.Lft3, domain_crash_page_fault_24)
+ _ASM_EXTABLE(.Lft4, domain_crash_page_fault_8)
+ _ASM_EXTABLE(.Lft5, domain_crash_page_fault_16)
+ _ASM_EXTABLE(.Lft6, domain_crash_page_fault)
+ _ASM_EXTABLE(.Lft7, domain_crash_page_fault)
+ _ASM_EXTABLE(.Lft8, domain_crash_page_fault_24)
+ _ASM_EXTABLE(.Lft9, domain_crash_page_fault_16)
+ _ASM_EXTABLE(.Lft10, domain_crash_page_fault_8)
+ _ASM_EXTABLE(.Lft11, domain_crash_page_fault)
+ _ASM_EXTABLE(.Lft12, domain_crash_page_fault_8)
+ _ASM_EXTABLE(.Lft13, domain_crash_page_fault)

+.section .rodata, "a", @progbits
domain_crash_synchronous_string:
.asciz "domain_crash_sync called from entry.S\n"
+.previous

+domain_crash_page_fault_32:
+ addq $8,%rsi
+domain_crash_page_fault_24:
+ addq $8,%rsi
+domain_crash_page_fault_16:
+ addq $8,%rsi
+domain_crash_page_fault_8:
+ addq $8,%rsi
+domain_crash_page_fault:
+ movq %rsi,%rdi
+ call show_page_walk
ENTRY(domain_crash_synchronous)
# Get out of the guest-save area of the stack.
GET_CPUINFO_FIELD(CPUINFO_guest_cpu_user_regs,%rax)
++++++ x86-ioapic-ack-default.patch ++++++
Change default IO-APIC ack mode for single IO-APIC systems to old-style.

Index: xen-4.2.0-testing/xen/arch/x86/io_apic.c
===================================================================
--- xen-4.2.0-testing.orig/xen/arch/x86/io_apic.c
+++ xen-4.2.0-testing/xen/arch/x86/io_apic.c
@@ -2012,7 +2012,10 @@ void __init setup_IO_APIC(void)
io_apic_irqs = ~PIC_IRQS;

printk("ENABLING IO-APIC IRQs\n");
- printk(" -> Using %s ACK method\n", ioapic_ack_new ? "new" : "old");
+ if (!directed_eoi_enabled && !ioapic_ack_forced) {
+ ioapic_ack_new = (nr_ioapics > 1);
+ printk(" -> Using %s ACK method\n", ioapic_ack_new ? "new" : "old");
+ }

/*
* Set up IO-APIC IRQ routing.
++++++ xen-api-auth.patch ++++++
Index: xen-4.2.0-testing/tools/python/xen/xend/XendAuthSessions.py
===================================================================
--- xen-4.2.0-testing.orig/tools/python/xen/xend/XendAuthSessions.py
+++ xen-4.2.0-testing/tools/python/xen/xend/XendAuthSessions.py
@@ -84,7 +84,7 @@ class XendAuthSessions:
# if PAM doesn't exist, let's ignore it
return False

- pam_auth.start("login")
+ pam_auth.start("xen-api")
pam_auth.set_item(PAM.PAM_USER, username)

def _pam_conv(auth, query_list, user_data = None):
++++++ xen-changeset.diff ++++++
Index: xen-4.2.0-testing/xen/Makefile
===================================================================
--- xen-4.2.0-testing.orig/xen/Makefile
+++ xen-4.2.0-testing/xen/Makefile
@@ -1,3 +1,4 @@
+export XEN_CHANGESET = unavailable
# This is the correct place to edit the build version.
# All other places this is stored (eg. compile.h) should be autogenerated.
export XEN_VERSION = 4
@@ -112,7 +113,7 @@ delete-unfresh-files:
@rm -f $@1 $@2

# compile.h contains dynamic build info. Rebuilt on every 'make' invocation.
-include/xen/compile.h: include/xen/compile.h.in .banner
+include/xen/compile.h: include/xen/compile.h.in
@sed -e 's/@@date@@/$(shell LC_ALL=C date)/g' \
-e 's/@@time@@/$(shell LC_ALL=C date +%T)/g' \
-e 's/@@whoami@@/$(XEN_WHOAMI)/g' \
@@ -122,10 +123,9 @@ include/xen/compile.h: include/xen/compi
-e 's/@@version@@/$(XEN_VERSION)/g' \
-e 's/@@subversion@@/$(XEN_SUBVERSION)/g' \
-e 's/@@extraversion@@/$(XEN_EXTRAVERSION)/g' \
- -e 's!@@changeset@@!$(shell ((hg parents --template "{date|date}
{rev}:{node|short}" >/dev/null && hg parents --template "{date|date}
{rev}:{node|short}") || echo "unavailable") 2>/dev/null)!g' \
+ -e 's!@@changeset@@!$(XEN_CHANGESET)!g' \
< include/xen/compile.h.in > $@.new
- @grep \" .banner >> $@.new
- @grep -v \" .banner
+ tools/figlet/figlet -d tools/figlet Xen $(XEN_FULLVERSION) >> $@.new
@mv -f $@.new $@

include/asm-$(TARGET_ARCH)/asm-offsets.h: arch/$(TARGET_ARCH)/asm-offsets.s
++++++ xen-cpupool-xl-config-format.patch ++++++
Index: xen-4.2.0-testing/tools/python/xen/xm/cpupool.py
===================================================================
--- xen-4.2.0-testing.orig/tools/python/xen/xm/cpupool.py
+++ xen-4.2.0-testing/tools/python/xen/xm/cpupool.py
@@ -157,9 +157,17 @@ def make_cpus_config(cfg_cpus):
# ["0,2","1,3"] -> [[0,2],[1,3]]
# ["0-3,^1","1-4,^2"] -> [[0,2,3],[1,3,4]]
try:
- for c in cfg_cpus:
- cpus = cnv(c)
- cpus_list.append(cpus)
+ cpus_str = ""
+ list_len = len(cfg_cpus)
+ n = 0
+ while n < list_len:
+ if type(cfg_cpus[n]) != str:
+ raise SyntaxError('cpus = %s' % cfg_cpus)
+ cpus_str += cfg_cpus[n]
+ n += 1
+ if n < list_len:
+ cpus_str += ', '
+ cpus_list = cnv(cpus_str)
except ValueError, e:
raise err('cpus = %s: %s' % (cfg_cpus, e))
else:
++++++ xen-destdir.diff ++++++
Index: xen-4.2.0-testing/tools/xenstore/Makefile
===================================================================
--- xen-4.2.0-testing.orig/tools/xenstore/Makefile
+++ xen-4.2.0-testing/tools/xenstore/Makefile
@@ -10,6 +10,7 @@ CFLAGS += $(CFLAGS_libxenctrl)

CLIENTS := xenstore-exists xenstore-list xenstore-read xenstore-rm
xenstore-chmod
CLIENTS += xenstore-write xenstore-ls xenstore-watch
+CLIENTS_DOMU := $(patsubst xenstore-%,domu-xenstore-%,$(CLIENTS))

XENSTORED_OBJS = xenstored_core.o xenstored_watch.o xenstored_domain.o
xenstored_transaction.o xs_lib.o talloc.o utils.o tdb.o hashtable.o

@@ -38,7 +39,7 @@ endif
all: $(ALL_TARGETS)

.PHONY: clients
-clients: xenstore $(CLIENTS) xenstore-control
+clients: xenstore $(CLIENTS) $(CLIENTS_DOMU) xenstore-control

ifeq ($(CONFIG_SunOS),y)
xenstored_probes.h: xenstored_probes.d
@@ -66,6 +67,9 @@ xenstored.a: $(XENSTORED_OBJS)
$(CLIENTS): xenstore
ln -f xenstore $@

+$(CLIENTS_DOMU): xenstore
+ ln -f xenstore $@
+
xenstore: xenstore_client.o $(LIBXENSTORE)
$(CC) $(LDFLAGS) $< $(LDLIBS_libxenstore) $(SOCKET_LIBS) -o $@
$(APPEND_LDFLAGS)

@@ -93,7 +97,7 @@ clean:
rm -f *.a *.o *.opic *.so* xenstored_probes.h
rm -f xenstored xs_random xs_stress xs_crashme
rm -f xs_tdb_dump xenstore-control init-xenstore-domain
- rm -f xenstore $(CLIENTS)
+ rm -f xenstore $(CLIENTS) $(CLIENTS_DOMU)
$(RM) $(DEPS)

.PHONY: TAGS
@@ -110,6 +114,7 @@ install: all
$(INSTALL_DIR) $(DESTDIR)$(SBINDIR)
$(INSTALL_DIR) $(DESTDIR)$(INCLUDEDIR)
$(INSTALL_DIR) $(DESTDIR)$(INCLUDEDIR)/xenstore-compat
+ $(INSTALL_DIR) $(DESTDIR)/bin
$(INSTALL_DIR) $(DESTDIR)/var/run/xenstored
$(INSTALL_DIR) $(DESTDIR)/var/lib/xenstored
$(INSTALL_PROG) xenstored $(DESTDIR)$(SBINDIR)
@@ -118,6 +123,9 @@ install: all
set -e ; for c in $(CLIENTS) ; do \
ln -f $(DESTDIR)$(BINDIR)/xenstore $(DESTDIR)$(BINDIR)/$${c} ; \
done
+ for client in $(CLIENTS_DOMU); do \
+ $(INSTALL_PROG) $$client $(DESTDIR)/bin/$${client/domu-}; \
+ done
$(INSTALL_DIR) $(DESTDIR)$(LIBDIR)
$(INSTALL_PROG) libxenstore.so.$(MAJOR).$(MINOR) $(DESTDIR)$(LIBDIR)
ln -sf libxenstore.so.$(MAJOR).$(MINOR)
$(DESTDIR)$(LIBDIR)/libxenstore.so.$(MAJOR)
Index: xen-4.2.0-testing/tools/hotplug/Linux/Makefile
===================================================================
--- xen-4.2.0-testing.orig/tools/hotplug/Linux/Makefile
+++ xen-4.2.0-testing/tools/hotplug/Linux/Makefile
@@ -43,12 +43,12 @@ install: all install-initd install-scrip
.PHONY: install-initd
install-initd:
[ -d $(DESTDIR)$(INITD_DIR) ] || $(INSTALL_DIR) $(DESTDIR)$(INITD_DIR)
- [ -d $(DESTDIR)$(SYSCONFIG_DIR) ] || $(INSTALL_DIR)
$(DESTDIR)$(SYSCONFIG_DIR)
+ [ -d $(DESTDIR)/var/adm/fillup-templates ] || $(INSTALL_DIR)
$(DESTDIR)/var/adm/fillup-templates/
$(INSTALL_PROG) $(XEND_INITD) $(DESTDIR)$(INITD_DIR)
$(INSTALL_PROG) $(XENDOMAINS_INITD) $(DESTDIR)$(INITD_DIR)
- $(INSTALL_DATA) $(XENDOMAINS_SYSCONFIG)
$(DESTDIR)$(SYSCONFIG_DIR)/xendomains
+ $(INSTALL_DATA) $(XENDOMAINS_SYSCONFIG)
$(DESTDIR)/var/adm/fillup-templates/
$(INSTALL_PROG) $(XENCOMMONS_INITD) $(DESTDIR)$(INITD_DIR)
- $(INSTALL_DATA) $(XENCOMMONS_SYSCONFIG)
$(DESTDIR)$(SYSCONFIG_DIR)/xencommons
+ $(INSTALL_DATA) $(XENCOMMONS_SYSCONFIG)
$(DESTDIR)/var/adm/fillup-templates/
$(INSTALL_PROG) init.d/xen-watchdog $(DESTDIR)$(INITD_DIR)

.PHONY: install-scripts
Index: xen-4.2.0-testing/tools/firmware/etherboot/Makefile
===================================================================
--- xen-4.2.0-testing.orig/tools/firmware/etherboot/Makefile
+++ xen-4.2.0-testing/tools/firmware/etherboot/Makefile
@@ -28,12 +28,12 @@ all: $(ROMS)
$(MAKE) -C $D/src bin/$(*F).rom

$T:
- if ! wget -O _$T $(IPXE_TARBALL_URL); then \
- $(GIT) clone $(IPXE_GIT_URL) $D.git; \
- (cd $D.git && $(GIT) archive --format=tar --prefix=$D/ \
- $(IPXE_GIT_TAG) | gzip >../_$T); \
- rm -rf $D.git; \
- fi
+ #if ! wget -O _$T $(IPXE_TARBALL_URL); then \
+ # $(GIT) clone $(IPXE_GIT_URL) $D.git; \
+ # (cd $D.git && $(GIT) archive --format=tar --prefix=$D/ \
+ # $(IPXE_GIT_TAG) | gzip >../_$T); \
+ # rm -rf $D.git; \
+ #fi
mv _$T $T

$D/src/arch/i386/Makefile: $T Config
++++++ xen-disable-qemu-monitor.diff ++++++
CVE-2007-0998 - remote compromise of dom0

Rather than completely disabling QEMU's console (which would remove
the "sendkey" command, among other useful things), remove all console
commands that can read/write dom0's state.


Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/monitor.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/monitor.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/monitor.c
@@ -1497,6 +1497,7 @@ static const term_cmd_t term_cmds[] = {
"device|all", "commit changes to the disk images (if -snapshot is used)
or backing files" },
{ "info", "s?", do_info,
"subcommand", "show various information about the system state" },
+#ifdef CONFIG_TRUSTED_CLIENT
{ "q|quit", "", do_quit,
"", "quit the emulator" },
{ "eject", "-fB", do_eject,
@@ -1509,6 +1510,7 @@ static const term_cmd_t term_cmds[] = {
"filename", "output logs to 'filename'" },
{ "log", "s", do_log,
"item1[,...]", "activate logging of the specified items to
'/tmp/qemu.log'" },
+#endif
{ "savevm", "s?", do_savevm,
"tag|id", "save a VM snapshot. If no tag or id are provided, a new
snapshot is created" },
{ "loadvm", "s", do_loadvm,
@@ -1538,8 +1540,10 @@ static const term_cmd_t term_cmds[] = {
"", "reset the system" },
{ "system_powerdown", "", do_system_powerdown,
"", "send system power down event" },
+#ifdef CONFIG_TRUSTED_CLIENT
{ "sum", "ii", do_sum,
"addr size", "compute the checksum of a memory region" },
+#endif
{ "usb_add", "s", do_usb_add,
"device", "add USB device (e.g. 'host:bus.addr' or
'host:vendor_id:product_id')" },
{ "usb_del", "s", do_usb_del,
@@ -1558,6 +1562,7 @@ static const term_cmd_t term_cmds[] = {
"state", "change mouse button state (1=L, 2=M, 4=R)" },
{ "mouse_set", "i", do_mouse_set,
"index", "set which mouse device receives events" },
+#ifdef CONFIG_TRUSTED_CLIENT
#ifdef HAS_AUDIO
{ "wavcapture", "si?i?i?", do_wav_capture,
"path [frequency bits channels]",
@@ -1565,6 +1570,7 @@ static const term_cmd_t term_cmds[] = {
#endif
{ "stopcapture", "i", do_stop_capture,
"capture index", "stop capture" },
+#endif
{ "memsave", "lis", do_memory_save,
"addr size file", "save to disk virtual memory dump starting at 'addr'
of size 'size'", },
{ "pmemsave", "lis", do_physical_memory_save,
@@ -1646,6 +1652,7 @@ static const term_cmd_t info_cmds[] = {
"", "show KVM information", },
{ "usb", "", usb_info,
"", "show guest USB devices", },
+#ifdef CONFIG_TRUSTED_CLIENT
{ "usbhost", "", usb_host_info,
"", "show host USB devices", },
{ "profile", "", do_info_profile,
@@ -1677,6 +1684,7 @@ static const term_cmd_t info_cmds[] = {
{ "migrate", "", do_info_migrate, "", "show migration status" },
{ "balloon", "", do_info_balloon,
"", "show balloon information" },
+#endif
{ NULL, NULL, },
};

++++++ xen-domUloader.diff ++++++
Index: xen-4.2.1-testing/tools/python/xen/xend/server/DevController.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/server/DevController.py
+++ xen-4.2.1-testing/tools/python/xen/xend/server/DevController.py
@@ -592,6 +592,31 @@ class DevController:
return (Missing, None)


+ def waitForFrontend(self, devid):
+ def frontendStatusCallback(statusPath, ev, result):
+ status = xstransact.Read(statusPath)
+ log.debug("frontendStatusCallback %s = %s" % (statusPath, status))
+ try:
+ status = int(status)
+ if status == xenbusState['Connected']:
+ result['status'] = Connected
+ elif status == xenbusState['Closed']:
+ result['status'] = Error
+ else:
+ raise
+ except:
+ return 1
+ ev.set()
+ return 0
+ frontpath = self.frontendPath(devid)
+ statusPath = frontpath + '/state'
+ ev = Event()
+ result = { 'status': Timeout }
+ xswatch(statusPath, frontendStatusCallback, ev, result)
+ ev.wait(5)
+ return result['status']
+
+
def backendPath(self, backdom, devid):
"""Construct backend path given the backend domain and device id.

Index: xen-4.2.1-testing/tools/python/xen/xend/XendBootloader.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendBootloader.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendBootloader.py
@@ -12,7 +12,7 @@
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#

-import os, select, errno, stat, signal, tty
+import os, select, errno, stat, signal, tty, time
import random
import shlex
from xen.xend import sxp
@@ -38,8 +38,25 @@ def bootloader(blexec, disk, dom, quiet
msg = "Bootloader isn't executable"
log.error(msg)
raise VmError(msg)
- if not os.access(disk, os.R_OK):
- msg = "Disk isn't accessible"
+
+ # domUloader requires '--entry=foo' in blargs, which is derived from
+ # 'bootargs' entry in domain configuration file. Ensure it exists
+ # here so a reasonable error message can be returned.
+ if blexec.find('domUloader.py') != -1:
+ if blargs.find('entry') == -1:
+ msg = "domUloader requires specification of bootargs"
+ log.error(msg)
+ raise VmError(msg)
+
+ avail = False
+ for i in xrange(1, 500):
+ avail = os.access(disk, os.R_OK)
+ if avail:
+ break
+ time.sleep(.1)
+
+ if not avail:
+ msg = "Disk '%s' isn't accessible" % disk
log.error(msg)
raise VmError(msg)

Index: xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -2347,6 +2347,10 @@ class XendDomainInfo:
deviceClass, config = self.info['devices'].get(dev_uuid)
self._waitForDevice(deviceClass, config['devid'])

+ def _waitForDeviceFrontUUID(self, dev_uuid):
+ deviceClass, config = self.info['devices'].get(dev_uuid)
+ self.getDeviceController(deviceClass).waitForFrontend(config['devid'])
+
def _waitForDevice_destroy(self, deviceClass, devid, backpath):
return self.getDeviceController(deviceClass).waitForDevice_destroy(
devid, backpath)
@@ -3297,7 +3301,8 @@ class XendDomainInfo:
from xen.xend import XendDomain
dom0 = XendDomain.instance().privilegedDomain()
mounted_vbd_uuid = dom0.create_vbd(vbd, disk);
- dom0._waitForDeviceUUID(mounted_vbd_uuid)
+ vbd_uuid = dom0.create_vbd(vbd, disk)
+ dom0._waitForDeviceFrontUUID(vbd_uuid)
fn = BOOTLOADER_LOOPBACK_DEVICE

try:
@@ -3307,10 +3312,10 @@ class XendDomainInfo:
if mounted:
log.info("Unmounting %s from %s." %
(fn, BOOTLOADER_LOOPBACK_DEVICE))
- _, vbd_info = dom0.info['devices'][mounted_vbd_uuid]
-
dom0.destroyDevice(dom0.getBlockDeviceClass(vbd_info['devid']),
- BOOTLOADER_LOOPBACK_DEVICE, force =
True)
-
+ if devtype in ['tap', 'tap2']:
+ dom0.destroyDevice('tap', BOOTLOADER_LOOPBACK_DEVICE,
rm_cfg = True)
+ else:
+ dom0.destroyDevice('vbd', BOOTLOADER_LOOPBACK_DEVICE,
rm_cfg = True)
if blcfg is None:
msg = "Had a bootloader specified, but can't find disk"
log.error(msg)
++++++ xen-fixme-doc.diff ++++++
Index: xen-4.2.0-testing/docs/man/xmdomain.cfg.pod.5
===================================================================
--- xen-4.2.0-testing.orig/docs/man/xmdomain.cfg.pod.5
+++ xen-4.2.0-testing/docs/man/xmdomain.cfg.pod.5
@@ -333,16 +333,10 @@ at hda1, which is the root filesystem.

=item I<NFS Root>

-FIXME: write me
-
=item I<LVM Root>

-FIXME: write me
-
=item I<Two Networks>

-FIXME: write me
-
=back

=head1 SEE ALSO
Index: xen-4.2.0-testing/docs/man/xm.pod.1
===================================================================
--- xen-4.2.0-testing.orig/docs/man/xm.pod.1
+++ xen-4.2.0-testing/docs/man/xm.pod.1
@@ -299,7 +299,8 @@ scheduling by the Xen hypervisor.

=item B<s - shutdown>

-FIXME: Why would you ever see this state?
+The guest has requested to be shutdown, rebooted or suspended, and the
+domain is in the process of being destroyed in response.

=item B<c - crashed>

@@ -312,8 +313,6 @@ restart on crash. See L<xmdomain.cfg> f
The domain is in process of dying, but hasn't completely shutdown or
crashed.

-FIXME: Is this right?
-
=back

B<NOTES>
@@ -737,8 +736,6 @@ Xen ships with a number of domain schedu
time with the B<sched=> parameter on the Xen command line. By
default B<credit> is used for scheduling.

-FIXME: we really need a scheduler expert to write up this section.
-
=over 4

=item B<sched-credit> [ B<-d> I<domain-id> [ B<-w>[B<=>I<WEIGHT>] |
B<-c>[B<=>I<CAP>] ] ]
@@ -788,8 +785,6 @@ The normal EDF scheduling usage in nanos

The normal EDF scheduling usage in nanoseconds

-FIXME: these are lame, should explain more.
-
=item I<latency-hint>

Scaled period if domain is doing heavy I/O.
@@ -939,9 +934,6 @@ the default setting in xend-config.sxp f

Passes the specified IP Address to the adapter on creation.

-FIXME: this currently appears to be B<broken>. I'm not sure under what
-circumstances this should actually work.
-
=item B<mac=>I<macaddr>

The MAC address that the domain will see on its Ethernet device. If
@@ -967,9 +959,6 @@ Removes the network device from the doma
I<devid> is the virtual interface device number within the domain
(i.e. the 3 in vif22.3).

-FIXME: this is currently B<broken>. Network devices aren't completely
-removed from domain 0.
-
=item B<network-list> [B<-l>|B<--long>]> I<domain-id>

List virtual network interfaces for a domain. The returned output is
++++++ xen-glibc217.patch ++++++
Index: xen-4.2.0-testing/tools/debugger/gdbsx/xg/xg_main.c
===================================================================
--- xen-4.2.0-testing.orig/tools/debugger/gdbsx/xg/xg_main.c
+++ xen-4.2.0-testing/tools/debugger/gdbsx/xg/xg_main.c
@@ -34,6 +34,7 @@
* XGTRC(): generic trace utility
*/

+#include <sys/types.h>
#include <stdio.h>
#include <stddef.h>
#include <stdarg.h>
++++++ xen-hvm-default-bridge.diff ++++++
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/net.h
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/net.h
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/net.h
@@ -107,8 +107,8 @@ void net_host_device_add(const char *dev
void net_host_device_remove(int vlan_id, const char *device);

#ifndef DEFAULT_NETWORK_SCRIPT
-#define DEFAULT_NETWORK_SCRIPT "/etc/qemu-ifup"
-#define DEFAULT_NETWORK_DOWN_SCRIPT "/etc/qemu-ifdown"
+#define DEFAULT_NETWORK_SCRIPT "/etc/xen/qemu-ifup"
+#define DEFAULT_NETWORK_DOWN_SCRIPT "/etc/xen/qemu-ifdown"
#endif
#ifdef __sun__
#define SMBD_COMMAND "/usr/sfw/sbin/smbd"
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/net.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/net.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/net.c
@@ -1759,9 +1759,10 @@ int net_client_init(const char *device,
}
if (get_param_value(script_arg, sizeof(script_arg), "scriptarg",
p) == 0 &&
get_param_value(script_arg, sizeof(script_arg), "bridge", p)
== 0) { /* deprecated; for xend compatibility */
- pstrcpy(script_arg, sizeof(script_arg), "");
+ ret = net_tap_init(vlan, device, name, ifname, setup_script,
NULL, NULL);
+ } else {
+ ret = net_tap_init(vlan, device, name, ifname, setup_script,
down_script, script_arg);
}
- ret = net_tap_init(vlan, device, name, ifname, setup_script,
down_script, script_arg);
}
} else
#endif
Index: xen-4.2.0-testing/tools/python/xen/xend/image.py
===================================================================
--- xen-4.2.0-testing.orig/tools/python/xen/xend/image.py
+++ xen-4.2.0-testing/tools/python/xen/xend/image.py
@@ -912,11 +912,13 @@ class HVMImageHandler(ImageHandler):
mac = devinfo.get('mac')
if mac is None:
raise VmError("MAC address not specified or generated.")
- bridge = devinfo.get('bridge', 'xenbr0')
+ bridge = devinfo.get('bridge', None)
model = devinfo.get('model', 'rtl8139')
ret.append("-net")
- ret.append("nic,vlan=%d,macaddr=%s,model=%s" %
- (nics, mac, model))
+ net = "nic,vlan=%d,macaddr=%s,model=%s" % (nics, mac, model)
+ if bridge:
+ net += ",bridge=%s" % bridge
+ ret.append(net)
vifname = "vif%d.%d-emu" % (self.vm.getDomid(), nics-1)
ret.append("-net")
if osdep.tapif_script is not None:
Index:
xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/i386-dm/qemu-ifup-Linux
===================================================================
---
xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/i386-dm/qemu-ifup-Linux
+++
xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/i386-dm/qemu-ifup-Linux
@@ -1,36 +1,22 @@
#!/bin/sh

-#. /etc/rc.d/init.d/functions
-#ulimit -c unlimited
-
echo 'config qemu network with xen bridge for ' $*

+# If bridge is not specified, try device with default route.
bridge=$2
+if [ -z "$bridge" ]; then
+ bridge=$(ip route list | awk '/^default / { print $NF }')
+fi

-#
-# Old style bridge setup with netloop, used to have a bridge name
-# of xenbrX, enslaving pethX and vif0.X, and then configuring
-# eth0.
-#
-# New style bridge setup does not use netloop, so the bridge name
-# is ethX and the physical device is enslaved pethX
-#
-# So if...
-#
-# - User asks for xenbrX
-# - AND xenbrX doesn't exist
-# - AND there is a ethX device which is a bridge
-#
-# ..then we translate xenbrX to ethX
-#
-# This lets old config files work without modification
-#
-if [ ! -e "/sys/class/net/$bridge" ] && [ -z "${bridge##xenbr*}" ]
+# Exit if $bridge is not a bridge. Exit with 0 status
+# so qemu-dm process is not terminated. No networking in
+# vm is bad but not catastrophic. The vm could still run
+# cpu and disk IO workloads.
+# Include a useful error message in the qemu-dm log file.
+if [ ! -e "/sys/class/net/${bridge}/bridge" ]
then
- if [ -e "/sys/class/net/eth${bridge#xenbr}/bridge" ]
- then
- bridge="eth${bridge#xenbr}"
- fi
+ echo "WARNING! ${bridge} is not a bridge. qemu-ifup exiting. VM may not
have a functioning networking stack."
+ exit 0
fi

ifconfig $1 0.0.0.0 up
++++++ xen-hvm-default-pae.diff ++++++
PAE must be on for 64-on-64 to work at all.

Index: xen-4.2.0-testing/tools/python/xen/xend/image.py
===================================================================
--- xen-4.2.0-testing.orig/tools/python/xen/xend/image.py
+++ xen-4.2.0-testing/tools/python/xen/xend/image.py
@@ -1038,7 +1038,7 @@ class X86_HVM_ImageHandler(HVMImageHandl

def configure(self, vmConfig):
HVMImageHandler.configure(self, vmConfig)
- self.pae = int(vmConfig['platform'].get('pae', 0))
+ self.pae = int(vmConfig['platform'].get('pae', 1))
self.vramsize = int(vmConfig['platform'].get('videoram',4)) * 1024

def buildDomain(self):
++++++ xen-ioemu-hvm-pv-support.diff ++++++
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
===================================================================
---
xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
@@ -30,6 +30,8 @@
#include "qemu-xen.h"
#include "net.h"
#include "xen_platform.h"
+#include "sysemu.h"
+#include <xc_private.h>

#include <assert.h>
#include <xenguest.h>
@@ -335,11 +337,51 @@ static void xen_platform_ioport_writeb(v
}
}

+static uint32_t ioport_base;
+
+static void platform_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+{
+ DECLARE_DOMCTL;
+ int rc;
+
+ if (val == 0)
+ qemu_invalidate_map_cache();
+
+ switch (addr - ioport_base) {
+ case 0:
+ fprintf(logfile, "Init hypercall page %x, addr %x.\n", val, addr);
+ domctl.domain = (domid_t)domid;
+ domctl.u.hypercall_init.gmfn = val;
+ domctl.cmd = XEN_DOMCTL_hypercall_init;
+ rc = xc_domctl(xc_handle, &domctl);
+ fprintf(logfile, "result -> %d.\n", rc);
+ break;
+ case 4:
+ fprintf(logfile, "Disconnect IDE hard disk...\n");
+ ide_unplug_harddisks();
+ fprintf(logfile, "Disconnect netifs...\n");
+ pci_unplug_netifs();
+ fprintf(logfile, "Shutdown taps...\n");
+ net_tap_shutdown_all();
+ fprintf(logfile, "Done.\n");
+ break;
+ default:
+ fprintf(logfile, "Write to bad port %x (base %x) on evtchn device.\n",
+ addr, ioport_base);
+ break;
+ }
+}
+
static void platform_ioport_map(PCIDevice *pci_dev, int region_num, uint32_t
addr, uint32_t size, int type)
{
+ ioport_base = addr;
+
+ register_ioport_write(addr, 16, 4, platform_ioport_write, NULL);
+/*
PCIXenPlatformState *d = (PCIXenPlatformState *)pci_dev;
register_ioport_write(addr, size, 1, xen_platform_ioport_writeb, d);
register_ioport_read(addr, size, 1, xen_platform_ioport_readb, d);
+*/
}

static uint32_t platform_mmio_read(void *opaque, target_phys_addr_t addr)
++++++ xen-managed-pci-device.patch ++++++
pci passthrough: handle managed pci devices

Handle managed PCI devices for libvirt usage. If a PCI device is configured
with "managed=1", it is made assignable (unbound from its original driver and
bound to the pcistub driver) before the VM starts, and reattached to its
original driver after the VM is shut off.

FATE#313570

Note: This patch was rejected upstream since xend is deprecated. See the
following thread for details

http://lists.xen.org/archives/html/xen-devel/2013-01/msg01145.html

Signed-off-by: Chunyan Liu <cyliu@xxxxxxxx>

Index: xen-4.2.1-testing/tools/python/xen/util/pci.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/util/pci.py
+++ xen-4.2.1-testing/tools/python/xen/util/pci.py
@@ -20,6 +20,8 @@ from xen.xend import sxp
from xen.xend.XendConstants import AUTO_PHP_SLOT
from xen.xend.XendSXPDev import dev_dict_to_sxp
from xen.xend.XendLogging import log
+from xen.xend.xenstore.xstransact import xstransact
+from xen.xend.XendError import XendError

# for 2.3 compatibility
try:
@@ -27,9 +29,11 @@ try:
except NameError:
from sets import Set as set

+XS_PCIBACK_PATH = '/xm/pciback'
PROC_PCI_PATH = '/proc/bus/pci/devices'
PROC_PCI_NUM_RESOURCES = 7

+SYSFS_PCI_DRVS_PATH = 'bus/pci/drivers'
SYSFS_PCI_DEVS_PATH = '/bus/pci/devices'
SYSFS_PCI_DEV_RESOURCE_PATH = '/resource'
SYSFS_PCI_DEV_CONFIG_PATH = '/config'
@@ -161,7 +165,7 @@ def PCI_BDF(domain, bus, slot, func):

def check_pci_opts(opts):
def f((k, v)):
- if k not in ['msitranslate', 'power_mgmt'] or \
+ if k not in ['msitranslate', 'power_mgmt', 'managed'] or \
not v.lower() in ['0', '1', 'yes', 'no']:
raise PciDeviceParseError('Invalid pci option %s=%s: ' % (k, v))

@@ -427,6 +431,9 @@ def __pci_dict_to_fmt_str(fmt, dev):
def pci_dict_to_bdf_str(dev):
return __pci_dict_to_fmt_str('%04x:%02x:%02x.%01x', dev)

+def pci_dict_to_xs_bdf_str(dev):
+ return __pci_dict_to_fmt_str('%04x-%02x-%02x-%01x', dev)
+
def pci_dict_to_xc_str(dev):
return __pci_dict_to_fmt_str('0x%x, 0x%x, 0x%x, 0x%x', dev)

@@ -560,6 +567,115 @@ def find_all_assignable_devices():
dev_list = dev_list + [dev]
return dev_list

+def pci_assignable_add(dev):
+ '''detach pci device from driver that we need to unbind from and rebind
+ to pciback driver, then it can be assigned to guest.
+ '''
+ sysfs_mnt = find_sysfs_mnt()
+ pcidev_path = sysfs_mnt + SYSFS_PCI_DEVS_PATH
+ pciback_path = sysfs_mnt + SYSFS_PCIBACK_PATH
+
+ # See if the device exists
+ pci_bdf = pci_dict_to_bdf_str(dev)
+ path = pcidev_path + '/' + pci_bdf
+ if not os.path.exists(path):
+ log.debug("Pci device %s doesn't exist" % pci_bdf)
+ return -1
+
+ # Check to see if it's already assigned to pciback
+ path = pciback_path + '/' + pci_bdf
+ if os.path.exists(path):
+ log.debug("Pci device %s is already assigned to pciback" % pci_bdf)
+ return 0
+
+ # Check to see if there's already a driver that we need to unbind from
+ path = pcidev_path + '/' + pci_bdf + '/driver'
+ drv_path = None
+ if os.path.exists(path):
+ drv_path = os.path.realpath(path).replace(" ", "\ ")
+ cmd = 'echo %s > %s/unbind' % (pci_bdf, drv_path)
+ if os.system(cmd):
+ log.debug("Couldn't unbind device")
+ return -1;
+
+ # Store driver_path for rebinding to dom0
+ if drv_path is not None:
+ xs_pci_bdf = pci_dict_to_xs_bdf_str(dev)
+ path = XS_PCIBACK_PATH + '/' + xs_pci_bdf
+ xstransact.Mkdir(path)
+ xstransact.Write(path, 'driver_path', drv_path)
+ else:
+ log.debug("Not bound to a driver, will not be rebound")
+
+ # Bind to pciback
+ try:
+ # Scan through /sys/.../pciback/slots looking for pcidev's BDF
+ slots = os.popen('cat %s/slots' % pciback_path).read()
+ if re.search(pci_bdf, slots) is None:
+ # write bdf to new_slot
+ cmd = 'echo %s > %s/new_slot' % (pci_bdf, pciback_path)
+ if os.system(cmd):
+ raise XendError("Couldn't add device to pciback new_slot")
+
+ # Bind to pciback
+ cmd = 'echo %s > %s/bind' % (pci_bdf, pciback_path)
+ if os.system(cmd):
+ raise XendError("Couldn't bind device to pciback")
+ except XendError:
+ # rebind to original driver
+ if drv_path is not None:
+ log.debug("Rebind to original driver")
+ cmd = 'echo %s > %s/bind' % (pci_bdf, drv_path)
+ if os.system(cmd):
+ log.debug("Failed to rebind")
+ return -1
+
+ return 0
+
+def pci_assignable_remove(dev):
+ '''unbind pci device from pciback, and rebind to host pci driver where it
+ was detached from in pci-assignable-add.
+ '''
+ sysfs_mnt = find_sysfs_mnt()
+ pcidrv_path = sysfs_mnt + SYSFS_PCI_DRVS_PATH
+ pciback_path = sysfs_mnt + SYSFS_PCIBACK_PATH
+ pci_bdf = pci_dict_to_bdf_str(dev)
+
+ # Unbind from pciback
+ path = pciback_path + '/' + pci_bdf
+ if os.path.exists(path):
+ # unbind
+ cmd = 'echo %s > %s/unbind' % (pci_bdf, pciback_path)
+ if os.system(cmd):
+ log.debug("Couldn't unbind device to pciback")
+ return -1
+
+ # remove slots if necessary
+ slots = os.popen('cat %s/slots' % pciback_path).read()
+ if re.search(pci_bdf, slots):
+ # write bdf to remove_slot
+ cmd = 'echo %s > %s/remove_slot' % (pci_bdf, pciback_path)
+ if os.system(cmd):
+ log.debug("Couldn't remove pciback slot")
+ return -1
+ else:
+ log.debug("Not bound to pciback")
+
+ # Rebind if necessary
+ xs_pci_bdf = pci_dict_to_xs_bdf_str(dev)
+ path = XS_PCIBACK_PATH + '/' + xs_pci_bdf
+ drv_path = xstransact.Read(path, 'driver_path')
+ if drv_path:
+ cmd = 'echo %s > %s/bind' % (pci_bdf, drv_path)
+ if os.system(cmd):
+ log.debug("Couldn't rebind to driver %s" % drv_path)
+ return -1
+ xstransact.Remove(path)
+ else:
+ log.debug("Counldn't find path for original driver. Not rebinding")
+
+ return 0
+
def transform_list(target, src):
''' src: its element is pci string (Format: xxxx:xx:xx.x).
target: its element is pci string, or a list of pci string.
Index: xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -303,7 +303,8 @@ def dom_get(dom):
return None

from xen.xend.server.pciif import parse_pci_name, PciDevice,\
- get_assigned_pci_devices, get_all_assigned_pci_devices
+ get_assigned_pci_devices, get_all_assigned_pci_devices,\
+ prepare_host_pci_devices, reattach_host_pci_devices


def do_FLR(domid, is_hvm):
@@ -317,6 +318,20 @@ def do_FLR(domid, is_hvm):
"parse it's resources - "+str(e))
dev.do_FLR(is_hvm, xoptions.get_pci_dev_assign_strict_check())

+def prepare_domain_pci_devices(domconfig):
+ ordered_refs = domconfig.ordered_device_refs()
+ for dev_uuid in ordered_refs:
+ devclass, devconfig = domconfig['devices'][dev_uuid]
+ if devclass == 'pci':
+ prepare_host_pci_devices(devconfig)
+
+def reattach_domain_pci_devices(domconfig):
+ ordered_refs = domconfig.ordered_device_refs()
+ for dev_uuid in ordered_refs:
+ devclass, devconfig = domconfig['devices'][dev_uuid]
+ if devclass == 'pci':
+ reattach_host_pci_devices(devconfig)
+
class XendDomainInfo:
"""An object represents a domain.

@@ -470,6 +485,7 @@ class XendDomainInfo:

if self._stateGet() in (XEN_API_VM_POWER_STATE_HALTED,
XEN_API_VM_POWER_STATE_SUSPENDED, XEN_API_VM_POWER_STATE_CRASHED):
try:
+ prepare_domain_pci_devices(self.info);
XendTask.log_progress(0, 30, self._constructDomain)
XendTask.log_progress(31, 60, self._initDomain)

@@ -496,6 +512,7 @@ class XendDomainInfo:
state = self._stateGet()
if state in (DOM_STATE_SUSPENDED, DOM_STATE_HALTED):
try:
+ prepare_domain_pci_devices(self.info)
self._constructDomain()

try:
@@ -712,6 +729,8 @@ class XendDomainInfo:
the device.
"""

+ if self.domid is None:
+ return
self.iommu_check_pod_mode()

# Test whether the devices can be assigned
@@ -851,6 +870,9 @@ class XendDomainInfo:

if self.domid is not None:
try:
+ if dev_type == 'pci':
+ prepare_host_pci_devices(dev_config_dict)
+
dev_config_dict['devid'] = devid = \
self._createDevice(dev_type, dev_config_dict)
if dev_type == 'tap2':
@@ -864,6 +886,7 @@ class XendDomainInfo:
if dev_type == 'pci':
for dev in dev_config_dict['devs']:
XendAPIStore.deregister(dev['uuid'], 'DPCI')
+ reattach_host_pci_devices(dev_config_dict)
elif dev_type == 'vscsi':
for dev in dev_config_dict['devs']:
XendAPIStore.deregister(dev['uuid'], 'DSCSI')
@@ -908,6 +931,9 @@ class XendDomainInfo:
dev_config = pci_convert_sxp_to_dict(dev_sxp)
dev = dev_config['devs'][0]

+ if self.domid is not None and pci_state == 'Initialising':
+ prepare_host_pci_devices(dev_config)
+
stubdomid = self.getStubdomDomid()
# Do HVM specific processing
if self.info.is_hvm():
@@ -984,6 +1010,9 @@ class XendDomainInfo:
new_dev_sxp = dev_control.configuration(devid)
self.info.device_update(dev_uuid, new_dev_sxp)

+ if pci_state == 'Closing':
+ reattach_host_pci_devices(dev_config)
+
# If there is no device left, destroy pci and remove config.
if num_devs == 0:
if self.info.is_hvm():
@@ -3168,6 +3197,7 @@ class XendDomainInfo:
log.debug("%s KiB need to add to Memory pool" %self.alloc_mem)
MemoryPool.instance().increase_memory(self.alloc_mem)

+ reattach_domain_pci_devices(self.info)
self._cleanup_phantom_devs(paths)
self._cleanupVm()

Index: xen-4.2.1-testing/tools/python/xen/xend/server/pciif.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/server/pciif.py
+++ xen-4.2.1-testing/tools/python/xen/xend/server/pciif.py
@@ -86,6 +86,48 @@ def get_all_assigned_pci_devices(domid =
pci_str_list = pci_str_list + get_assigned_pci_devices(int(d))
return pci_str_list

+def reattach_host_pci_devices(devconfig):
+ pci_dev_list = devconfig.get('devs', [])
+ for pci_dev in pci_dev_list:
+ managed = 0
+ pci_opts_config = pci_dev.get('opts', [])
+ for opt in pci_opts_config:
+ if opt[0] == 'managed':
+ managed = opt[1]
+ if managed:
+ if pci_assignable_remove(pci_dev) != 0:
+ raise VmError('pci_assignable_remove failed')
+
+def detach_host_pci_devices(devconfig):
+ pci_dev_list = devconfig.get('devs', [])
+ reattach = 0
+ for pci_dev in pci_dev_list:
+ managed = 0
+ pci_opts_config = pci_dev.get('opts', [])
+ for opt in pci_opts_config:
+ if opt[0] == 'managed':
+ managed = opt[1]
+ if managed:
+ if pci_assignable_add(pci_dev) != 0:
+ log.debug('pci_assignable_add failed')
+ reattach = 1
+ break
+
+ if reattach:
+ reattach_host_pci_devices(devconfig)
+ raise VmError('detach_host_pci_devices failed')
+
+def prepare_host_pci_devices(devconfig):
+ # Test whether the device used by other domain
+ pci_dev_list = devconfig.get('devs', [])
+ for pci_dev in pci_dev_list:
+ pci_name = pci_dict_to_bdf_str(pci_dev)
+ if pci_name in get_all_assigned_pci_devices():
+ raise VmError("failed to assign device %s that has"
+ " already been assigned to other domain." % pci_name)
+ # Detach 'managed' devices
+ detach_host_pci_devices(devconfig)
+
class PciController(DevController):

def __init__(self, vm):
++++++ xen-max-free-mem.diff ++++++
Index: xen-4.2.1-testing/tools/python/xen/xend/XendNode.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendNode.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendNode.py
@@ -949,11 +949,35 @@ class XendNode:

info['cpu_mhz'] = info['cpu_khz'] / 1000

- # physinfo is in KiB, need it in MiB
- info['total_memory'] = info['total_memory'] / 1024
- info['free_memory'] = info['free_memory'] / 1024
+ configured_floor = xendoptions().get_dom0_min_mem() * 1024
+ from xen.xend import balloon
+ try:
+ kernel_floor = balloon.get_dom0_min_target()
+ except:
+ kernel_floor = 0
+ dom0_min_mem = max(configured_floor, kernel_floor)
+ dom0_mem = balloon.get_dom0_current_alloc()
+ extra_mem = 0
+ if dom0_min_mem > 0 and dom0_mem > dom0_min_mem:
+ extra_mem = dom0_mem - dom0_min_mem
+ info['free_memory'] = info['free_memory'] + info['scrub_memory']
+ info['max_free_memory'] = info['free_memory'] + extra_mem
info['free_cpus'] = len(XendCPUPool.unbound_cpus())

+ # Convert KiB to MiB, rounding down to be conservative
+ info['total_memory'] = info['total_memory'] / 1024
+ info['free_memory'] = info['free_memory'] / 1024
+ info['max_free_memory'] = info['max_free_memory'] / 1024
+
+ # FIXME: These are hard-coded to be the inverse of the getXenMemory
+ # functions in image.py. Find a cleaner way.
+ info['max_para_memory'] = info['max_free_memory'] - 4
+ if info['max_para_memory'] < 0:
+ info['max_para_memory'] = 0
+ info['max_hvm_memory'] = int((info['max_free_memory']-12) *
(1-2.4/1024))
+ if info['max_hvm_memory'] < 0:
+ info['max_hvm_memory'] = 0
+
ITEM_ORDER = ['nr_cpus',
'nr_nodes',
'cores_per_socket',
@@ -964,6 +988,9 @@ class XendNode:
'total_memory',
'free_memory',
'free_cpus',
+ 'max_free_memory',
+ 'max_para_memory',
+ 'max_hvm_memory',
]

if show_numa != 0:
Index: xen-4.2.1-testing/tools/python/xen/xend/balloon.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/balloon.py
+++ xen-4.2.1-testing/tools/python/xen/xend/balloon.py
@@ -43,6 +43,8 @@ SLEEP_TIME_GROWTH = 0.1
# label actually shown in the PROC_XEN_BALLOON file.
#labels = { 'current' : 'Current allocation',
# 'target' : 'Requested target',
+# 'min-target' : 'Minimum target',
+# 'max-target' : 'Maximum target',
# 'low-balloon' : 'Low-mem balloon',
# 'high-balloon' : 'High-mem balloon',
# 'limit' : 'Xen hard limit' }
@@ -69,6 +71,23 @@ def get_dom0_target_alloc():
raise VmError('Failed to query target memory allocation of dom0.')
return kb

+def get_dom0_min_target():
+ """Returns the minimum amount of memory (in KiB) that dom0 will accept."""
+
+ kb = _get_proc_balloon('min-target')
+ if kb == None:
+ raise VmError('Failed to query minimum target memory allocation of
dom0.')
+ return kb
+
+def get_dom0_max_target():
+ """Returns the maximum amount of memory (in KiB) that is potentially
+ visible to dom0."""
+
+ kb = _get_proc_balloon('max-target')
+ if kb == None:
+ raise VmError('Failed to query maximum target memory allocation of
dom0.')
+ return kb
+
def free(need_mem, dominfo):
"""Balloon out memory from the privileged domain so that there is the
specified required amount (in KiB) free.
Index: xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -1473,6 +1473,27 @@ class XendDomainInfo:
self.info['abort_if_busy'] = str(abort_if_busy)
self.info['log_save_progress'] = str(log_save_progress)

+ def capAndSetMemoryTarget(self, target):
+ """Potentially lowers the requested target to the largest possible
+ value (i.e., caps it), and then sets the memory target of this domain
+ to that value.
+ @param target in MiB.
+ """
+ max_target = 0
+ if self.domid == 0:
+ try:
+ from balloon import get_dom0_max_target
+ max_target = get_dom0_max_target() / 1024
+ except:
+ # It's nice to cap the max at sane values, but harmless to set
+ # them high. Carry on.
+ pass
+ if max_target and target > max_target:
+ log.debug("Requested memory target %d MiB; maximum reasonable
is %d MiB.",
+ target, max_target)
+ target = max_target
+ self.setMemoryTarget(target)
+
def setMemoryTarget(self, target):
"""Set the memory target of this domain.
@param target: In MiB.
Index: xen-4.2.1-testing/tools/python/xen/xend/server/SrvDomain.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/server/SrvDomain.py
+++ xen-4.2.1-testing/tools/python/xen/xend/server/SrvDomain.py
@@ -187,7 +187,7 @@ class SrvDomain(SrvDir):


def op_mem_target_set(self, _, req):
- return self.call(self.dom.setMemoryTarget,
+ return self.call(self.dom.capAndSetMemoryTarget,
[['target', 'int']],
req)

Index: xen-4.2.1-testing/tools/python/xen/xend/osdep.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/osdep.py
+++ xen-4.2.1-testing/tools/python/xen/xend/osdep.py
@@ -42,6 +42,8 @@ def _linux_balloon_stat_proc(label):

xend2linux_labels = { 'current' : 'Current allocation',
'target' : 'Requested target',
+ 'min-target' : 'Minimum target',
+ 'max-target' : 'Maximum target',
'low-balloon' : 'Low-mem balloon',
'high-balloon' : 'High-mem balloon',
'limit' : 'Xen hard limit' }
++++++ xen-migration-bridge-check.patch ++++++
bnc#757525

Index: xen-4.2.0-testing/tools/python/xen/xend/server/netif.py
===================================================================
--- xen-4.2.0-testing.orig/tools/python/xen/xend/server/netif.py
+++ xen-4.2.0-testing/tools/python/xen/xend/server/netif.py
@@ -23,6 +23,7 @@
import os
import random
import re
+import commands

from xen.xend import XendOptions, sxp
from xen.xend.server.DevController import DevController
@@ -101,6 +102,14 @@ class NetifController(DevController):
def __init__(self, vm):
DevController.__init__(self, vm)

+ def createDevice(self, config):
+ bridge = config.get('bridge')
+ if bridge is not None:
+ bridge_result = commands.getstatusoutput("/sbin/ifconfig %s" %
bridge)
+ if bridge_result[0] != 0:
+ raise VmError('Network bridge does not exist: %s' % bridge)
+ DevController.createDevice(self, config)
+
def getDeviceDetails(self, config):
"""@see DevController.getDeviceDetails"""

++++++ xen-minimum-restart-time.patch ++++++
References: bnc#661298

Index: xen-4.2.0-testing/tools/python/xen/xend/XendConstants.py
===================================================================
--- xen-4.2.0-testing.orig/tools/python/xen/xend/XendConstants.py
+++ xen-4.2.0-testing/tools/python/xen/xend/XendConstants.py
@@ -94,7 +94,7 @@ DOM_STATES_OLD = [
SHUTDOWN_TIMEOUT = (60.0 * 5)

"""Minimum time between domain restarts in seconds."""
-MINIMUM_RESTART_TIME = 60
+MINIMUM_RESTART_TIME = 10

RESTART_IN_PROGRESS = 'xend/restart_in_progress'
DUMPCORE_IN_PROGRESS = 'xend/dumpcore_in_progress'
++++++ xen-no-dummy-nfs-ip.diff ++++++
Index: xen-4.2.0-testing/tools/python/xen/xm/create.py
===================================================================
--- xen-4.2.0-testing.orig/tools/python/xen/xm/create.py
+++ xen-4.2.0-testing/tools/python/xen/xm/create.py
@@ -1342,9 +1342,8 @@ def preprocess_access_control(vals):

def preprocess_ip(vals):
if vals.ip or vals.dhcp != 'off':
- dummy_nfs_server = '127.0.255.255'
ip = (vals.ip
- + ':' + (vals.nfs_server or dummy_nfs_server)
+ + ':' + (vals.nfs_server or '')
+ ':' + vals.gateway
+ ':' + vals.netmask
+ ':' + vals.hostname
++++++ xen-paths.diff ++++++
Index: xen-4.2.0-testing/tools/python/xen/xm/create.py
===================================================================
--- xen-4.2.0-testing.orig/tools/python/xen/xm/create.py
+++ xen-4.2.0-testing/tools/python/xen/xm/create.py
@@ -73,7 +73,7 @@ gopts.opt('quiet', short='q',
use="Quiet.")

gopts.opt('path', val='PATH',
- fn=set_value, default='.:' + auxbin.xen_configdir(),
+ fn=set_value, default='.:' + auxbin.xen_configdir() + "/vm",
use="Search path for configuration scripts. "
"The value of PATH is a colon-separated directory list.")

Index: xen-4.2.0-testing/docs/man/xm.pod.1
===================================================================
--- xen-4.2.0-testing.orig/docs/man/xm.pod.1
+++ xen-4.2.0-testing/docs/man/xm.pod.1
@@ -79,7 +79,7 @@ in the config file. See L<xmdomain.cfg>
format, and possible options used in either the configfile or for I<vars>.

I<configfile> can either be an absolute path to a file, or a relative
-path to a file located in /etc/xen.
+path to a file located in /etc/xen/vm.

Create will return B<as soon> as the domain is started. This B<does
not> mean the guest OS in the domain has actually booted, or is
@@ -160,7 +160,7 @@ B<EXAMPLES>

xm create Fedora4

-This creates a domain with the file /etc/xen/Fedora4, and returns as
+This creates a domain with the file /etc/xen/vm/Fedora4, and returns as
soon as it is run.

=item I<without config file>
Index: xen-4.2.0-testing/docs/man/xmdomain.cfg.pod.5
===================================================================
--- xen-4.2.0-testing.orig/docs/man/xmdomain.cfg.pod.5
+++ xen-4.2.0-testing/docs/man/xmdomain.cfg.pod.5
@@ -4,9 +4,9 @@ xmdomain.cfg - xm domain config file for

=head1 SYNOPSIS

- /etc/xen/myxendomain
- /etc/xen/myxendomain2
- /etc/xen/auto/myxenautostarted
+ /etc/xen/auto/
+ /etc/xen/examples/
+ /etc/xen/vm/

=head1 DESCRIPTION

@@ -14,14 +14,14 @@ The B<xm>(1) program uses python executa
domains to create from scratch. Each of these config files needs to
contain a number of required options, and may specify many more.

-Domain configuration files live in /etc/xen by default, if you store
+Domain configuration files live in /etc/xen/vm by default. If you store
config files anywhere else the full path to the config file must be
specified in the I<xm create> command.

/etc/xen/auto is a special case. Domain config files in that
directory will be started automatically at system boot if the
xendomain init script is enabled. The contents of /etc/xen/auto
-should be symlinks to files in /etc/xen to allow I<xm create> to be
+should be symlinks to files in /etc/xen/vm to allow I<xm create> to be
used without full paths.

Options are specified by I<name = value> statements in the
++++++ xen-qemu-iscsi-fix.patch ++++++
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/xenstore.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
@@ -449,7 +449,7 @@ void xenstore_parse_domain_config(int hv
char *buf = NULL;
char *fpath = NULL, *bpath = NULL, *btype = NULL,
*dev = NULL, *params = NULL, *drv = NULL;
- int i, ret, is_tap;
+ int i, j, ret, is_tap;
unsigned int len, num, hd_index, pci_devid = 0;
BlockDriverState *bs;
BlockDriver *format;
@@ -533,12 +533,7 @@ void xenstore_parse_domain_config(int hv
continue;
free(danger_type);
danger_type = xs_read(xsh, XBT_NULL, danger_buf, &len);
- if (pasprintf(&buf, "%s/params", bpath) == -1)
- continue;
- free(params);
- params = xs_read(xsh, XBT_NULL, buf, &len);
- if (params == NULL)
- continue;
+
/* read the name of the device */
if (pasprintf(&buf, "%s/type", bpath) == -1)
continue;
@@ -546,6 +541,35 @@ void xenstore_parse_domain_config(int hv
drv = xs_read(xsh, XBT_NULL, buf, &len);
if (drv == NULL)
continue;
+
+ free(params);
+ if (!strcmp(drv,"iscsi") || !strcmp(drv, "npiv") ||
+ !strcmp(drv,"dmmd")) {
+ if (pasprintf(&buf, "%s/node", bpath) == -1)
+ continue;
+
+ /* wait for block-[iscsi|npiv|dmmd] script to complete and populate
the
+ * node entry. try 30 times (30 secs) */
+ for (j = 0; j < 30; j++) {
+ params = xs_read(xsh, XBT_NULL, buf, &len);
+ if (params != NULL)
+ break;
+ sleep(1);
+ }
+ if (params == NULL) {
+ fprintf(stderr, "qemu: %s device not found -- timed out \n", drv);
+ continue;
+ }
+ }
+ else
+ {
+ if (pasprintf(&buf, "%s/params", bpath) == -1)
+ continue;
+ params = xs_read(xsh, XBT_NULL, buf, &len);
+ if (params == NULL)
+ continue;
+ }
+
/* Obtain blktap sub-type prefix */
if ((!strcmp(drv, "tap") || !strcmp(drv, "qdisk")) && params[0]) {
char *offset = strchr(params, ':');
@@ -663,6 +687,12 @@ void xenstore_parse_domain_config(int hv
format = &bdrv_host_device;
else
format = &bdrv_raw;
+ } else if (!strcmp(drv,"iscsi")) {
+ format = &bdrv_raw;
+ } else if (!strcmp(drv,"npiv")) {
+ format = &bdrv_raw;
+ } else if (!strcmp(drv,"dmmd")) {
+ format = &bdrv_raw;
} else {
format = bdrv_find_format(drv);
if (!format) {
++++++ xen-updown.sh ++++++
#!/bin/bash
#
# Print an optional caller-supplied message followed by the help text,
# then terminate the script.
#
# Arguments: any words to print on the first line (may be none).
# Exit code: $R_USAGE. NOTE(review): R_USAGE is not assigned anywhere in
# this script - presumably it comes from the sourced scripts/functions file.
usage () {
    # Deliberately unquoted, exactly as before: the words are re-split and
    # joined with single blanks by echo.
    echo $@
    # One printf call emits the fixed help lines, one argument per line.
    printf '%s\n' \
        "usage: $0 [<config>] <interface> [-o <options>]" \
        "" \
        "Options are:" \
        " debug : be verbose" \
        " rc : indicates that we are called from rcnetwork" \
        "" \
        "Any another options are ignored"
    exit $R_USAGE
}

# Toolstack command used by xm_list: default to "xm", and switch to "xl"
# when pidof finds no /usr/sbin/xend process.
xm_cmd="xm"
pidof -x /usr/sbin/xend >/dev/null 2>&1 || xm_cmd="xl"

######################################################################
# change the working directory and source some common files
#
# Every failure in this section exits with R_INTERNAL.
R_INTERNAL=1 # internal error, e.g. no config or missing scripts
cd /etc/sysconfig/network || exit $R_INTERNAL
# ./config is optional: it is sourced only if present.
test -f ./config && . ./config
# scripts/functions is mandatory: a missing file or a failing source aborts.
test -f scripts/functions && . scripts/functions || exit $R_INTERNAL

######################################################################
# check arguments and how we are called (in case of links)
#
# Expected command line: [<config>] <interface> [-o <options>]
# The invocation path is kept because the dispatch at the end of the script
# matches it against *if-up.d* / *if-down.d*.
SCRIPTNAME=${0}
# NOTE(review): debug is not defined in this file - presumably it is provided
# by the scripts/functions file sourced above.
debug $*
# No argument, -h, or any argument containing "help": print usage and exit.
case $1 in ""|-h|*help*) usage ;; esac
# The first argument is the config name.
CONFIG="$1"
shift
# A following argument other than "-o" is the interface name; without one
# the config name is used as the interface name too.
if [ "x$1" != x -a "x$1" != "x-o" ] ; then
INTERFACE="$1"
else
INTERFACE="$CONFIG"
fi
# Drop the interface argument (or the -o that stood in its place) ...
shift
# ... and skip a -o marker that follows an explicit interface argument.
test "x$1" = "x-o" && shift
# Option defaults; every remaining argument is treated as an option word.
DEBUG=no
RUN_FROM_RC=no
while [ $# -gt 0 ]; do
case $1 in
debug) DEBUG=yes ;;
rc) RUN_FROM_RC=yes ;;
# Unknown options are only reported through debug, never rejected.
*) debug unknown option $1 ;;
esac
shift
done

# usage: ifprint <err_mesg|mesg|...> message....
#
# Run the command named by the first argument with the remaining arguments
# joined into a single message string. When RUN_FROM_RC is "yes" and
# INTERFACE is not "all", the message is prefixed with the interface name
# formatted by printf. Returns 1 if no command name was given, otherwise the
# status of the invoked command.
ifprint() {
    func=$1
    shift
    if [ -z "$func" ] ; then
        return 1
    fi

    # Plain message unless called from rc for one specific interface.
    if [ "$RUN_FROM_RC" != yes ] || [ "$INTERFACE" = all ] ; then
        $func "$*"
        return
    fi

    $func "$(printf " %-9s " "$INTERFACE")$*"
}

#
# xen related code
#

# Report whether xend is running: succeeds (0) only when /etc/init.d/xend is
# executable and its "status" action succeeds; returns 1 otherwise.
# The init script's output is discarded.
is_xend_running() {
    if [ -x /etc/init.d/xend ] && /etc/init.d/xend status &>/dev/null ; then
        return 0
    fi
    return 1
}
# Leave the script with exit status 0 when xend is not running; otherwise
# return normally so the caller can carry on.
exit_if_xend_not_running() {
    if ! is_xend_running ; then
        debug "$0: xend is not running - nothing to do"
        exit 0
    fi
}

# (modified) functions from /etc/init.d/xendomains
#
# Split one line of domain-list output into the global variables
# name, rest, id, mem, vcpu, state and tm.
#
# The last 36 characters of the line are treated as fixed-width columns
# (4 id, 6 mem, 6 vcpu, 11 state, remainder time); everything before them is
# the name column, of which only the text up to the first blank is kept.
parseln()
{
    local line=$1
    local tail_width=36

    name=${line:0:$(( ${#line} - tail_width ))}
    name=${name%% *}
    rest="${line: -$tail_width}"

    # Passing each unquoted slice through echo strips the column padding.
    id=$(echo ${rest:0:4})
    mem=$(echo ${rest:4:6})
    vcpu=$(echo ${rest:10:6})
    state=$(echo ${rest:16:11})
    tm=$(echo ${rest:27})
}

# Print the output of "${xm_cmd} list" without its header line.
# TERM is forced to vt100 for the list command only; grep -v drops every
# line matching '^Name *ID'.
xm_list()
{
TERM=vt100 ${xm_cmd} list | grep -v '^Name *ID'
}

# For the specified vm, return a list of vifs that are connected to $INTERFACE
#
# Argument: $1 - domain id.
# Output:   the matching port names on one line, separated by blanks.
#
# Every entry below /sys/class/net/$INTERFACE/brif/ is examined. Entries
# listed in BRIDGE_PORTS of /etc/sysconfig/network/ifcfg-$INTERFACE are
# skipped; of the rest, only names of the form tap<id>.* or vif<id>.* are
# kept. The variables id, vifs, vif, p and BRIDGE_PORTS are set globally.
list_vifs()
{
    id=$1
    vifs=()
    for vif in $(ls -1 "/sys/class/net/$INTERFACE/brif/" 2>/dev/null) ; do
        # Reset BRIDGE_PORTS, then evaluate the BRIDGE_PORTS= assignment
        # (if any) found in the interface's ifcfg file.
        eval BRIDGE_PORTS="" $(grep "^[[:space:]]*BRIDGE_PORTS=" \
            "/etc/sysconfig/network/ifcfg-$INTERFACE" 2>/dev/null)

        # Configured bridge ports are not domain vifs: go to the next entry.
        for p in $BRIDGE_PORTS ; do
            if [ "x$p" = "x$vif" ] ; then
                continue 2
            fi
        done

        case $vif in
            tap${id}\.*|vif${id}\.*)
                vifs=(${vifs[@]} $vif)
                ;;
        esac
    done
    echo "${vifs[@]}"
}

# Write list of concerned vifs to state file
#
# The state file is ${RUN_FILES_BASE}/xen/$INTERFACE and receives a single
# line VIFS='...' built from the global vifs array. The directory is created
# when missing. Returns 1 if the directory cannot be created, the status of
# rm if the old file cannot be removed, otherwise the status of the write.
save_sysconfig_state()
{
    local state_dir="${RUN_FILES_BASE}/xen/"
    local state_file="${RUN_FILES_BASE}/xen/$INTERFACE"

    if [ ! -d "$state_dir" ] ; then
        mkdir -p "$state_dir" || return 1
    fi

    # Remove any stale file first; a bare return hands back rm's status.
    rm -f "$state_file" || return

    echo "VIFS='${vifs[@]}'" > "$state_file"
}

# Dispatch on the path the script was invoked as: the if-up.d branch
# re-attaches vifs recorded in the state file, the if-down.d branch records
# them, and any other invocation prints usage.
case $SCRIPTNAME in
*if-up.d*)
exit_if_xend_not_running

# Only act when a state file for this interface exists.
if test -f "${RUN_FILES_BASE}/xen/$INTERFACE" ; then
# Sourcing the state file sets VIFS (see save_sysconfig_state).
. "${RUN_FILES_BASE}/xen/$INTERFACE"

for vif in ${VIFS}; do
# Skip vifs whose network device no longer exists.
test -d "/sys/class/net/${vif}" || continue
# Skip vifs that are already a port of $INTERFACE.
test -d "/sys/class/net/${INTERFACE}/brif/${vif}" && \
continue
# NOTE(review): is_iface_up is not defined in this file - presumably it
# comes from the sourced scripts/functions.
if ! is_iface_up ${vif} ; then
ip link set dev ${vif} up || continue
fi
# Failures of brctl are ignored and its output is discarded.
brctl addif ${INTERFACE} ${vif} &>/dev/null
done

# remove sysconfig state
rm -f "${RUN_FILES_BASE}/xen/$INTERFACE"
fi
;;
*if-down.d*)
exit_if_xend_not_running
# Nothing to do if $INTERFACE has no brif directory in sysfs.
test -d "/sys/class/net/$INTERFACE/brif/" || exit 0

# Remember vifs attached to $INTERFACE
vifs=()
# NOTE(review): num is assigned here but not read anywhere in this script.
num=0
while read LN; do
parseln "$LN"
# Skip the domain with id 0 and lines that yield no state column.
[ "$id" = 0 ] && continue
[ -z "$state" ] && continue

vifs=(${vifs[@]} $(list_vifs $id))
# Process substitution (not a pipe) keeps the loop in the current shell,
# so the vifs array filled above is still set afterwards.
done < <(xm_list)

# Write the state file only if at least one vif was found.
[ -z "${vifs[*]}" ] || save_sysconfig_state

;;
*)
usage
;;
esac

++++++ xen-xm-top-needs-root.diff ++++++
From: Charles Coffing <ccoffing@xxxxxxxxxx>
Upstream: no

Index: xen-4.2.0-testing/tools/python/xen/xm/main.py
===================================================================
--- xen-4.2.0-testing.orig/tools/python/xen/xm/main.py
+++ xen-4.2.0-testing/tools/python/xen/xm/main.py
@@ -2201,6 +2201,10 @@ def xm_debug_keys(args):
def xm_top(args):
arg_check(args, "top", 0)

+ # A hack to get a clear error message if run as non-root
+ if os.geteuid() != 0:
+ raise IOError()
+
os.system('xentop')

def xm_dmesg(args):
++++++ xen-xmexample-vti.diff ++++++
Index: xen-4.2.0-testing/tools/examples/Makefile
===================================================================
--- xen-4.2.0-testing.orig/tools/examples/Makefile
+++ xen-4.2.0-testing/tools/examples/Makefile
@@ -18,7 +18,6 @@ XEN_CONFIGS += xmexample.hvm
XEN_CONFIGS += xmexample.hvm-stubdom
XEN_CONFIGS += xmexample.pv-grub
XEN_CONFIGS += xmexample.nbd
-XEN_CONFIGS += xmexample.vti
XEN_CONFIGS += xlexample.hvm
XEN_CONFIGS += xlexample.pvlinux
XEN_CONFIGS += xend-pci-quirks.sxp
++++++ xen-xmexample.diff ++++++
Change various example paths in the config files to match SUSE.

Index: xen-4.2.0-testing/tools/examples/xmexample1
===================================================================
--- xen-4.2.0-testing.orig/tools/examples/xmexample1
+++ xen-4.2.0-testing/tools/examples/xmexample1
@@ -7,11 +7,17 @@
#============================================================================

#----------------------------------------------------------------------------
-# Kernel image file.
-kernel = "/boot/vmlinuz-2.6.10-xenU"
-
-# Optional ramdisk.
-#ramdisk = "/boot/initrd.gz"
+# Kernel image file and (optional) ramdisk (initrd).
+kernel = "/boot/vmlinuz-xen"
+ramdisk = "/boot/initrd-xen"
+
+# Or use a bootloader instead of kernel/ramdisk to get the kernel from domU FS
+# domUloader bootloader example:
+#bootloader = "/usr/lib/xen/boot/domUloader.py"
+#bootentry = "hda2:/vmlinuz-xen,/initrd-xen"
+# pygrub bootloader example:
+#bootloader="/usr/bin/pygrub"
+#bootargs=""

# The domain build function. Default is 'linux'.
#builder='linux'
@@ -49,11 +55,11 @@ name = "ExampleDomain"
#
# or optionally override backend, bridge, ip, mac, script, type, or vifname:
#
-# vif = [ 'mac=00:16:3e:00:00:11, bridge=xenbr0' ]
+# vif = [ 'mac=00:16:3e:00:00:11, bridge=br0' ]
#
# or more than one interface may be configured:
#
-# vif = [ '', 'bridge=xenbr1' ]
+# vif = [ '', 'bridge=br1' ]

vif = [ '' ]

@@ -131,7 +137,7 @@ disk = [ 'phy:hda1,hda1,w' ]
#hostname= "vm%d" % vmid

# Set root device.
-root = "/dev/hda1 ro"
+root = "/dev/hda1"

# Root device for nfs.
#root = "/dev/nfs"
@@ -140,8 +146,8 @@ root = "/dev/hda1 ro"
# Root directory on the nfs server.
#nfs_root = '/full/path/to/root/directory'

-# Sets runlevel 4.
-extra = "4"
+# Extra arguments to pass to the kernel.
+extra = ""

#----------------------------------------------------------------------------
# Configure the behaviour when a domain exits. There are three 'reasons'
Index: xen-4.2.0-testing/tools/examples/xmexample2
===================================================================
--- xen-4.2.0-testing.orig/tools/examples/xmexample2
+++ xen-4.2.0-testing/tools/examples/xmexample2
@@ -35,11 +35,17 @@ xm_vars.var('vmid',
xm_vars.check()

#----------------------------------------------------------------------------
-# Kernel image file.
-kernel = "/boot/vmlinuz-2.6.10-xenU"
-
-# Optional ramdisk.
-#ramdisk = "/boot/initrd.gz"
+# Kernel image file and (optional) ramdisk (initrd).
+kernel = "/boot/vmlinuz-xen"
+ramdisk = "/boot/initrd-xen"
+
+# Or use a bootloader instead of kernel/ramdisk to get the kernel from domU FS
+# domUloader bootloader example:
+#bootloader = "/usr/lib/xen/boot/domUloader.py"
+#bootentry = "hda2:/vmlinuz-xen,/initrd-xen"
+# pygrub bootloader example:
+#bootloader="/usr/bin/pygrub"
+#bootargs=""

# The domain build function. Default is 'linux'.
#builder='linux'
@@ -80,11 +86,11 @@ vcpus = 4 # make your domain a 4-way
#
# or optionally override backend, bridge, ip, mac, script, type, or vifname:
#
-# vif = [ 'mac=00:16:3e:00:00:11, bridge=xenbr0' ]
+# vif = [ 'mac=00:16:3e:00:00:11, bridge=br0' ]
#
# or more than one interface may be configured:
#
-# vif = [ '', 'bridge=xenbr1' ]
+# vif = [ '', 'bridge=br1' ]

vif = [ '' ]

@@ -167,7 +173,7 @@ disk = [ 'phy:sda%d,sda1,w' % (7+vmid),
#hostname= "vm%d" % vmid

# Set root device.
-root = "/dev/sda1 ro"
+root = "/dev/sda1"

# Root device for nfs.
#root = "/dev/nfs"
@@ -176,8 +182,8 @@ root = "/dev/sda1 ro"
# Root directory on the nfs server.
#nfs_root = '/full/path/to/root/directory'

-# Sets runlevel 4 and the device for /usr.
-extra = "4 VMID=%d usr=/dev/sda6" % vmid
+# Sets the device for /usr.
+extra = "VMID=%d usr=/dev/sda6" % vmid

#----------------------------------------------------------------------------
# Configure the behaviour when a domain exits. There are three 'reasons'
Index: xen-4.2.0-testing/tools/examples/xmexample3
===================================================================
--- xen-4.2.0-testing.orig/tools/examples/xmexample3
+++ xen-4.2.0-testing/tools/examples/xmexample3
@@ -35,11 +35,17 @@ xm_vars.var('vmid',
xm_vars.check()

#----------------------------------------------------------------------------
-# Kernel image file.
-kernel = "/path/to/domU/kernel"
+# Kernel image file and (optional) ramdisk (initrd).
+kernel = "/boot/vmlinuz-xen"
+ramdisk = "/boot/initrd-xen"

-# Optional ramdisk.
-#ramdisk = "/boot/initrd.gz"
+# Or use a bootloader instead of kernel/ramdisk to get the kernel from domU FS
+# domUloader bootloader example:
+#bootloader = "/usr/lib/xen/boot/domUloader.py"
+#bootentry = "hda2:/vmlinuz-xen,/initrd-xen"
+# pygrub bootloader example:
+#bootloader="/usr/bin/pygrub"
+#bootargs=""

# The domain build function. Default is 'linux'.
#builder='linux'
Index: xen-4.2.0-testing/tools/examples/xmexample.hvm
===================================================================
--- xen-4.2.0-testing.orig/tools/examples/xmexample.hvm
+++ xen-4.2.0-testing/tools/examples/xmexample.hvm
@@ -64,11 +64,26 @@ name = "ExampleHVMDomain"
#cpus = "0-3,5,^1" # all vcpus run on cpus 0,2,3,5
#cpus = ["2", "3"] # VCPU0 runs on CPU2, VCPU1 runs on CPU3

-# Optionally define mac and/or bridge for the network interfaces.
-# Random MACs are assigned if not given.
-#vif = [ 'type=ioemu, mac=00:16:3e:00:00:11, bridge=xenbr0, model=ne2k_pci' ]
-# type=ioemu specify the NIC is an ioemu device not netfront
-vif = [ 'type=ioemu, bridge=xenbr0' ]
+#----------------------------------------------------------------------------
+# Define network interfaces.
+
+# By default, no network interfaces are configured. You may have one created
+# with sensible defaults using an empty vif clause:
+#
+# vif = [ '' ]
+#
+# or optionally override backend, bridge, ip, mac, script, type, model,
+# or vifname.
+#
+# An emulated RealTek 8139 network interface can be configured with:
+#
+# vif = [ 'mac=00:16:3e:00:00:11, type=ioemu, model=rtl8139, bridge=br0' ]
+#
+# A para-virtual network interface can be configured with:
+#
+# vif = [ 'mac=00:16:3e:00:00:11, type=netfront, bridge=br0' ]
+#
+vif = [ '' ]

#----------------------------------------------------------------------------
# Define the disk devices you want the domain to have access to, and
@@ -78,7 +93,7 @@ vif = [ 'type=ioemu, bridge=xenbr0' ]
# and MODE is r for read-only, w for read-write.

#disk = [ 'phy:hda1,hda1,r' ]
-disk = [ 'file:/var/images/min-el3-i386.img,hda,w', ',hdc:cdrom,r' ]
+disk = [ 'file:/var/lib/xen/images/disk.img,ioemu:hda,w', ',hdc:cdrom,r' ]

#----------------------------------------------------------------------------
# Configure the behaviour when a domain exits. There are three 'reasons'
Index: xen-4.2.0-testing/docs/man/xmdomain.cfg.pod.5
===================================================================
--- xen-4.2.0-testing.orig/docs/man/xmdomain.cfg.pod.5
+++ xen-4.2.0-testing/docs/man/xmdomain.cfg.pod.5
@@ -38,13 +38,13 @@ file.

The kernel image for the domain. The format of the parameter is the
fully qualified path to the kernel image file,
-i.e. I</boot/vmlinuz-2.6.12-xenU>.
+i.e. I</boot/vmlinuz-xen>.


=item B<ramdisk>

The initial ramdisk for the domain. The format of the parameter is
-the fully qualified path to the initrd, i.e. I</boot/initrd.gz>. On
+the fully qualified path to the initrd, i.e. I</boot/initrd-xen>. On
many Linux distros you will not need a ramdisk if using the default
xen kernel.

@@ -321,14 +321,14 @@ configured. They should not be consider

=item I<A Loopback File as Root>

- kernel = "/boot/vmlinuz-2.6-xenU"
+ kernel = "/boot/vmlinuz-xen"
memory = 128
name = "MyLinux"
- root = "/dev/hda1 ro"
- disk = [ "file:/var/xen/mylinux.img,hda1,w" ]
+ root = "/dev/hda1"
+ disk = [ "file:/var/lib/xen/images/MyLinux/hda1,hda1,w" ]

This creates a domain called MyLinux with 128 MB of memory using a
-default xen kernel, and the file /var/xen/mylinux.img loopback mounted
+default xen kernel, and the file hda1 loopback mounted
at hda1, which is the root filesystem.

=item I<NFS Root>
Index: xen-4.2.0-testing/docs/man/xm.pod.1
===================================================================
--- xen-4.2.0-testing.orig/docs/man/xm.pod.1
+++ xen-4.2.0-testing/docs/man/xm.pod.1
@@ -165,8 +165,8 @@ soon as it is run.

=item I<without config file>

- xm create /dev/null ramdisk=initrd.img \
- kernel=/boot/vmlinuz-2.6.12.6-xenU \
+ xm create /dev/null ramdisk=initrd-xen \
+ kernel=/boot/vmlinuz-xen \
name=ramdisk vif='' vcpus=1 \
memory=64 root=/dev/ram0

Index: xen-4.2.0-testing/tools/examples/xmexample.hvm-stubdom
===================================================================
--- xen-4.2.0-testing.orig/tools/examples/xmexample.hvm-stubdom
+++ xen-4.2.0-testing/tools/examples/xmexample.hvm-stubdom
@@ -55,11 +55,26 @@ name = "xmexample.hvm"
#cpus = "0-3,5,^1" # all vcpus run on cpus 0,2,3,5
#cpus = ["2", "3"] # VCPU0 runs on CPU2, VCPU1 runs on CPU3

-# Optionally define mac and/or bridge for the network interfaces.
-# Random MACs are assigned if not given.
-#vif = [ 'type=ioemu, mac=00:16:3e:00:00:11, bridge=xenbr0, model=ne2k_pci' ]
-# type=ioemu specify the NIC is an ioemu device not netfront
-vif = [ 'type=ioemu, bridge=xenbr0' ]
+#----------------------------------------------------------------------------
+# Define network interfaces.
+
+# By default, no network interfaces are configured. You may have one created
+# with sensible defaults using an empty vif clause:
+#
+# vif = [ '' ]
+#
+# or optionally override backend, bridge, ip, mac, script, type, model,
+# or vifname.
+#
+# An emulated RealTek 8139 network interface can be configured with:
+#
+# vif = [ 'mac=00:16:3e:00:00:11, type=ioemu, model=rtl8139, bridge=br0' ]
+#
+# A para-virtual network interface can be configured with:
+#
+# vif = [ 'mac=00:16:3e:00:00:11, type=netfront, bridge=br0' ]
+#
+vif = [ '' ]

#----------------------------------------------------------------------------
# Define the disk devices you want the domain to have access to, and
Index: xen-4.2.0-testing/tools/examples/xmexample.pv-grub
===================================================================
--- xen-4.2.0-testing.orig/tools/examples/xmexample.pv-grub
+++ xen-4.2.0-testing/tools/examples/xmexample.pv-grub
@@ -53,11 +53,11 @@ name = "ExampleDomain"
#
# or optionally override backend, bridge, ip, mac, script, type, or vifname:
#
-# vif = [ 'mac=00:16:3e:00:00:11, bridge=xenbr0' ]
+# vif = [ 'mac=00:16:3e:00:00:11, bridge=br0' ]
#
# or more than one interface may be configured:
#
-# vif = [ '', 'bridge=xenbr1' ]
+# vif = [ '', 'bridge=br1' ]

vif = [ '' ]

Index: xen-4.2.0-testing/tools/examples/xmexample.vti
===================================================================
--- xen-4.2.0-testing.orig/tools/examples/xmexample.vti
+++ xen-4.2.0-testing/tools/examples/xmexample.vti
@@ -40,11 +40,26 @@ name = "ExampleVTIDomain"
# In Windows OS, smaller size shows better performance.
#vhpt = 23

-# Optionally define mac and/or bridge for the network interfaces.
-# Random MACs are assigned if not given.
-#vif = [ 'type=ioemu, mac=00:16:3e:00:00:11, bridge=xenbr0, model=ne2k_pci' ]
-# type=ioemu specify the NIC is an ioemu device not netfront
-vif = [ 'type=ioemu, bridge=xenbr0' ]
+#----------------------------------------------------------------------------
+# Define network interfaces.
+
+# By default, no network interfaces are configured. You may have one created
+# with sensible defaults using an empty vif clause:
+#
+# vif = [ '' ]
+#
+# or optionally override backend, bridge, ip, mac, script, type, model,
+# or vifname.
+#
+# An emulated RealTek 8139 network interface can be configured with:
+#
+# vif = [ 'mac=00:16:3e:00:00:11, type=ioemu, model=rtl8139, bridge=br0' ]
+#
+# A para-virtual network interface can be configured with:
+#
+# vif = [ 'mac=00:16:3e:00:00:11, type=netfront, bridge=br0' ]
+#
+vif = [ '' ]

#----------------------------------------------------------------------------
# Define the disk devices you want the domain to have access to, and
Index: xen-4.2.0-testing/docs/man/xl.pod.1
===================================================================
--- xen-4.2.0-testing.orig/docs/man/xl.pod.1
+++ xen-4.2.0-testing/docs/man/xl.pod.1
@@ -12,7 +12,8 @@ The B<xl> program is the new tool for ma
domains. The program can be used to create, pause, and shutdown
domains. It can also be used to list current domains, enable or pin
VCPUs, and attach or detach virtual block devices.
-The old B<xm> tool is deprecated and should not be used.
+The B<xm> tool continues to be supported on SLE11 platforms
+and should still be used.

The basic structure of every B<xl> command is almost always:

++++++ xen.migrate.tools-xc_document_printf_calls_in_xc_restore.patch ++++++
user: Olaf Hering <olaf@xxxxxxxxx>
date: Wed Mar 06 16:42:02 2013 +0100
files: tools/xcutils/xc_restore.c
description:
tools/xc: document printf calls in xc_restore

Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>


diff -r e5ae0e680b5c -r 49b90990442a tools/xcutils/xc_restore.c
--- a/tools/xcutils/xc_restore.c
+++ b/tools/xcutils/xc_restore.c
@@ -56,6 +56,7 @@ main(int argc, char **argv)

if ( ret == 0 )
{
+ /* xend expects this output, part of protocol */
printf("store-mfn %li\n", store_mfn);
if ( !hvm )
printf("console-mfn %li\n", console_mfn);
++++++ xen.migrate.tools-xc_print_messages_from_xc_save_with_xc_report.patch
++++++
user: Olaf Hering <olaf@xxxxxxxxx>
date: Wed Mar 06 16:32:08 2013 +0100
files: tools/libxc/xc_private.h tools/xcutils/xc_save.c
description:
tools/xc: print messages from xc_save with xc_report

Make use of xc_report in xc_save to also log the pid if an error occurred.

Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>


diff -r 7af4246a6e1c -r e5ae0e680b5c tools/libxc/xc_private.h
--- a/tools/libxc/xc_private.h
+++ b/tools/libxc/xc_private.h
@@ -119,6 +119,7 @@ void xc_report_progress_step(xc_interfac

/* anamorphic macros: struct xc_interface *xch must be in scope */

+#define WPRINTF(_f, _a...) xc_report(xch, xch->error_handler, XTL_WARN,0, _f ,
## _a)
#define IPRINTF(_f, _a...) xc_report(xch, xch->error_handler, XTL_INFO,0, _f ,
## _a)
#define DPRINTF(_f, _a...) xc_report(xch, xch->error_handler, XTL_DETAIL,0, _f
, ## _a)
#define DBGPRINTF(_f, _a...) xc_report(xch, xch->error_handler, XTL_DEBUG,0,
_f , ## _a)
diff -r 7af4246a6e1c -r e5ae0e680b5c tools/xcutils/xc_save.c
--- a/tools/xcutils/xc_save.c
+++ b/tools/xcutils/xc_save.c
@@ -7,6 +7,7 @@
*
*/

+#include <unistd.h>
#include <err.h>
#include <stdlib.h>
#include <stdint.h>
@@ -19,6 +20,7 @@
#include <fcntl.h>
#include <err.h>

+#include <xc_private.h>
#include <xenstore.h>
#include <xenctrl.h>
#include <xenguest.h>
@@ -51,16 +53,17 @@ static int compat_suspend(void)
* receive the acknowledgement from the subscribe event channel. */
static int evtchn_suspend(void)
{
+ xc_interface *xch = si.xch;
int rc;

rc = xc_evtchn_notify(si.xce, si.suspend_evtchn);
if (rc < 0) {
- warnx("failed to notify suspend request channel: %d", rc);
+ WPRINTF("failed to notify suspend request channel: %d", rc);
return 0;
}

- if (xc_await_suspend(si.xch, si.xce, si.suspend_evtchn) < 0) {
- warnx("suspend failed");
+ if (xc_await_suspend(xch, si.xce, si.suspend_evtchn) < 0) {
+ WPRINTF("suspend failed");
return 0;
}

@@ -104,20 +107,27 @@ static int suspend(void* data)

static int switch_qemu_logdirty(int domid, unsigned int enable, void *data)
{
+ xc_interface *xch = si.xch;
struct xs_handle *xs;
char *path, *p, *ret_str, *cmd_str, **watch;
unsigned int len;
struct timeval tv;
fd_set fdset;

- if ((xs = xs_daemon_open()) == NULL)
- errx(1, "Couldn't contact xenstore");
- if (!(path = strdup("/local/domain/0/device-model/")))
- errx(1, "can't get domain path in store");
+ if ((xs = xs_daemon_open()) == NULL) {
+ PERROR("Couldn't contact xenstore");
+ exit(1);
+ }
+ if (!(path = strdup("/local/domain/0/device-model/"))) {
+ PERROR("can't get domain path in store");
+ exit(1);
+ }
if (!(path = realloc(path, strlen(path)
+ 10
- + strlen("/logdirty/cmd") + 1)))
- errx(1, "no memory for constructing xenstore path");
+ + strlen("/logdirty/cmd") + 1))) {
+ PERROR("no memory for constructing xenstore path");
+ exit(1);
+ }
snprintf(path + strlen(path), 11, "%i", domid);
strcat(path, "/logdirty/");
p = path + strlen(path);
@@ -126,16 +136,22 @@ static int switch_qemu_logdirty(int domi
/* Watch for qemu's return value */
strcpy(p, "ret");
if (!xs_watch(xs, path, "qemu-logdirty-ret"))
- errx(1, "can't set watch in store (%s)\n", path);
+ {
+ ERROR("can't set watch in store (%s)\n", path);
+ exit(1);
+ }

- if (!(cmd_str = strdup( enable == 0 ? "disable" : "enable")))
- errx(1, "can't get logdirty cmd path in store");
+ if (!(cmd_str = strdup( enable == 0 ? "disable" : "enable"))) {
+ PERROR("can't get logdirty cmd path in store");
+ exit(1);
+ }

/* Tell qemu that we want it to start logging dirty page to Xen */
strcpy(p, "cmd");
- if (!xs_write(xs, XBT_NULL, path, cmd_str, strlen(cmd_str)))
- errx(1, "can't write to store path (%s)\n",
- path);
+ if (!xs_write(xs, XBT_NULL, path, cmd_str, strlen(cmd_str))) {
+ PERROR("can't write to store path (%s)\n", path);
+ exit(1);
+ }

/* Wait a while for qemu to signal that it has service logdirty command */
read_again:
@@ -144,8 +160,10 @@ static int switch_qemu_logdirty(int domi
FD_ZERO(&fdset);
FD_SET(xs_fileno(xs), &fdset);

- if ((select(xs_fileno(xs) + 1, &fdset, NULL, NULL, &tv)) != 1)
- errx(1, "timed out waiting for qemu logdirty response.\n");
+ if ((select(xs_fileno(xs) + 1, &fdset, NULL, NULL, &tv)) != 1) {
+ PERROR("timed out waiting for qemu logdirty response.\n");
+ exit(1);
+ }

watch = xs_read_watch(xs, &len);
free(watch);
@@ -166,6 +184,7 @@ static int switch_qemu_logdirty(int domi
int
main(int argc, char **argv)
{
+ xc_interface *xch;
unsigned int maxit, max_f, lflags;
int io_fd, ret, port;
struct save_callbacks callbacks;
@@ -186,26 +205,26 @@ main(int argc, char **argv)
lvl = si.flags & XCFLAGS_DEBUG ? XTL_DEBUG: XTL_DETAIL;
lflags = XTL_STDIOSTREAM_SHOW_PID | XTL_STDIOSTREAM_HIDE_PROGRESS;
l = (xentoollog_logger *)xtl_createlogger_stdiostream(stderr, lvl, lflags);
- si.xch = xc_interface_open(l, 0, 0);
+ xch = si.xch = xc_interface_open(l, 0, 0);
if (!si.xch)
- errx(1, "failed to open control interface");
+ errx(1, "[%lu] failed to open control interface", (unsigned
long)getpid());

si.xce = xc_evtchn_open(NULL, 0);
if (si.xce == NULL)
- warnx("failed to open event channel handle");
+ WPRINTF("failed to open event channel handle");
else
{
port = xs_suspend_evtchn_port(si.domid);

if (port < 0)
- warnx("failed to get the suspend evtchn port\n");
+ WPRINTF("failed to get the suspend evtchn port\n");
else
{
si.suspend_evtchn =
xc_suspend_evtchn_init(si.xch, si.xce, si.domid, port);

if (si.suspend_evtchn < 0)
- warnx("suspend event channel initialization failed, "
+ WPRINTF("suspend event channel initialization failed, "
"using slow path");
}
}
++++++ xen.migrate.tools-xc_rework_xc_save.cswitch_qemu_logdirty.patch ++++++
user: Olaf Hering <olaf@xxxxxxxxx>
date: Wed Mar 06 17:05:10 2013 +0100
files: tools/xcutils/xc_save.c
description:
tools/xc: rework xc_save.c:switch_qemu_logdirty

Rework code in switch_qemu_logdirty, fix also memleak.

Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>


diff -r 49b90990442a -r 1ea501d60264 tools/xcutils/xc_save.c
--- a/tools/xcutils/xc_save.c
+++ b/tools/xcutils/xc_save.c
@@ -7,6 +7,7 @@
*
*/

+#define _GNU_SOURCE
#include <unistd.h>
#include <err.h>
#include <stdlib.h>
@@ -109,8 +110,10 @@ static int switch_qemu_logdirty(int domi
{
xc_interface *xch = si.xch;
struct xs_handle *xs;
- char *path, *p, *ret_str, *cmd_str, **watch;
+ char *path, *dir_p, *ret_str, **watch;
+ const char *cmd_str;
unsigned int len;
+ int ret, again;
struct timeval tv;
fd_set fdset;

@@ -118,65 +121,56 @@ static int switch_qemu_logdirty(int domi
PERROR("Couldn't contact xenstore");
exit(1);
}
- if (!(path = strdup("/local/domain/0/device-model/"))) {
- PERROR("can't get domain path in store");
+
+ ret = asprintf(&path, "/local/domain/0/device-model/%i/logdirty/ret",
domid);
+ if (ret < 0) {
+ ERROR("Couldn't construct xenstore path");
exit(1);
}
- if (!(path = realloc(path, strlen(path)
- + 10
- + strlen("/logdirty/cmd") + 1))) {
- PERROR("no memory for constructing xenstore path");
- exit(1);
- }
- snprintf(path + strlen(path), 11, "%i", domid);
- strcat(path, "/logdirty/");
- p = path + strlen(path);
-
+ /* Pointer to directory */
+ dir_p = path + ret - 3;

/* Watch for qemu's return value */
- strcpy(p, "ret");
- if (!xs_watch(xs, path, "qemu-logdirty-ret"))
- {
- ERROR("can't set watch in store (%s)\n", path);
+ if (!xs_watch(xs, path, "qemu-logdirty-ret")) {
+ PERROR("can't set watch in store (%s)", path);
exit(1);
}

- if (!(cmd_str = strdup( enable == 0 ? "disable" : "enable"))) {
- PERROR("can't get logdirty cmd path in store");
+ cmd_str = enable ? "enable" : "disable";
+
+ /* Tell qemu that we want it to start logging dirty pages to Xen */
+ strcpy(dir_p, "cmd");
+ if (!xs_write(xs, XBT_NULL, path, cmd_str, strlen(cmd_str))) {
+ PERROR("can't write to store path (%s)", path);
exit(1);
}

- /* Tell qemu that we want it to start logging dirty page to Xen */
- strcpy(p, "cmd");
- if (!xs_write(xs, XBT_NULL, path, cmd_str, strlen(cmd_str))) {
- PERROR("can't write to store path (%s)\n", path);
- exit(1);
- }
+ /* Restore initial path */
+ strcpy(dir_p, "ret");
+ /* Wait a while for qemu to signal that it has serviced logdirty command */
+ do {
+ tv.tv_sec = 5;
+ tv.tv_usec = 0;
+ FD_ZERO(&fdset);
+ FD_SET(xs_fileno(xs), &fdset);
+ errno = 0;

- /* Wait a while for qemu to signal that it has service logdirty command */
- read_again:
- tv.tv_sec = 5;
- tv.tv_usec = 0;
- FD_ZERO(&fdset);
- FD_SET(xs_fileno(xs), &fdset);
-
- if ((select(xs_fileno(xs) + 1, &fdset, NULL, NULL, &tv)) != 1) {
- PERROR("timed out waiting for qemu logdirty response.\n");
- exit(1);
- }
-
- watch = xs_read_watch(xs, &len);
- free(watch);
-
- strcpy(p, "ret");
- ret_str = xs_read(xs, XBT_NULL, path, &len);
- if (ret_str == NULL || strcmp(ret_str, cmd_str))
+ if ((select(xs_fileno(xs) + 1, &fdset, NULL, NULL, &tv)) != 1) {
+ PERROR("timed out waiting for qemu logdirty response.");
+ exit(1);
+ }
+
+ watch = xs_read_watch(xs, &len);
+ free(watch);
+
+ ret_str = xs_read(xs, XBT_NULL, path, &len);
+ again = ret_str == NULL || strcmp(ret_str, cmd_str);
+ WPRINTF("Got '%s' from logdirty%s.\n", ret_str, again ? ", retrying" :
"");
+ free(ret_str);
/* Watch fired but value is not yet right */
- goto read_again;
+ } while (again);

free(path);
- free(cmd_str);
- free(ret_str);

return 0;
}
++++++ xen.migrate.tools_add_xm_migrate_--log_progress_option.patch ++++++
user: Olaf Hering <olaf@xxxxxxxxx>
date: Wed Mar 06 17:05:15 2013 +0100
files: tools/libxc/xenguest.h tools/python/xen/xend/XendCheckpoint.py
tools/python/xen/xend/XendDomain.py tools/python/xen/xend/XendDomainInfo.py
tools/python/xen/xm/migrate.py tools/xcutils/xc_save.c
description:
tools: add xm migrate --log_progress option

xc_domain_save does print progress messages. These verbose messages are
disabled per default to avoid flood in xend.log. Sometimes it is helpful
to see progress when migrating large and busy guests. So add a new
option to xm migrate to actually enable the printing of progress
messages.

xl migrate is not modified with this change because it does not use the
stdio logger.

Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>


diff -r 29c66a248f5b -r d8ef4a83760f tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h
+++ b/tools/libxc/xenguest.h
@@ -29,6 +29,7 @@
#define XCFLAGS_STDVGA (1 << 3)
#define XCFLAGS_CHECKPOINT_COMPRESS (1 << 4)
#define XCFLAGS_DOMSAVE_ABORT_IF_BUSY (1 << 5)
+#define XCFLAGS_PROGRESS (1 << 6)

#define X86_64_B_SIZE 64
#define X86_32_B_SIZE 32
diff -r 29c66a248f5b -r d8ef4a83760f tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py
+++ b/tools/python/xen/xend/XendCheckpoint.py
@@ -121,16 +121,19 @@ def save(fd, dominfo, network, live, dst
max_iters = dominfo.info.get('max_iters', "0")
max_factor = dominfo.info.get('max_factor', "0")
abort_if_busy = dominfo.info.get('abort_if_busy', "0")
+ log_save_progress = dominfo.info.get('log_save_progress', "0")
if max_iters == "None":
max_iters = "0"
if max_factor == "None":
max_factor = "0"
if abort_if_busy == "None":
abort_if_busy = "0"
+ if log_save_progress == "None":
+ log_save_progress = "0"
cmd = [xen.util.auxbin.pathTo(XC_SAVE), str(fd),
str(dominfo.getDomid()),
max_iters, max_factor,
- str( int(live) | (int(hvm) << 2) | (int(abort_if_busy) << 5) ) ]
+ str( int(live) | (int(hvm) << 2) | (int(abort_if_busy) << 5) |
(int(log_save_progress) << 6) ) ]
log.debug("[xc_save]: %s", string.join(cmd))

def saveInputHandler(line, tochild):
diff -r 29c66a248f5b -r d8ef4a83760f tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py
+++ b/tools/python/xen/xend/XendDomain.py
@@ -1832,17 +1832,18 @@ class XendDomain:
log.exception(ex)
raise XendError(str(ex))

- def domain_migrate_constraints_set(self, domid, max_iters, max_factor,
abort_if_busy):
+ def domain_migrate_constraints_set(self, domid, max_iters, max_factor,
abort_if_busy, log_save_progress):
"""Set the Migrate Constraints of this domain.
@param domid: Domain ID or Name
@param max_iters: Number of iterations before final suspend
@param max_factor: Max amount of memory to transfer before final
suspend
@param abort_if_busy: Abort migration instead of doing final suspend
+ @param log_save_progress: Log progress of migrate to xend.log
"""
dominfo = self.domain_lookup_nr(domid)
if not dominfo:
raise XendInvalidDomain(str(domid))
- dominfo.setMigrateConstraints(max_iters, max_factor, abort_if_busy)
+ dominfo.setMigrateConstraints(max_iters, max_factor, abort_if_busy,
log_save_progress)

def domain_maxmem_set(self, domid, mem):
"""Set the memory limit for a domain.
diff -r 29c66a248f5b -r d8ef4a83760f tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py
+++ b/tools/python/xen/xend/XendDomainInfo.py
@@ -1459,17 +1459,19 @@ class XendDomainInfo:
pci_conf = self.info['devices'][dev_uuid][1]
return map(pci_dict_to_bdf_str, pci_conf['devs'])

- def setMigrateConstraints(self, max_iters, max_factor, abort_if_busy):
+ def setMigrateConstraints(self, max_iters, max_factor, abort_if_busy,
log_save_progress):
"""Set the Migrate Constraints of this domain.
@param max_iters: Number of iterations before final suspend
@param max_factor: Max amount of memory to transfer before final
suspend
@param abort_if_busy: Abort migration instead of doing final suspend
+ @param log_save_progress: Log progress of migrate to xend.log
"""
log.debug("Setting migration constraints of domain %s (%s) to '%s'
'%s' '%s'.",
self.info['name_label'], str(self.domid), max_iters,
max_factor, abort_if_busy)
self.info['max_iters'] = str(max_iters)
self.info['max_factor'] = str(max_factor)
self.info['abort_if_busy'] = str(abort_if_busy)
+ self.info['log_save_progress'] = str(log_save_progress)

def setMemoryTarget(self, target):
"""Set the memory target of this domain.
diff -r 29c66a248f5b -r d8ef4a83760f tools/python/xen/xm/migrate.py
--- a/tools/python/xen/xm/migrate.py
+++ b/tools/python/xen/xm/migrate.py
@@ -67,6 +67,10 @@ gopts.opt('abort_if_busy', short='A',
fn=set_true, default=0,
use="Abort migration instead of doing final suspend.")

+gopts.opt('log_progress',
+ fn=set_true, default=0,
+ use="Log progress of migration to xend.log")
+
def help():
return str(gopts)

@@ -95,7 +99,8 @@ def main(argv):
server.xend.domain.migrate_constraints_set(dom,
opts.vals.max_iters,
opts.vals.max_factor,
- opts.vals.abort_if_busy)
+ opts.vals.abort_if_busy,
+ opts.vals.log_progress)
server.xend.domain.migrate(dom, dst, opts.vals.live,
opts.vals.port,
opts.vals.node,
diff -r 29c66a248f5b -r d8ef4a83760f tools/xcutils/xc_save.c
--- a/tools/xcutils/xc_save.c
+++ b/tools/xcutils/xc_save.c
@@ -197,7 +197,8 @@ main(int argc, char **argv)
si.suspend_evtchn = -1;

lvl = si.flags & XCFLAGS_DEBUG ? XTL_DEBUG: XTL_DETAIL;
- lflags = XTL_STDIOSTREAM_SHOW_PID | XTL_STDIOSTREAM_HIDE_PROGRESS;
+ lflags = XTL_STDIOSTREAM_SHOW_PID;
+ lflags |= si.flags & XCFLAGS_PROGRESS ? 0 : XTL_STDIOSTREAM_HIDE_PROGRESS;
l = (xentoollog_logger *)xtl_createlogger_stdiostream(stderr, lvl, lflags);
xch = si.xch = xc_interface_open(l, 0, 0);
if (!si.xch)
++++++ xen.migrate.tools_set_migration_constraints_from_cmdline.patch ++++++
user: Olaf Hering <olaf@xxxxxxxxx>
date: Wed Mar 06 17:05:14 2013 +0100
files: docs/man/xl.pod.1 tools/libxc/xc_domain_save.c
tools/libxc/xenguest.h tools/libxl/Makefile tools/libxl/libxl.c
tools/libxl/libxl.h tools/libxl/libxl_dom.c tools/libxl/libxl_internal.h
tools/libxl/libxl_save_callout.c tools/libxl/xl_cmdimpl.c
tools/libxl/xl_cmdtable.c tools/python/xen/xend/XendCheckpoint.py
tools/python/xen/xend/XendDomain.py tools/python/xen/xend/XendDomainInfo.py
tools/python/xen/xm/migrate.py
description:
tools: set migration constraints from cmdline

Add new options to xm/xl migrate to control the process of migration.
The intention is to optionally abort the migration if it takes too long
to migrate a busy guest due to the high number of dirty pages. Currently
the guest is suspended to transfer the remaining dirty pages. This
transfer can take too long, which can confuse the guest if it is suspended
for too long.

-M <number> Number of iterations before final suspend (default: 30)
--max_iters <number>

-m <factor> Max amount of memory to transfer before final suspend (default:
3*RAM)
--max_factor <factor>

-A Abort migration instead of doing final suspend.
--abort_if_busy



The changes to libxl change the API, handle LIBXL_API_VERSION == 0x040200.

TODO:
eventually also add --min_remaining (default value 50) in a separate patch

v6:
- update the LIBXL_API_VERSION handling for libxl_domain_suspend
change it to an inline function if LIBXL_API_VERSION is defined to 4.2.0
- rename libxl_save_properties to libxl_domain_suspend_properties
- rename ->xlflags to ->flags within that struct

v5:
- adjust libxl_domain_suspend prototype, move flags, max_iters,
max_factor into a new, optional struct libxl_save_properties
- rename XCFLAGS_DOMSAVE_NOSUSPEND to XCFLAGS_DOMSAVE_ABORT_IF_BUSY
- rename LIBXL_SUSPEND_NO_FINAL_SUSPEND to LIBXL_SUSPEND_ABORT_IF_BUSY
- rename variables no_suspend to abort_if_busy
- rename option -N/--no_suspend to -A/--abort_if_busy
- update xl.1, extend description of -A option

v4:
- update default for no_suspend from None to 0 in XendCheckpoint.py:save
- update logoutput in setMigrateConstraints
- change xm migrate defaults from None to 0
- add new options to xl.1
- fix syntax error in XendDomain.py:domain_migrate_constraints_set
- fix xm migrate -N option name to match xl migrate

v3:
- move logic errors in libxl__domain_suspend and fixed help text in
cmd_table to separate patches
- fix syntax error in XendCheckpoint.py
- really pass max_iters and max_factor in libxl__xc_domain_save
- make libxl_domain_suspend_0x040200 declaration globally visible
- bump libxenlight.so SONAME from 2.0 to 2.1 due to changed
libxl_domain_suspend

v2:
- use LIBXL_API_VERSION and define libxl_domain_suspend_0x040200
- fix logic error in min_reached check in xc_domain_save
- add longopts
- update --help text
- correct description of migrate --help text

Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>


Index: xen-4.2.1-testing/tools/libxc/xc_domain_save.c
===================================================================
--- xen-4.2.1-testing.orig/tools/libxc/xc_domain_save.c
+++ xen-4.2.1-testing/tools/libxc/xc_domain_save.c
@@ -813,6 +813,7 @@ int xc_domain_save(xc_interface *xch, in
int rc = 1, frc, i, j, last_iter = 0, iter = 0;
int live = (flags & XCFLAGS_LIVE);
int debug = (flags & XCFLAGS_DEBUG);
+ int abort_if_busy = (flags & XCFLAGS_DOMSAVE_ABORT_IF_BUSY);
int superpages = !!hvm;
int race = 0, sent_last_iter, skip_this_iter = 0;
unsigned int sent_this_iter = 0;
@@ -1525,10 +1526,20 @@ int xc_domain_save(xc_interface *xch, in

if ( live )
{
+ int min_reached = sent_this_iter + skip_this_iter < 50;
if ( (iter >= max_iters) ||
- (sent_this_iter+skip_this_iter < 50) ||
+ min_reached ||
(total_sent > dinfo->p2m_size*max_factor) )
{
+ if ( !min_reached && abort_if_busy )
+ {
+ ERROR("Live migration aborted, as requested. (guest too
busy?)"
+ " total_sent %lu iter %d, max_iters %u max_factor %u",
+ total_sent, iter, max_iters, max_factor);
+ rc = 1;
+ goto out;
+ }
+
DPRINTF("Start last iteration\n");
last_iter = 1;

Index: xen-4.2.1-testing/tools/libxc/xenguest.h
===================================================================
--- xen-4.2.1-testing.orig/tools/libxc/xenguest.h
+++ xen-4.2.1-testing/tools/libxc/xenguest.h
@@ -28,6 +28,7 @@
#define XCFLAGS_HVM (1 << 2)
#define XCFLAGS_STDVGA (1 << 3)
#define XCFLAGS_CHECKPOINT_COMPRESS (1 << 4)
+#define XCFLAGS_DOMSAVE_ABORT_IF_BUSY (1 << 5)

#define X86_64_B_SIZE 64
#define X86_32_B_SIZE 32
Index: xen-4.2.1-testing/tools/python/xen/xend/XendCheckpoint.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendCheckpoint.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendCheckpoint.py
@@ -118,9 +118,19 @@ def save(fd, dominfo, network, live, dst
# enabled. Passing "0" simply uses the defaults compiled into
# libxenguest; see the comments and/or code in xc_linux_save() for
# more information.
+ max_iters = dominfo.info.get('max_iters', "0")
+ max_factor = dominfo.info.get('max_factor', "0")
+ abort_if_busy = dominfo.info.get('abort_if_busy', "0")
+ if max_iters == "None":
+ max_iters = "0"
+ if max_factor == "None":
+ max_factor = "0"
+ if abort_if_busy == "None":
+ abort_if_busy = "0"
cmd = [xen.util.auxbin.pathTo(XC_SAVE), str(fd),
- str(dominfo.getDomid()), "0", "0",
- str(int(live) | (int(hvm) << 2)) ]
+ str(dominfo.getDomid()),
+ max_iters, max_factor,
+ str( int(live) | (int(hvm) << 2) | (int(abort_if_busy) << 5) ) ]
log.debug("[xc_save]: %s", string.join(cmd))

def saveInputHandler(line, tochild):
Index: xen-4.2.1-testing/tools/python/xen/xend/XendDomain.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendDomain.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendDomain.py
@@ -1832,6 +1832,18 @@ class XendDomain:
log.exception(ex)
raise XendError(str(ex))

+ def domain_migrate_constraints_set(self, domid, max_iters, max_factor,
abort_if_busy):
+ """Set the Migrate Constraints of this domain.
+ @param domid: Domain ID or Name
+ @param max_iters: Number of iterations before final suspend
+ @param max_factor: Max amount of memory to transfer before final
suspend
+ @param abort_if_busy: Abort migration instead of doing final suspend
+ """
+ dominfo = self.domain_lookup_nr(domid)
+ if not dominfo:
+ raise XendInvalidDomain(str(domid))
+ dominfo.setMigrateConstraints(max_iters, max_factor, abort_if_busy)
+
def domain_maxmem_set(self, domid, mem):
"""Set the memory limit for a domain.

Index: xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -1459,6 +1459,18 @@ class XendDomainInfo:
pci_conf = self.info['devices'][dev_uuid][1]
return map(pci_dict_to_bdf_str, pci_conf['devs'])

+ def setMigrateConstraints(self, max_iters, max_factor, abort_if_busy):
+ """Set the Migrate Constraints of this domain.
+ @param max_iters: Number of iterations before final suspend
+ @param max_factor: Max amount of memory to transfer before final
suspend
+ @param abort_if_busy: Abort migration instead of doing final suspend
+ """
+ log.debug("Setting migration constraints of domain %s (%s) to '%s'
'%s' '%s'.",
+ self.info['name_label'], str(self.domid), max_iters,
max_factor, abort_if_busy)
+ self.info['max_iters'] = str(max_iters)
+ self.info['max_factor'] = str(max_factor)
+ self.info['abort_if_busy'] = str(abort_if_busy)
+
def setMemoryTarget(self, target):
"""Set the memory target of this domain.
@param target: In MiB.
Index: xen-4.2.1-testing/tools/python/xen/xm/migrate.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xm/migrate.py
+++ xen-4.2.1-testing/tools/python/xen/xm/migrate.py
@@ -55,6 +55,18 @@ gopts.opt('change_home_server', short='c
fn=set_true, default=0,
use="Change home server for managed domains.")

+gopts.opt('max_iters', short='M', val='max_iters',
+ fn=set_int, default=0,
+ use="Number of iterations before final suspend (default: 30).")
+
+gopts.opt('max_factor', short='m', val='max_factor',
+ fn=set_int, default=0,
+ use="Max amount of memory to transfer before final suspend (default:
3*RAM).")
+
+gopts.opt('abort_if_busy', short='A',
+ fn=set_true, default=0,
+ use="Abort migration instead of doing final suspend.")
+
def help():
return str(gopts)

@@ -80,6 +92,10 @@ def main(argv):
server.xenapi.VM.migrate(vm_ref, dst, bool(opts.vals.live),
other_config)
else:
+ server.xend.domain.migrate_constraints_set(dom,
+ opts.vals.max_iters,
+ opts.vals.max_factor,
+ opts.vals.abort_if_busy)
server.xend.domain.migrate(dom, dst, opts.vals.live,
opts.vals.port,
opts.vals.node,
++++++ xen.sles11sp1.fate311487.xen_platform_pci.dmistring.patch ++++++
References: fate#311487

Provide a modalias entry in xen-platform-pci.ko to allow early autoloading in
initrd based on /sys/class/dmi/id/modalias

Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>

---
unmodified_drivers/linux-2.6/platform-pci/platform-pci.c | 13 +++++++++++++
1 file changed, 13 insertions(+)

Index:
xen-4.2.0-testing/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
===================================================================
---
xen-4.2.0-testing.orig/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
+++ xen-4.2.0-testing/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
@@ -27,6 +27,7 @@
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/version.h>
+#include <linux/dmi.h>
#include <linux/interrupt.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
@@ -477,6 +478,18 @@ static struct pci_device_id platform_pci

MODULE_DEVICE_TABLE(pci, platform_pci_tbl);

+static const struct dmi_system_id platform_dmi_tbl[] = {
+ {
+ .ident = "Xen PV-on-HVM",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Xen"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HVM domU"),
+ },
+ },
+ { },
+};
+MODULE_DEVICE_TABLE(dmi, platform_dmi_tbl);
+
static struct pci_driver platform_driver = {
name: DRV_NAME,
probe: platform_pci_init,
++++++ xen_pvdrivers.conf ++++++
# Install the paravirtualized drivers
install libata /sbin/modprobe xen-vbd 2>&1 |:; /sbin/modprobe --ignore-install
libata
install ata_piix /sbin/modprobe xen-vbd 2>&1 |:; /sbin/modprobe
--ignore-install ata_piix

install 8139cp /sbin/modprobe xen-vnif 2>&1 |:; /sbin/modprobe
--ignore-install 8139cp

install 8139too /sbin/modprobe xen-vnif 2>&1 |:; /sbin/modprobe
--ignore-install 8139too

++++++ xenapi-console-protocol.patch ++++++
Index: xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -3954,6 +3954,14 @@ class XendDomainInfo:
if not config.has_key('backend'):
config['backend'] = "00000000-0000-0000-0000-000000000000"

+ if dev_class == 'console':
+ if not config.has_key('protocol'):
+ con_type = config.get('type', '')
+ if con_type == 'vnc':
+ config['protocol'] = 'rfb'
+ elif con_type == 'sdl':
+ config['protocol'] = 'rdp'
+
return config

def get_dev_property(self, dev_class, dev_uuid, field):
++++++ xenconsole-no-multiple-connections.patch ++++++
Index: xen-4.2.0-testing/tools/console/client/main.c
===================================================================
--- xen-4.2.0-testing.orig/tools/console/client/main.c
+++ xen-4.2.0-testing/tools/console/client/main.c
@@ -96,6 +96,7 @@ static int get_pty_fd(struct xs_handle *
* Assumes there is already a watch set in the store for this path. */
{
struct timeval tv;
+ struct flock lock;
fd_set watch_fdset;
int xs_fd = xs_fileno(xs), pty_fd = -1;
int start, now;
@@ -122,6 +123,12 @@ static int get_pty_fd(struct xs_handle *
if (pty_fd == -1)
err(errno, "Could not open tty `%s'",
pty_path);
+ memset(&lock, 0, sizeof(lock));
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+ if (fcntl(pty_fd, F_SETLK, &lock) != 0)
+ err(errno, "Could not lock tty '%s'",
+ pty_path);
free(pty_path);
}
}
++++++ xend-config-enable-dump-comment.patch ++++++
bnc#684305

---
tools/examples/xend-config.sxp | 3 +++
1 file changed, 3 insertions(+)

Index: xen-4.2.0-testing/tools/examples/xend-config.sxp
===================================================================
--- xen-4.2.0-testing.orig/tools/examples/xend-config.sxp
+++ xen-4.2.0-testing/tools/examples/xend-config.sxp
@@ -250,6 +250,9 @@
(dom0-cpus 0)

# Whether to enable core-dumps when domains crash.
+# This setting overrides the per-domain dump value 'on_crash' and causes a
+# core dump on all crashed domains. For finer grain control, it is best to
+# disable this setting (which is default) and use the per-domain controls.
#(enable-dump no)

# The tool used for initiating virtual TPM migration
++++++ xend-config.diff ++++++
Index: xen-4.2.0-testing/tools/hotplug/Linux/init.d/sysconfig.xendomains
===================================================================
--- xen-4.2.0-testing.orig/tools/hotplug/Linux/init.d/sysconfig.xendomains
+++ xen-4.2.0-testing/tools/hotplug/Linux/init.d/sysconfig.xendomains
@@ -98,7 +98,6 @@ XENDOMAINS_RESTORE=true
# Note that the script tries to be clever if both RESTORE and AUTO are
# set: It will first restore saved domains and then only start domains
# in AUTO which are not running yet.
-# Note that the name matching is somewhat fuzzy.
#
XENDOMAINS_AUTO=/etc/xen/auto

Index: xen-4.2.0-testing/tools/examples/xend-config.sxp
===================================================================
--- xen-4.2.0-testing.orig/tools/examples/xend-config.sxp
+++ xen-4.2.0-testing/tools/examples/xend-config.sxp
@@ -58,11 +58,12 @@


#(xend-http-server no)
-#(xend-unix-server no)
+(xend-unix-server yes)
#(xend-tcp-xmlrpc-server no)
#(xend-unix-xmlrpc-server yes)
+# Only enable xend-relocation-server on trusted networks as it lacks
+# encryption and authentication.
#(xend-relocation-server no)
-(xend-relocation-server yes)
#(xend-relocation-ssl-server no)
#(xend-udev-event-server no)

@@ -170,7 +171,12 @@
# two fake interfaces per guest domain. To do things like this, write
# yourself a wrapper script, and call network-bridge from it, as appropriate.
#
-(network-script network-bridge)
+# SuSE users note:
+# On openSUSE >= 11.1 and SLES >= 11, networks should be configured using
+# native platform tool - YaST. vif-bridge and qemu-ifup can be used to
+# connect vifs to the YaST-managed networks.
+#(network-script network-bridge)
+(network-script )

# The script used to control virtual interfaces. This can be overridden on a
# per-vif basis when creating a domain or a configuring a new vif. The
@@ -203,7 +209,7 @@
# dom0-min-mem is the lowest permissible memory level (in MB) for dom0.
# This is a minimum both for auto-ballooning (as enabled by
# enable-dom0-ballooning below) and for xm mem-set when applied to dom0.
-(dom0-min-mem 196)
+(dom0-min-mem 512)

# Whether to enable auto-ballooning of dom0 to allow domUs to be created.
# If enable-dom0-ballooning = no, dom0 will never balloon out.
++++++ xend-console-port-restore.patch ++++++
Pass console_port to completeRestore() so that console/port is written to
xenstore. See bnc#706574

From: Chunyan Liu <cyliu@xxxxxxxxxx>

Index: xen-4.2.1-testing/tools/python/xen/xend/XendCheckpoint.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendCheckpoint.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendCheckpoint.py
@@ -342,8 +342,7 @@ def restore(xd, fd, dominfo = None, paus
restore_image.setCpuid()

# xc_restore will wait for source to close connection
-
- dominfo.completeRestore(handler.store_mfn, handler.console_mfn)
+ dominfo.completeRestore(handler.store_mfn, handler.console_mfn,
console_port)

#
# We shouldn't hold the domains_lock over a waitForDevices
Index: xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -3076,7 +3076,7 @@ class XendDomainInfo:
# TODO: recategorise - called from XendCheckpoint
#

- def completeRestore(self, store_mfn, console_mfn):
+ def completeRestore(self, store_mfn, console_mfn, console_port):

log.debug("XendDomainInfo.completeRestore")

@@ -3087,6 +3087,7 @@ class XendDomainInfo:
self.image = image.create(self, self.info)
if self.image:
self.image.createDeviceModel(True)
+ self.console_port = console_port
self._storeDomDetails()
self._registerWatches()
self.refreshShutdown()
++++++ xend-core-dump-loc.diff ++++++
Index: xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -2318,7 +2318,7 @@ class XendDomainInfo:
# To prohibit directory traversal
based_name = os.path.basename(self.info['name_label'])

- coredir = "/var/xen/dump/%s" % (based_name)
+ coredir = "/var/lib/xen/dump/%s" % (based_name)
if not os.path.exists(coredir):
try:
mkdir.parents(coredir, stat.S_IRWXU)
++++++ xend-cpuid.patch ++++++
Only add cpuid and cpuid_check to sexpr once

When converting a XendConfig object to sexpr, cpuid and cpuid_check
were being emitted twice in the resulting sexpr. The first conversion
writes incorrect sexpr, causing parsing of the sexpr to fail when xend
is restarted and domain sexpr files in /var/lib/xend/domains/<dom-uuid>
are read and parsed.

This patch skips the first conversion, and uses only the custom
cpuid{_check} conversion methods called later. It is not pretty, but
is the least invasive fix in this complex code.
Index: xen-4.2.0-testing/tools/python/xen/xend/XendConfig.py
===================================================================
--- xen-4.2.0-testing.orig/tools/python/xen/xend/XendConfig.py
+++ xen-4.2.0-testing/tools/python/xen/xend/XendConfig.py
@@ -1126,6 +1126,10 @@ class XendConfig(dict):
else:
for name, typ in XENAPI_CFG_TYPES.items():
if name in self and self[name] not in (None, []):
+ # Skip cpuid and cpuid_check. Custom conversion
+ # methods for these are called below.
+ if name in ("cpuid", "cpuid_check"):
+ continue
if typ == dict:
s = self[name].items()
elif typ == list:
++++++ xend-devid-or-name.patch ++++++
# HG changeset patch
# User Jim Fehlig <jfehlig@xxxxxxxxxx>
# Date 1284948067 21600
# Node ID 4674ad11feef87a6a57b99313966e0e121588e1c
# Parent 5393151a737b023476f4e571effc547e758cf8c8
xend: Fix device_configure

The semantics of XendDomainInfo.py:device_configure() changed with xen upstream
c/s 19610. Previously this method would take a devid in actual id *or* name
form, e.g. it would accept '5632' or 'hdc'. This patch restores that behavior.

Signed-off-by: Jim Fehlig <jfehlig@xxxxxxxxxx>

Index: xen-4.2.0-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.2.0-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.2.0-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -1203,6 +1203,9 @@ class XendDomainInfo:
except ValueError:
pass
devid = dev_control.convertToDeviceNumber(dev)
+ else:
+ # devid could be a name, e.g. hdc
+ devid = dev_control.convertToDeviceNumber(devid)
dev_info = self._getDeviceInfo_vbd(devid)
if dev_info is None:
raise VmError("Device %s not connected" % devid)
++++++ xend-disable-internal-logrotate.patch ++++++
Disable xend's internal log rotation and enable the logrotate.conf from the xen
package. This allows larger xend.log files.

---
tools/python/xen/xend/XendLogging.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

Index: xen-4.2.0-testing/tools/python/xen/xend/XendLogging.py
===================================================================
--- xen-4.2.0-testing.orig/tools/python/xen/xend/XendLogging.py
+++ xen-4.2.0-testing/tools/python/xen/xend/XendLogging.py
@@ -76,7 +76,7 @@ if 'TRACE' not in logging.__dict__:
log = logging.getLogger("xend")


-MAX_BYTES = 1 << 20 # 1MB
+MAX_BYTES = 0
BACKUP_COUNT = 5

STDERR_FORMAT = "[%(name)s] %(levelname)s (%(module)s:%(lineno)d) %(message)s"
++++++ xend-domain-lock-sfex.patch ++++++
Index: xen-4.2.1-testing/tools/examples/xend-config.sxp
===================================================================
--- xen-4.2.1-testing.orig/tools/examples/xend-config.sxp
+++ xen-4.2.1-testing/tools/examples/xend-config.sxp
@@ -357,7 +357,7 @@
# path /<xend-domain-lock-path>/<vm-uuid>
# Return 0 on success, non-zero on error.
#
-# lock-util [-s] path"
+# lock-util [-s] -i <vm uuid> path"
# -s Lock status. If lock is acquired, print any contents
# on stdout and return 0. Return non-zero if lock is
# available.
@@ -383,6 +383,11 @@
#
#(xend-domain-lock-utility domain-lock)

+# Some locking mechanism provide cluster wide locking service like sfex.
+# And that requires a shared locking device.
+#(xend-domain-lock-utility domain-lock-sfex)
+#(xend-domain-lock-device "/dev/iwmvg/hbdevice")
+
# If we have a very big scsi device configuration, start of xend is slow,
# because xend scans all the device paths to build its internal PSCSI device
# list. If we need only a few devices for assigning to a guest, we can reduce
Index: xen-4.2.1-testing/tools/hotplug/Linux/Makefile
===================================================================
--- xen-4.2.1-testing.orig/tools/hotplug/Linux/Makefile
+++ xen-4.2.1-testing/tools/hotplug/Linux/Makefile
@@ -23,6 +23,7 @@ XEN_SCRIPTS += xen-hotplug-cleanup
XEN_SCRIPTS += external-device-migrate
XEN_SCRIPTS += vscsi
XEN_SCRIPTS += domain-lock vm-monitor
+XEN_SCRIPTS += domain-lock-sfex
XEN_SCRIPT_DATA = xen-script-common.sh locking.sh logging.sh
XEN_SCRIPT_DATA += xen-hotplug-common.sh xen-network-common.sh vif-common.sh
XEN_SCRIPT_DATA += block-common.sh vtpm-common.sh vtpm-hotplug-common.sh
Index: xen-4.2.1-testing/tools/hotplug/Linux/domain-lock
===================================================================
--- xen-4.2.1-testing.orig/tools/hotplug/Linux/domain-lock
+++ xen-4.2.1-testing/tools/hotplug/Linux/domain-lock
@@ -4,7 +4,7 @@ basedir=$(dirname "$0")

usage() {
echo "usage: domain-lock [-l|-u] -n <vm name> -i <vm uuid> -p <physical
host> path"
- echo "usage: domain-lock [-s] path"
+ echo "usage: domain-lock [-s] -i <vm uuid> path"
echo ""
echo "-l lock"
echo "-u unlock"
Index: xen-4.2.1-testing/tools/hotplug/Linux/domain-lock-sfex
===================================================================
--- /dev/null
+++ xen-4.2.1-testing/tools/hotplug/Linux/domain-lock-sfex
@@ -0,0 +1,166 @@
+#!/bin/bash
+
+# pre-condition
+# 1. device is ready: logical volume activated if used
+# 2. device already initialized
+# 3. index is assigned correctly
+
+#error code:
+# 0: success
+# 1: error
+
+if [ `uname -m` = "x86_64" ]; then
+ SFEX_DAEMON=/usr/lib64/heartbeat/sfex_daemon
+else
+ SFEX_DAEMON=/usr/lib/heartbeat/sfex_daemon
+fi
+SFEX_INIT=/usr/sbin/sfex_init
+COLLISION_TIMEOUT=1
+LOCK_TIMEOUT=3
+MONITOR_INTERVAL=2
+LOCAL_LOCK_FILE=/var/lock/sfex
+
+usage() {
+ echo "usage: domain-lock-sfex [-l|-u|-s] -i <vm uuid> -x <sfex device>"
+ echo ""
+ echo "-l lock"
+ echo "-u unlock"
+ echo "-s status (default)"
+ echo "-i Virtual Machine Id or UUID"
+ echo "-x SFEX device which used for sfex lock"
+ exit 1
+}
+
+get_lock_host() {
+ local rscname=$1
+ local device=$2
+ r=`$SFEX_DAEMON -s -u $rscname $device`
+ echo $r
+}
+
+get_status() {
+ local rscname=$1
+ if /usr/bin/pgrep -f "$SFEX_DAEMON .* ${rscname} " > /dev/null 2>&1; then
+ return 0
+ else
+ return 1
+ fi
+}
+
+acquire_lock() {
+ local rscname=$1
+ local device=$2
+ get_status $rscname
+ ## We assume xend will take care to avoid starting same VM twice on the same
machine.
+ if [ $? -eq 0 ]; then
+ return 0
+ fi
+ $SFEX_DAEMON -c $COLLISION_TIMEOUT -t $LOCK_TIMEOUT -m $MONITOR_INTERVAL -u
$rscname $device
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ return $rc
+ fi
+ sleep 4
+ get_status $rscname
+ if [ $? -eq 0 ]; then
+ return 0
+ fi
+ return 1
+}
+
+# release has to success
+release_lock(){
+ local rscname=$1
+
+ ## If the lock is already released
+ get_status $rscname
+ if [ $? -ne 0 ]; then
+ return 0
+ fi
+
+ pid=`/usr/bin/pgrep -f "$SFEX_DAEMON .* ${rscname} "`
+ /bin/kill $pid
+
+ count=0
+ while [ $count -lt 10 ]
+ do
+ get_status $rscname
+ if [ $? -eq 1 ]; then
+ return 0
+ fi
+ count=`expr $count + 1`
+ sleep 1
+ done
+
+ /bin/kill -9 $pid
+ while :
+ do
+ get_status $rscname
+ if [ $? -eq 1 ]; then
+ break;
+ fi
+ sleep 1
+ done
+
+ return 0
+}
+
+mode="status"
+
+while getopts ":lusn:i:p:x:" opt; do
+case $opt in
+l )
+mode="lock"
+;;
+u )
+mode="unlock"
+;;
+s )
+mode="status"
+;;
+n )
+vm_name=$OPTARG
+;;
+i )
+vm_uuid=$OPTARG
+;;
+p )
+vm_host=$OPTARG
+;;
+x )
+vm_sfex_device=$OPTARG
+;;
+\? )
+usage
+;;
+esac
+done
+
+shift $(($OPTIND - 1))
+[ -z $vm_uuid ] && usage
+[ -z $vm_sfex_device ] && usage
+
+case $mode in
+lock )
+ (
+ flock -x 200
+ acquire_lock $vm_uuid $vm_sfex_device
+ rc=$?
+ flock -u 200
+ exit $rc
+ ) 200>$LOCAL_LOCK_FILE-$vm_uuid
+;;
+unlock )
+ (
+ flock -x 200
+ release_lock $vm_uuid
+ rc=$?
+ flock -u 200
+ exit $rc
+ ) 200>$LOCAL_LOCK_FILE-$vm_uuid
+;;
+status )
+ get_lock_host $vm_uuid $vm_sfex_device
+;;
+esac
+
Index: xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -4570,8 +4570,14 @@ class XendDomainInfo:

# Return name of host contained in lock file.
def get_lock_host(self, path):
- fin = os.popen(xoptions.get_xend_domain_lock_utility() + \
- ' -s ' + path, 'r')
+ lock_cmd = '%s -s -i %s ' % \
+ (xoptions.get_xend_domain_lock_utility(), \
+ self.info['uuid'])
+ lock_dev = xoptions.get_xend_domain_lock_device()
+ if lock_dev:
+ lock_cmd += '-x %d ' % lock_dev
+ lock_cmd += path
+ fin = os.popen(lock_cmd, 'r')
hostname = "unknown"

try:
@@ -4593,6 +4599,16 @@ class XendDomainInfo:
path = xoptions.get_xend_domain_lock_path()
path = os.path.join(path, self.get_uuid())

+ lock_cmd = '%s -l -p %s -n %s -i %s ' % \
+ (xoptions.get_xend_domain_lock_utility(), \
+ XendNode.instance().get_name(), \
+ self.info['name_label'], \
+ self.info['uuid'])
+ lock_dev = xoptions.get_xend_domain_lock_device()
+ if lock_dev:
+ lock_cmd += '-x %d ' % lock_dev
+ lock_cmd += path
+
try:
if not os.path.exists(path):
mkdir.parents(path, stat.S_IRWXU)
@@ -4600,12 +4616,7 @@ class XendDomainInfo:
log.exception("%s could not be created." % path)
raise XendError("%s could not be created." % path)

- status = os.system('%s -l -p %s -n %s -i %s %s' % \
- (xoptions.get_xend_domain_lock_utility(), \
- XendNode.instance().get_name(), \
- self.info['name_label'], \
- self.info['uuid'], \
- path))
+ status = os.system(lock_cmd) >> 8
if status != 0:
log.debug("Failed to aqcuire lock: status = %d" % status)
raise XendError("The VM is locked and appears to be running on
host %s." % self.get_lock_host(path))
@@ -4622,12 +4633,18 @@ class XendDomainInfo:

path = xoptions.get_xend_domain_lock_path()
path = os.path.join(path, self.get_uuid())
- status = os.system('%s -u -p %s -n %s -i %s %s' % \
- (xoptions.get_xend_domain_lock_utility(), \
- XendNode.instance().get_name(), \
- dom_name, \
- self.info['uuid'], \
- path))
+
+ lock_cmd = '%s -u -p %s -n %s -i %s ' % \
+ (xoptions.get_xend_domain_lock_utility(), \
+ XendNode.instance().get_name(), \
+ dom_name, \
+ self.info['uuid'])
+ lock_dev = xoptions.get_xend_domain_lock_device()
+ if lock_dev:
+ lock_cmd += '-x %d ' % lock_dev
+ lock_cmd += path
+
+ status = os.system(lock_cmd) >> 8
if status != 0:
log.exception("Failed to release lock: status = %s" % status)
try:
Index: xen-4.2.1-testing/tools/python/xen/xend/XendNode.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendNode.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendNode.py
@@ -162,6 +162,7 @@ class XendNode:

self._init_cpu_pools()

+ self._init_lock_devices()

def _init_networks(self):
# Initialise networks
@@ -382,6 +383,17 @@ class XendNode:
XendCPUPool.recreate_active_pools()


+ def _init_lock_devices(self):
+ if xendoptions().get_xend_domain_lock():
+ if
xendoptions().get_xend_domain_lock_utility().endswith("domain-lock-sfex"):
+ lock_device = xendoptions().get_xend_domain_lock_device()
+ if not lock_device:
+ raise XendError("The block device for sfex is not properly
configured")
+ status = os.system("lvchange -ay %s" % lock_device) >> 8
+ if status != 0:
+ raise XendError("The block device for sfex could not be
initialized")
+
+
def add_network(self, interface):
# TODO
log.debug("add_network(): Not implemented.")
Index: xen-4.2.1-testing/tools/python/xen/xend/XendOptions.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendOptions.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendOptions.py
@@ -164,6 +164,9 @@ class XendOptions:
"""Default script to acquire/release domain lock"""
xend_domain_lock_utility = auxbin.scripts_dir() + "/domain-lock"

+ """Default block device for lock service"""
+ xend_domain_lock_device = ""
+

def __init__(self):
self.configure()
@@ -430,6 +433,8 @@ class XendOptions:
else:
return self.xend_domain_lock_utility

+ def get_xend_domain_lock_device(self):
+ return self.get_config_string('xend-domain-lock-device',
self.xend_domain_lock_device)

def get_vnc_tls(self):
return self.get_config_string('vnc-tls', self.xend_vnc_tls)
++++++ xend-domain-lock.patch ++++++
---
tools/examples/xend-config.sxp | 59 ++++++++++++++++++++++
tools/hotplug/Linux/Makefile | 1
tools/hotplug/Linux/domain-lock | 83 ++++++++++++++++++++++++++++++++
tools/hotplug/Linux/vm-monitor | 41 +++++++++++++++
tools/python/xen/xend/XendCheckpoint.py | 3 +
tools/python/xen/xend/XendDomainInfo.py | 74 ++++++++++++++++++++++++++++
tools/python/xen/xend/XendOptions.py | 29 +++++++++++
7 files changed, 290 insertions(+)

Index: xen-4.2.1-testing/tools/examples/xend-config.sxp
===================================================================
--- xen-4.2.1-testing.orig/tools/examples/xend-config.sxp
+++ xen-4.2.1-testing/tools/examples/xend-config.sxp
@@ -324,6 +324,65 @@
# device assignment could really work properly even after we do this.
#(pci-passthrough-strict-check yes)

+# Domain Locking
+# In a multihost environment, domain locking provides a simple mechanism that
+# prevents simultaneously running a domain on more than one host.
+#
+# If enabled, xend will execute an external lock utility (defined below)
+# on each domain start and stop event. Disabled by default. Set to yes
+# to enable domain locking.
+#
+#(xend-domain-lock no)
+
+# Path where domain lock is stored if xend-domain-lock is enabled.
+# Note: This path must be accessible to all VM Servers participating
+# in domain locking, e.g. by specifying a shared mount point.
+# Lock is placed in /<xend-domain-lock-path>/<domain-uuid>.
+# Default is /var/lib/xen/images/vm_locks/
+#
+#(xend-domain-lock-path /var/lib/xen/images/vm_locks)
+
+# External locking utility called by xend for acquiring/releasing
+# domain lock. By default /etc/xen/scripts/domain-lock will be used
+# if xend-domain-lock is set to yes. Set to path of custom locking
+# utility to override the default.
+#
+# Synopsis of lock-util:
+# lock-util [-l|-u] -n <vm name> -i <vm uuid> -p <physical host> path
+# -l Acquire (create) lock
+# -u Remove lock
+# -n vm-name Name of domain
+# -i vm-id Id or UUID of domain
+# -p phy-host Name of physical host (dom0)
+# path /<xend-domain-lock-path>/<vm-uuid>
+# Return 0 on success, non-zero on error.
+#
+# lock-util [-s] path
+# -s Lock status. If lock is acquired, print any contents
+# on stdout and return 0. Return non-zero if lock is
+# available.
+# path /<xend-domain-lock-path>/<vm-uuid>
+# If lock is acquired, print any contents on stdout and return 0.
+# Return non-zero if lock is available.
+#
+# Default lock-util behavior:
+# On domain start event, domain-lock will create and flock(1)
+# /<xend-domain-lock-path>/<vm-uuid>/lock. Every two seconds it
+# will write <vm-name>, <vm-id>, <vm-host>, and <tick> to the lock.
+# <tick> is a running counter.
+# On domain stop event, domain-lock will unlock and remove
+# /<xend-domain-lock-path>/<vm-uuid>/lock.
+#
+# Note: If xend-domain-lock-path is a cluster-unaware file system,
+# administrator intervention may be required to remove stale
+# locks. Consider two hosts using NFS for xend-domain-lock-path
+# when HostA, running vm1, crashes. HostB could not acquire a
+# lock for vm1 since the NFS server holds an exclusive lock
+# acquired by HostA. The lock file must be manually removed
+# before starting vm1 on HostB.
+#
+#(xend-domain-lock-utility domain-lock)
+
# If we have a very big scsi device configuration, start of xend is slow,
# because xend scans all the device paths to build its internal PSCSI device
# list. If we need only a few devices for assigning to a guest, we can reduce
Index: xen-4.2.1-testing/tools/hotplug/Linux/Makefile
===================================================================
--- xen-4.2.1-testing.orig/tools/hotplug/Linux/Makefile
+++ xen-4.2.1-testing/tools/hotplug/Linux/Makefile
@@ -22,6 +22,7 @@ XEN_SCRIPTS += vtpm vtpm-delete
XEN_SCRIPTS += xen-hotplug-cleanup
XEN_SCRIPTS += external-device-migrate
XEN_SCRIPTS += vscsi
+XEN_SCRIPTS += domain-lock vm-monitor
XEN_SCRIPT_DATA = xen-script-common.sh locking.sh logging.sh
XEN_SCRIPT_DATA += xen-hotplug-common.sh xen-network-common.sh vif-common.sh
XEN_SCRIPT_DATA += block-common.sh vtpm-common.sh vtpm-hotplug-common.sh
Index: xen-4.2.1-testing/tools/hotplug/Linux/domain-lock
===================================================================
--- /dev/null
+++ xen-4.2.1-testing/tools/hotplug/Linux/domain-lock
@@ -0,0 +1,83 @@
+#!/bin/bash
+
+basedir=$(dirname "$0")
+
+usage() {
+ echo "usage: domain-lock [-l|-u] -n <vm name> -i <vm uuid> -p <physical
host> path"
+ echo "usage: domain-lock [-s] path"
+ echo ""
+ echo "-l lock"
+ echo "-u unlock"
+ echo "-s status (default)"
+ echo "-n Virtual Machine name"
+ echo "-i Virtual Machine Id or UUID"
+ echo "-p Virtual Machine Server (physical host) name"
+ echo "path A per-VM, unique location where external lock will be managed"
+ exit 1
+}
+
+remove_lock(){
+ local path=$1/lock
+ local name=$2
+
+ pid=`ps -efwww | grep vm-monitor | grep $name | awk '{print $2}'`
+ if [ -n "$pid" ]; then
+ kill $pid
+ rm -f $path
+ fi
+}
+
+get_status(){
+ local path=$1/lock
+ [ -f $path ] || exit 1
+
+ rc=`flock -xn $path /bin/true`
+ cat $path
+ exit $rc
+}
+
+mode="status"
+
+while getopts ":lusn:i:p:" opt; do
+ case $opt in
+ l )
+ mode="lock"
+ ;;
+ u )
+ mode="unlock"
+ ;;
+ s )
+ mode="status"
+ ;;
+ p )
+ vm_host=$OPTARG
+ ;;
+ n )
+ vm_name=$OPTARG
+ ;;
+ i )
+ vm_uuid=$OPTARG
+ ;;
+ \? )
+ usage
+ ;;
+ esac
+done
+
+shift $(($OPTIND - 1))
+vm_path=$1
+
+case $mode in
+ lock )
+ [ -z "$vm_path" ] || [ -z "$vm_name" ] || [ -z "$vm_uuid" ] || [ -z
"$vm_host" ] && usage
+ $basedir/set-lock $vm_path $vm_name $vm_uuid $vm_host
+ ;;
+ unlock )
+ [ -z "$vm_path" ] || [ -z "$vm_name" ] || [ -z "$vm_uuid" ] || [ -z
"$vm_host" ] && usage
+ remove_lock $vm_path $vm_name $vm_uuid $vm_host
+ ;;
+ status )
+ [ -z "$vm_path" ] && usage
+ get_status $vm_path
+ ;;
+esac
Index: xen-4.2.1-testing/tools/hotplug/Linux/vm-monitor
===================================================================
--- /dev/null
+++ xen-4.2.1-testing/tools/hotplug/Linux/vm-monitor
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+basedir=$(dirname "$0")
+HA_TICK=2
+
+monitor() {
+ local path=$1
+ local name=$2
+ local uuid=$3
+ local host=$4
+ local count=0
+ path=$path/lock
+
+ while :
+ do
+ echo "name=$name uuid=$uuid host=$host count=$count" > $path
+ count=$(($count+1))
+ sleep $HA_TICK
+ done&
+}
+
+create_lock() {
+ local path=$1/lock
+ local rc=0
+
+ [ -f $path ] || touch $path
+ flock -x -w $HA_TICK $path $basedir/vm-monitor $*
+ rc=$?
+ if [ $rc -eq 1 ]; then
+ echo `cat $path`
+ exit 1
+ else
+ exit $rc
+ fi
+}
+
+if [ $0 = "$basedir/set-lock" ]; then
+ create_lock $*
+elif [ $0 = "$basedir/vm-monitor" ]; then
+ monitor $*
+fi
Index: xen-4.2.1-testing/tools/python/xen/xend/XendCheckpoint.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendCheckpoint.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendCheckpoint.py
@@ -136,6 +136,11 @@ def save(fd, dominfo, network, live, dst
str( int(live) | (int(hvm) << 2) | (int(abort_if_busy) << 5) |
(int(log_save_progress) << 6) ) ]
log.debug("[xc_save]: %s", string.join(cmd))

+ # It is safe to release the domain lock at this point if not
+ # checkpointing
+ if checkpoint == False:
+ dominfo.release_running_lock(domain_name)
+
def saveInputHandler(line, tochild):
log.debug("In saveInputHandler %s", line)
if line == "suspend":
@@ -200,6 +205,9 @@ def save(fd, dominfo, network, live, dst
log.exception("Save failed on domain %s (%s) - resuming.", domain_name,
dominfo.getDomid())
dominfo.resumeDomain()
+ # Reacquire the domain lock
+ if checkpoint == False:
+ dominfo.acquire_running_lock()

try:
dominfo.setName(domain_name)
@@ -366,6 +374,7 @@ def restore(xd, fd, dominfo = None, paus
if not paused:
dominfo.unpause()

+ dominfo.acquire_running_lock()
return dominfo
except Exception, exn:
dominfo.destroy()
Index: xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -486,6 +486,7 @@ class XendDomainInfo:
if self._stateGet() in (XEN_API_VM_POWER_STATE_HALTED,
XEN_API_VM_POWER_STATE_SUSPENDED, XEN_API_VM_POWER_STATE_CRASHED):
try:
prepare_domain_pci_devices(self.info);
+ self.acquire_running_lock();
XendTask.log_progress(0, 30, self._constructDomain)
XendTask.log_progress(31, 60, self._initDomain)

@@ -3053,6 +3054,11 @@ class XendDomainInfo:

self._stateSet(DOM_STATE_HALTED)
self.domid = None # Do not push into _stateSet()!
+
+ try:
+ self.release_running_lock()
+ except:
+ log.exception("Failed to release domain lock.")
finally:
self.refresh_shutdown_lock.release()

@@ -4562,6 +4568,74 @@ class XendDomainInfo:
def has_device(self, dev_class, dev_uuid):
return (dev_uuid in self.info['%s_refs' % dev_class.lower()])

+ # Return name of host contained in lock file.
+ def get_lock_host(self, path):
+ fin = os.popen(xoptions.get_xend_domain_lock_utility() + \
+ ' -s ' + path, 'r')
+ hostname = "unknown"
+
+ try:
+ tokens = fin.readline().split()
+ for token in tokens:
+ item = token.split('=')
+ if item[0] == 'host':
+ hostname = item[1]
+ return hostname
+ finally:
+ fin.close()
+
+ # Acquire a lock for the domain. No-op if domain locking is turned off.
+ def acquire_running_lock(self):
+ if not xoptions.get_xend_domain_lock():
+ return
+
+ log.debug("Acquiring lock for domain %s" % self.info['name_label'])
+ path = xoptions.get_xend_domain_lock_path()
+ path = os.path.join(path, self.get_uuid())
+
+ try:
+ if not os.path.exists(path):
+ mkdir.parents(path, stat.S_IRWXU)
+ except:
+ log.exception("%s could not be created." % path)
+ raise XendError("%s could not be created." % path)
+
+ status = os.system('%s -l -p %s -n %s -i %s %s' % \
+ (xoptions.get_xend_domain_lock_utility(), \
+ XendNode.instance().get_name(), \
+ self.info['name_label'], \
+ self.info['uuid'], \
+ path))
+ if status != 0:
+ log.debug("Failed to aqcuire lock: status = %d" % status)
+ raise XendError("The VM is locked and appears to be running on
host %s." % self.get_lock_host(path))
+
+ # Release lock for domain. No-op if domain locking is turned off.
+ def release_running_lock(self, name = None):
+ if not xoptions.get_xend_domain_lock():
+ return
+
+ dom_name = self.info['name_label']
+ if name:
+ dom_name = name
+ log.debug("Releasing lock for domain %s" % dom_name)
+
+ path = xoptions.get_xend_domain_lock_path()
+ path = os.path.join(path, self.get_uuid())
+ status = os.system('%s -u -p %s -n %s -i %s %s' % \
+ (xoptions.get_xend_domain_lock_utility(), \
+ XendNode.instance().get_name(), \
+ dom_name, \
+ self.info['uuid'], \
+ path))
+ if status != 0:
+ log.exception("Failed to release lock: status = %s" % status)
+ try:
+ if len(os.listdir(path)) == 0:
+ shutil.rmtree(path)
+ except:
+ log.exception("Failed to remove unmanaged directory %s." % path)
+
def __str__(self):
return '<domain id=%s name=%s memory=%s state=%s>' % \
(str(self.domid), self.info['name_label'],
Index: xen-4.2.1-testing/tools/python/xen/xend/XendOptions.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendOptions.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendOptions.py
@@ -154,6 +154,17 @@ class XendOptions:
use loose check automatically if necessary."""
pci_dev_assign_strict_check_default = True

+ """Default for the flag indicating whether xend should create
+ a lock file for domains when they are started."""
+ xend_domain_lock = 'no'
+
+ """Default domain lock storage path."""
+ xend_domain_lock_path_default = '/var/lib/xen/images/vm_locks'
+
+ """Default script to acquire/release domain lock"""
+ xend_domain_lock_utility = auxbin.scripts_dir() + "/domain-lock"
+
+
def __init__(self):
self.configure()

@@ -401,6 +412,24 @@ class XendOptions:
else:
return None

+ def get_xend_domain_lock(self):
+ """Get the flag indicating whether xend should create a lock file
+ for domains when they are started."""
+ return self.get_config_bool("xend-domain-lock", self.xend_domain_lock)
+
+ def get_xend_domain_lock_path(self):
+ """ Get the path for domain lock storage
+ """
+ return self.get_config_string("xend-domain-lock-path",
self.xend_domain_lock_path_default)
+
+ def get_xend_domain_lock_utility(self):
+ s = self.get_config_string('xend-domain-lock-utility')
+
+ if s:
+ return os.path.join(auxbin.scripts_dir(), s)
+ else:
+ return self.xend_domain_lock_utility
+

def get_vnc_tls(self):
return self.get_config_string('vnc-tls', self.xend_vnc_tls)
++++++ xend-hvm-firmware-passthrough.patch ++++++
fate#313584: pass bios information to XEN HVM guest

Index: xen-4.2.1-testing/tools/python/xen/xm/create.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xm/create.py
+++ xen-4.2.1-testing/tools/python/xen/xm/create.py
@@ -491,6 +491,14 @@ gopts.var('nfs_root', val="PATH",
fn=set_value, default=None,
use="Set the path of the root NFS directory.")

+gopts.var('smbios_firmware', val='FILE',
+ fn=set_value, default=None,
+ use="Path to a file that contains extra SMBIOS firmware structures.")
+
+gopts.var('acpi_firmware', val='FILE',
+ fn=set_value, default=None,
+ use="Path to a file that contains extra ACPI firmware tables.")
+
gopts.var('device_model', val='FILE',
fn=set_value, default=None,
use="Path to device model program.")
@@ -1097,6 +1105,7 @@ def configure_hvm(config_image, vals):
'boot',
'cpuid', 'cpuid_check',
'device_model', 'display',
+ 'smbios_firmware', 'acpi_firmware',
'fda', 'fdb',
'gfx_passthru', 'guest_os_type',
'hap', 'hpet',
Index: xen-4.2.1-testing/tools/python/xen/xm/xenapi_create.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xm/xenapi_create.py
+++ xen-4.2.1-testing/tools/python/xen/xm/xenapi_create.py
@@ -1086,6 +1086,8 @@ class sxp2xml:
'apic',
'boot',
'device_model',
+ 'smbios_firmware',
+ 'acpi_firmware',
'loader',
'fda',
'fdb',
Index: xen-4.2.1-testing/tools/python/xen/xend/image.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/image.py
+++ xen-4.2.1-testing/tools/python/xen/xend/image.py
@@ -17,7 +17,7 @@
#============================================================================


-import os, os.path, string
+import os, os.path, string, struct, stat
import re
import math
import time
@@ -123,6 +123,8 @@ class ImageHandler:

self.device_model = vmConfig['platform'].get('device_model')

+ self.smbios_firmware
=(str(vmConfig['platform'].get('smbios_firmware')))
+ self.acpi_firmware =(str(vmConfig['platform'].get('acpi_firmware')))
self.display = vmConfig['platform'].get('display')
self.xauthority = vmConfig['platform'].get('xauthority')
self.vncconsole = int(vmConfig['platform'].get('vncconsole', 0))
@@ -945,6 +947,38 @@ class HVMImageHandler(ImageHandler):
self.vm.getDomid() ])
return args

+ def _readFirmwareFile(self, filename):
+ # Sanity check
+ if filename is None or filename.strip() == "":
+ size = struct.pack('i', int(0))
+ return size + ""
+
+ log.debug("Reading firmware file %s", filename)
+ # Open
+ try:
+ fd = os.open(filename, os.O_RDONLY)
+ except Exception, e:
+ raise VmError('Unable to open firmware file %s' % filename)
+
+ # Validate file size
+ statinfo = os.fstat(fd)
+ if statinfo.st_size == 0 or statinfo.st_size > sys.maxint:
+ os.close(fd)
+ raise VmError('Firmware file %s is an invalid size' % filename)
+ if not stat.S_ISREG(statinfo.st_mode):
+ os.close(fd)
+ raise VmError('Firmware file %s is an invalid file type' %
filename)
+ size = struct.pack('i', statinfo.st_size)
+
+ # Read entire file
+ try:
+ buf = os.read(fd, statinfo.st_size)
+ except Exception, e:
+ os.close(fd)
+ raise VmError('Failed reading firmware file %s' % filename)
+ os.close(fd)
+ return size+buf
+
def buildDomain(self):
store_evtchn = self.vm.getStorePort()

@@ -960,6 +994,8 @@ class HVMImageHandler(ImageHandler):
log.debug("vcpu_avail = %li", self.vm.getVCpuAvail())
log.debug("acpi = %d", self.acpi)
log.debug("apic = %d", self.apic)
+ log.debug("smbios_firmware= %s", self.smbios_firmware)
+ log.debug("acpi_firmware = %s", self.acpi_firmware)

rc = xc.hvm_build(domid = self.vm.getDomid(),
image = self.loader,
@@ -968,7 +1004,9 @@ class HVMImageHandler(ImageHandler):
vcpus = self.vm.getVCpuCount(),
vcpu_avail = self.vm.getVCpuAvail(),
acpi = self.acpi,
- apic = self.apic)
+ apic = self.apic,
+ smbios_firmware=
self._readFirmwareFile(self.smbios_firmware),
+ acpi_firmware =
self._readFirmwareFile(self.acpi_firmware))
rc['notes'] = { 'SUSPEND_CANCEL': 1 }

rc['store_mfn'] = xc.hvm_get_param(self.vm.getDomid(),
Index: xen-4.2.1-testing/tools/python/xen/xend/XendConfig.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendConfig.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendConfig.py
@@ -147,6 +147,8 @@ XENAPI_PLATFORM_CFG_TYPES = {
'apic': int,
'boot': str,
'device_model': str,
+ 'smbios_firmware': str,
+ 'acpi_firmware': str,
'loader': str,
'display' : str,
'fda': str,
@@ -515,6 +517,10 @@ class XendConfig(dict):
self['platform']['nomigrate'] = 0

if self.is_hvm():
+ if 'smbios_firmware' not in self['platform']:
+ self['platform']['smbios_firmware'] = ""
+ if 'acpi_firmware' not in self['platform']:
+ self['platform']['acpi_firmware'] = ""
if 'timer_mode' not in self['platform']:
self['platform']['timer_mode'] = 1
if 'viridian' not in self['platform']:
Index: xen-4.2.1-testing/tools/python/xen/lowlevel/xc/xc.c
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/lowlevel/xc/xc.c
+++ xen-4.2.1-testing/tools/python/xen/lowlevel/xc/xc.c
@@ -942,18 +942,23 @@ static PyObject *pyxc_hvm_build(XcObject
struct hvm_info_table *va_hvm;
uint8_t *va_map, sum;
#endif
- int i;
- char *image;
+ int i, datalen;
+ char *image, *smbios_str, *acpi_str;
int memsize, target=-1, vcpus = 1, acpi = 0, apic = 1;
+ PyObject *acpi_firmware = NULL;
+ PyObject *smbios_firmware = NULL;
PyObject *vcpu_avail_handle = NULL;
uint8_t vcpu_avail[(HVM_MAX_VCPUS + 7)/8];
+ struct xc_hvm_build_args hvm_args = {};

static char *kwd_list[] = { "domid",
"memsize", "image", "target", "vcpus",
- "vcpu_avail", "acpi", "apic", NULL };
- if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|iiOii", kwd_list,
+ "vcpu_avail", "acpi", "apic",
+ "smbios_firmware", "acpi_firmware", NULL };
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|iiOiiOO", kwd_list,
&dom, &memsize, &image, &target, &vcpus,
- &vcpu_avail_handle, &acpi, &apic) )
+ &vcpu_avail_handle, &acpi,
+ &apic, &smbios_firmware, &acpi_firmware)
)
return NULL;

memset(vcpu_avail, 0, sizeof(vcpu_avail));
@@ -984,8 +989,38 @@ static PyObject *pyxc_hvm_build(XcObject
if ( target == -1 )
target = memsize;

- if ( xc_hvm_build_target_mem(self->xc_handle, dom, memsize,
- target, image) != 0 )
+ memset(&hvm_args, 0, sizeof(struct xc_hvm_build_args));
+ hvm_args.mem_size = (uint64_t)memsize << 20;
+ hvm_args.mem_target = (uint64_t)target << 20;
+ hvm_args.image_file_name = image;
+
+ if ( PyString_Check(smbios_firmware ) )
+ {
+ smbios_str = PyString_AsString(smbios_firmware);
+ if ( smbios_str )
+ {
+ datalen = *(int *)smbios_str;
+ if ( datalen ) {
+ hvm_args.smbios_module.data = &((uint8_t *)smbios_str)[4];
+ hvm_args.smbios_module.length = (uint32_t)datalen;
+ }
+ }
+ }
+
+ if ( PyString_Check(acpi_firmware ) )
+ {
+ acpi_str = PyString_AsString(acpi_firmware);
+ if (acpi_str)
+ {
+ datalen = *(int *)acpi_str;
+ if ( datalen ) {
+ hvm_args.acpi_module.data = &((uint8_t *)acpi_str)[4];
+ hvm_args.acpi_module.length = (uint32_t)datalen;
+ }
+ }
+ }
+
+ if ( xc_hvm_build(self->xc_handle, dom, &hvm_args) != 0 )
return pyxc_error_to_exception(self->xc_handle);

#if !defined(__ia64__)
Index: xen-4.2.1-testing/docs/man/xmdomain.cfg.pod.5
===================================================================
--- xen-4.2.1-testing.orig/docs/man/xmdomain.cfg.pod.5
+++ xen-4.2.1-testing/docs/man/xmdomain.cfg.pod.5
@@ -243,6 +243,25 @@ this the xen kernel must be compiled wit

This defaults to 1, meaning running the domain as a UP.

+=item B<acpi_firmware>
+
+Specify a path to a file that contains extra ACPI firmware tables to pass in to
+a guest. The file can contain several tables in their binary AML form
+concatenated together. Each table self describes its length so no additional
+information is needed. These tables will be added to the ACPI table set in the
+guest. Note that existing tables cannot be overridden by this feature. For
+example this cannot be used to override tables like DSDT, FADT, etc.
+
+=item B<smbios_firmware>
+
+Specify a path to a file that contains extra SMBIOS firmware structures to pass
+in to a guest. The file can contain a set of DMTF predefined structures which will
+override the internal defaults. Not all predefined structures can be
overridden,
+only the following types: 0, 1, 2, 3, 11, 22, 39. The file can also contain any
+number of vendor defined SMBIOS structures (type 128 - 255). Since SMBIOS
+structures do not present their overall size, each entry in the file must be
+preceded by a 32b integer indicating the size of the next structure.
+
=back

=head1 DOMAIN SHUTDOWN OPTIONS
Index: xen-4.2.1-testing/tools/python/README.sxpcfg
===================================================================
--- xen-4.2.1-testing.orig/tools/python/README.sxpcfg
+++ xen-4.2.1-testing/tools/python/README.sxpcfg
@@ -51,6 +51,8 @@ image
- vncunused
(HVM)
- device_model
+ - smbios_firmware
+ - acpi_firmware
- display
- xauthority
- vncconsole
Index: xen-4.2.1-testing/tools/python/README.XendConfig
===================================================================
--- xen-4.2.1-testing.orig/tools/python/README.XendConfig
+++ xen-4.2.1-testing/tools/python/README.XendConfig
@@ -120,6 +120,8 @@ otherConfig
image.vncdisplay
image.vncunused
image.hvm.device_model
+ image.hvm.smbios_firmware
+ image.hvm.apci_firmware
image.hvm.display
image.hvm.xauthority
image.hvm.vncconsole
++++++ xend-migration-domname-fix.patch ++++++
setName() also writes the new name to the domain's xenstore path (dompath), so
that tools which read the domain name from xenstore (like 'virsh list') get the
correct value.
The 2nd hunk skips the xenstore write when not checkpointing; otherwise the VM
is destroyed but a stale VM entry is left behind in xenstore.

Signed-off-by: Chunyan Liu <cyliu@xxxxxxxxxx>
Index: xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -1961,6 +1961,8 @@ class XendDomainInfo:
self.info['name_label'] = name
if to_store:
self.storeVm("name", name)
+ if self.dompath:
+ self.storeDom("name", name)

def getName(self):
return self.info['name_label']
Index: xen-4.2.1-testing/tools/python/xen/xend/XendCheckpoint.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendCheckpoint.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendCheckpoint.py
@@ -185,7 +185,10 @@ def save(fd, dominfo, network, live, dst
dominfo.destroy()
dominfo.testDeviceComplete()
try:
- dominfo.setName(domain_name)
+ if checkpoint:
+ dominfo.setName(domain_name)
+ else:
+ dominfo.setName(domain_name, False)
except VmError:
# Ignore this. The name conflict (hopefully) arises because we
# are doing localhost migration; if we are doing a suspend of a
++++++ xend-relocation-server.fw ++++++
## Name: Xend Relocation Server
## Description: Enables xend relocation service

TCP="8002 8003"
++++++ xend-relocation.sh ++++++
#!/bin/bash
#============================================================================
# xend-relocation
#
# Version = 1.0.3
# Date = 2007-09-14
#
# Maintainer(s) = Ron Terry - ron (at) pronetworkconsulting (dot) com
#
# The latest version can be found at:
#
# http://pronetworkconsulting.com/linux/scripts/xend-relocation.html
#
# Description:
#
# This script is used to enable or disable the VM relocation (migration)
# feature of xend. It can be used to manage the local instance of xend
# or both the local instance and instances of xend on the other machines
# to/from which VMs can be relocated.
# To manage the instances of xend on other machines this script communicates
# using ssh so it is recommended that if you use this feature you
# pre-distribute ssh keys between the servers.
#
# Depends on:
#
# Can use: /etc/sysconfig/xend
#
# Usage: xend-relocation (start|stop|status)
# or
# xend-relocation (on|off|status)
#
# Vars:
#
# XEN_CONFIG_FILE
#
# RELOCATION_NODELIST
#
# MANAGE_ALL_RELOCATION_NODES
#
# XEN_RELOCATION_PORT
#
#============================================================================

#### Read config files and set variables ##################################

# If you source the /etc/sysconfig/xend file comment out the variables
# being set in this script.

# Load site settings. The functions below read RELOCATION_NODELIST,
# MANAGE_ALL_RELOCATION_NODES, XEN_RELOCATION_PORT and ENABLE_RELOCATION,
# none of which are assigned in this script; presumably they are defined
# in this file -- TODO confirm.
. /etc/sysconfig/xend

# xend configuration file that relocate_on/relocate_off rewrite with sed
# and that relocate_status inspects with grep.
XEN_CONFIG_FILE="/etc/xen/xend-config.sxp"

#### Script Functions #####################################################

# Print the command-line synopsis on stdout, framed by blank lines.
# Does not exit; the caller decides the exit status.
usage() {
    printf '%s\n' \
        "" \
        "Usage: xend-relocation {start|stop|status}" \
        " or" \
        " xend-relocation {on|off|status}" \
        ""
}

# Enable the xend relocation server by rewriting XEN_CONFIG_FILE and
# restarting xend, once per entry in RELOCATION_NODELIST.
#
# Globals read:  RELOCATION_NODELIST, MANAGE_ALL_RELOCATION_NODES,
#                XEN_RELOCATION_PORT, XEN_CONFIG_FILE
# Globals set:   SSHCMD (command prefix), RELOCATION_NODELIST (cleared when
#                the list contains "any")
#
# Terminates the whole script with "exit 0" after the first node when that
# node is "any" or when MANAGE_ALL_RELOCATION_NODES is "false".
relocate_on() {
    for NODE in $RELOCATION_NODELIST
    do
        case $NODE in
            any)
                # "any" means no host restriction: operate locally and write
                # an empty hosts-allow list.
                SSHCMD=""
                RELOCATION_NODELIST=""
                ;;
            *)
                # Only reach out to the node itself when asked to manage all
                # nodes; otherwise the commands run on the local machine.
                if [ "$MANAGE_ALL_RELOCATION_NODES" = "true" ]
                then
                    SSHCMD="ssh root@$NODE "
                else
                    SSHCMD=""
                fi
                ;;
        esac

        # Each sed expression must stay on ONE line: a newline inside the
        # s/// command makes sed fail with an "unterminated s command" error.
        # NOTE(review): when SSHCMD is an ssh prefix the expression is parsed
        # again by the remote shell; the unescaped parentheses and spaces
        # presumably need extra quoting there -- verify.

        # Uncomment the relocation-server setting (forcing it to "yes").
        $SSHCMD sed -i "s/^#(xend-relocation-server yes)/(xend-relocation-server yes)/g" $XEN_CONFIG_FILE
        $SSHCMD sed -i "s/^#(xend-relocation-server no)/(xend-relocation-server yes)/g" $XEN_CONFIG_FILE
        # Uncomment the relocation port and set it to XEN_RELOCATION_PORT.
        $SSHCMD sed -i "s/^#(xend-relocation-port [^)]*)/(xend-relocation-port $XEN_RELOCATION_PORT)/g" $XEN_CONFIG_FILE
        # Park any active hosts-allow line behind "###" so the next
        # substitution (which only matches a single "#") leaves it alone.
        $SSHCMD sed -i "s/^(xend-relocation-hosts-allow \(.*\)/###(xend-relocation-hosts-allow \1/g" $XEN_CONFIG_FILE
        # Activate the commented hosts-allow line with the configured list.
        $SSHCMD sed -i "s/^#(xend-relocation-hosts-allow .*/(xend-relocation-hosts-allow \'$RELOCATION_NODELIST')/g" $XEN_CONFIG_FILE
        $SSHCMD rcxend restart

        # With "any", or when only the local instance is managed, one pass
        # is enough; this exits the script, not just the function.
        if [ "$NODE" = "any" ] || [ "$MANAGE_ALL_RELOCATION_NODES" = "false" ]
        then
            exit 0
        fi
    done
}

# Disable the xend relocation server by commenting out its settings in
# XEN_CONFIG_FILE and restarting xend, once per entry in
# RELOCATION_NODELIST.
#
# Globals read:  RELOCATION_NODELIST, MANAGE_ALL_RELOCATION_NODES,
#                XEN_RELOCATION_PORT, XEN_CONFIG_FILE
# Globals set:   SSHCMD (command prefix), RELOCATION_NODELIST (cleared when
#                the list contains "any")
#
# Terminates the whole script with "exit 0" after the first node when that
# node is "any" or when MANAGE_ALL_RELOCATION_NODES is "false".
relocate_off() {
    for NODE in $RELOCATION_NODELIST
    do
        case $NODE in
            any)
                # "any" means no host restriction: operate locally.
                SSHCMD=""
                RELOCATION_NODELIST=""
                ;;
            *)
                # NOTE(review): unlike relocate_on, this always uses ssh for a
                # named node, even when MANAGE_ALL_RELOCATION_NODES is not
                # "true" -- confirm whether that asymmetry is intended.
                SSHCMD="ssh root@$NODE "
                ;;
        esac

        # Each sed expression must stay on ONE line: a newline inside the
        # s/// command makes sed fail with an "unterminated s command" error.

        # Comment out the active relocation-server, port and hosts-allow
        # settings.
        $SSHCMD sed -i "s/^(xend-relocation-server yes)/#(xend-relocation-server yes)/g" $XEN_CONFIG_FILE
        $SSHCMD sed -i "s/^(xend-relocation-port [^)]*)/#(xend-relocation-port $XEN_RELOCATION_PORT)/g" $XEN_CONFIG_FILE
        $SSHCMD sed -i "s/^(xend-relocation-hosts-allow .*/#(xend-relocation-hosts-allow \'$RELOCATION_NODELIST')/g" $XEN_CONFIG_FILE
        $SSHCMD rcxend restart

        # One pass is enough for "any" or when not managing all nodes; this
        # exits the script, not just the function.
        if [ "$NODE" = "any" ] || [ "$MANAGE_ALL_RELOCATION_NODES" = "false" ]
        then
            exit 0
        fi
    done
}

# Report whether the xend relocation server is enabled in XEN_CONFIG_FILE.
#
# Sets the global ENABLED to "yes" when an active
# "(xend-relocation-server ... yes)" line is present, to "no" when the
# setting is present but set to no or commented out, and to "unknown" when
# no xend-relocation-server line is found at all; then prints the result
# on stdout, framed by blank lines.
relocate_status() {
    # Start from a known value so a stale ENABLED inherited from the
    # environment or from the sourced sysconfig file is never reported.
    ENABLED="unknown"

    if grep -q "^(xend-relocation-server .*yes)" "$XEN_CONFIG_FILE"
    then
        ENABLED="yes"
    # The alternation must stay on one line: grep treats a newline inside
    # the pattern as a separator between patterns.
    elif grep -E -q "(^\(xend-relocation-server .*no\)|^#\(xend-relocation-server .*no\)|^#\(xend-relocation-server .*yes\))" "$XEN_CONFIG_FILE"
    then
        ENABLED="no"
    fi

    echo ""
    echo "Xend Relocation Server Enabled: $ENABLED"
    echo ""
}

#### Script Body ##########################################################

# Dispatch on the first command-line argument.
case $1 in
    on|ON|On|start)
        # Enabling is gated by ENABLE_RELOCATION; any value other than
        # "true" leaves the configuration untouched.
        if [ "$ENABLE_RELOCATION" = "true" ]
        then
            relocate_on
        fi
        exit 0
        ;;
    off|OFF|Off|stop)
        relocate_off
        exit 0
        ;;
    status|STATUS|Status)
        relocate_status
        exit 0
        ;;
    *)
        # Unknown or missing action: show the synopsis and fail.
        usage
        exit 1
        ;;
esac
++++++ xend-sysconfig.patch ++++++
Index: xen-4.2.0-testing/tools/hotplug/Linux/init.d/sysconfig.xencommons
===================================================================
--- xen-4.2.0-testing.orig/tools/hotplug/Linux/init.d/sysconfig.xencommons
+++ xen-4.2.0-testing/tools/hotplug/Linux/init.d/sysconfig.xencommons
@@ -1,14 +1,30 @@
+## Path: System/Virtualization
+## Type: string
+## Default: "none"
+#
# Log xenconsoled messages (cf xl dmesg)
#XENCONSOLED_TRACE=[none|guest|hv|all]

+## Type: string
+## Default: xenstored
+#
# Select xenstored implementation
#XENSTORED=[oxenstored|xenstored]

+## Type: string
+## Default: Not defined, tracing off
+#
# Log xenstored messages
#XENSTORED_TRACE=[yes|on|1]

+## Type: string
+## Default: "/var/lib/xenstored"
+#
# Running xenstored on XENSTORED_ROOTDIR
#XENSTORED_ROOTDIR=/var/lib/xenstored

+## Type: string
+## Default: Not defined, xenbackendd debug mode off
+#
# Running xenbackendd in debug mode
#XENBACKENDD_DEBUG=[yes|on|1]
++++++ xend-vcpu-affinity-fix.patch ++++++
Index: xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -2799,7 +2799,10 @@ class XendDomainInfo:
from xen.xend import XendDomain
doms = XendDomain.instance().list('all')
for dom in filter (lambda d: d.domid != self.domid, doms):
- cpuinfo = dom.getVCPUInfo()
+ try:
+ cpuinfo = dom.getVCPUInfo()
+ except:
+ continue
for vcpu in sxp.children(cpuinfo, 'vcpu'):
if sxp.child_value(vcpu, 'online') == 0: continue
cpumap = list(sxp.child_value(vcpu,'cpumap'))
++++++ xenpaging.autostart.patch ++++++
# HG changeset patch
# Parent 659ee31faec91ac543578db7c9b2849fb7367419

xenpaging: xend: start xenpaging via config option

Start xenpaging via config option.

TODO: add libxl support
TODO: parse config values like 42K, 42M, 42G, 42%

Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>

---
v5:
use actmem=, xenpaging_file=, xenpaging_extra=
add xm mem-swap-target

v4:
add config option for pagefile directory
add config option to enable debug
add config option to set policy mru_size
fail if chdir fails
force self.xenpaging* variables to be strings because a xm new may turn some
of them into type int and later os.execve fails with a TypeError

v3:
decouple create/destroycreateXenPaging from _create/_removeDevices
init xenpaging variable to 0 if xenpaging is not in config file to
avoid string None coming from sxp file

v2:
unlink logfile instead of truncating it.
allows hardlinking for further inspection

---
tools/examples/xmexample.hvm | 9 +++
tools/python/README.XendConfig | 3 +
tools/python/README.sxpcfg | 3 +
tools/python/xen/xend/XendConfig.py | 9 +++
tools/python/xen/xend/XendDomain.py | 15 +++++
tools/python/xen/xend/XendDomainInfo.py | 23 ++++++++
tools/python/xen/xend/image.py | 85 ++++++++++++++++++++++++++++++++
tools/python/xen/xm/create.py | 15 +++++
tools/python/xen/xm/main.py | 14 +++++
tools/python/xen/xm/xenapi_create.py | 3 +
10 files changed, 179 insertions(+)

Index: xen-4.2.1-testing/tools/examples/xmexample.hvm
===================================================================
--- xen-4.2.1-testing.orig/tools/examples/xmexample.hvm
+++ xen-4.2.1-testing/tools/examples/xmexample.hvm
@@ -142,6 +142,15 @@ disk = [ 'file:/var/lib/xen/images/disk.
# Device Model to be used
device_model = 'qemu-dm'

+# the amount of memory in MiB for the guest
+#actmem=42
+
+# Optional: guest page file
+#xenpaging_file="/var/lib/xen/xenpaging/<domain_name>.<domain_id>.paging"
+
+# Optional: extra cmdline options for xenpaging
+#xenpaging_extra=[ 'string', 'string' ]
+
#-----------------------------------------------------------------------------
# boot on floppy (a), hard disk (c), Network (n) or CD-ROM (d)
# default: hard disk, cd-rom, floppy
Index: xen-4.2.1-testing/tools/python/README.XendConfig
===================================================================
--- xen-4.2.1-testing.orig/tools/python/README.XendConfig
+++ xen-4.2.1-testing/tools/python/README.XendConfig
@@ -120,6 +120,9 @@ otherConfig
image.vncdisplay
image.vncunused
image.hvm.device_model
+ image.hvm.actmem
+ image.hvm.xenpaging_file
+ image.hvm.xenpaging_extra
image.hvm.smbios_firmware
image.hvm.apci_firmware
image.hvm.display
Index: xen-4.2.1-testing/tools/python/README.sxpcfg
===================================================================
--- xen-4.2.1-testing.orig/tools/python/README.sxpcfg
+++ xen-4.2.1-testing/tools/python/README.sxpcfg
@@ -51,6 +51,9 @@ image
- vncunused
(HVM)
- device_model
+ - actmem
+ - xenpaging_file
+ - xenpaging_extra
- smbios_firmware
- acpi_firmware
- display
Index: xen-4.2.1-testing/tools/python/xen/xend/XendConfig.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendConfig.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendConfig.py
@@ -147,6 +147,9 @@ XENAPI_PLATFORM_CFG_TYPES = {
'apic': int,
'boot': str,
'device_model': str,
+ 'actmem': str,
+ 'xenpaging_file': str,
+ 'xenpaging_extra': str,
'smbios_firmware': str,
'acpi_firmware': str,
'loader': str,
@@ -518,6 +521,12 @@ class XendConfig(dict):
self['platform']['nomigrate'] = 0

if self.is_hvm():
+ if 'actmem' not in self['platform']:
+ self['platform']['actmem'] = "0"
+ if 'xenpaging_file' not in self['platform']:
+ self['platform']['xenpaging_file'] = ""
+ if 'xenpaging_extra' not in self['platform']:
+ self['platform']['xenpaging_extra'] = []
if 'smbios_firmware' not in self['platform']:
self['platform']['smbios_firmware'] = ""
if 'acpi_firmware' not in self['platform']:
Index: xen-4.2.1-testing/tools/python/xen/xend/XendDomain.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendDomain.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendDomain.py
@@ -1848,6 +1848,21 @@ class XendDomain:
raise XendInvalidDomain(str(domid))
dominfo.setMigrateConstraints(max_iters, max_factor, abort_if_busy,
log_save_progress)

+ def domain_swaptarget_set(self, domid, mem):
+ """Set the memory limit for a domain.
+
+ @param domid: Domain ID or Name
+ @type domid: int or string.
+ @param mem: memory limit (in MiB)
+ @type mem: int
+ @raise XendError: fail to set memory
+ @rtype: 0
+ """
+ dominfo = self.domain_lookup_nr(domid)
+ if not dominfo:
+ raise XendInvalidDomain(str(domid))
+ dominfo.setSwapTarget(mem)
+
def domain_maxmem_set(self, domid, mem):
"""Set the memory limit for a domain.

Index: xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -1548,6 +1548,17 @@ class XendDomainInfo:
break
xen.xend.XendDomain.instance().managed_config_save(self)

+ def setSwapTarget(self, target):
+ """Set the swap target of this domain.
+ @param target: In MiB.
+ """
+ log.debug("Setting swap target of domain %s (%s) to %d MiB.",
+ self.info['name_label'], str(self.domid), target)
+
+ if self.domid > 0:
+ self.storeDom("memory/target-tot_pages", target * 1024)
+ self.info['platform']['actmem'] = str(target)
+
def setMemoryTarget(self, target):
"""Set the memory target of this domain.
@param target: In MiB.
@@ -2338,6 +2349,8 @@ class XendDomainInfo:
self.info['name_label'], self.domid, self.info['uuid'],
new_name, new_uuid)
self._unwatchVm()
+ if self.image:
+ self.image.destroyXenPaging()
self._releaseDevices()
# Remove existing vm node in xenstore
self._removeVm()
@@ -3017,6 +3030,9 @@ class XendDomainInfo:

self._createDevices()

+ if self.image:
+ self.image.createXenPaging()
+
self.image.cleanupTmpImages()

self.info['start_time'] = time.time()
@@ -3041,6 +3057,8 @@ class XendDomainInfo:
self.refresh_shutdown_lock.acquire()
try:
self.unwatchShutdown()
+ if self.image:
+ self.image.destroyXenPaging()
self._releaseDevices()
bootloader_tidy(self)

@@ -3125,6 +3143,7 @@ class XendDomainInfo:
self.image = image.create(self, self.info)
if self.image:
self.image.createDeviceModel(True)
+ self.image.createXenPaging()
self.console_port = console_port
self._storeDomDetails()
self._registerWatches()
@@ -3267,6 +3286,8 @@ class XendDomainInfo:
# could also fetch a parsed note from xenstore
fast = self.info.get_notes().get('SUSPEND_CANCEL') and 1 or 0
if not fast:
+ if self.image:
+ self.image.destroyXenPaging()
self._releaseDevices()
self.testDeviceComplete()
self.testvifsComplete()
@@ -3282,6 +3303,8 @@ class XendDomainInfo:
self._storeDomDetails()

self._createDevices()
+ if self.image:
+ self.image.createXenPaging()
log.debug("XendDomainInfo.resumeDomain: devices created")

xc.domain_resume(self.domid, fast)
Index: xen-4.2.1-testing/tools/python/xen/xend/image.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xend/image.py
+++ xen-4.2.1-testing/tools/python/xen/xend/image.py
@@ -122,6 +122,10 @@ class ImageHandler:
self.vm.permissionsVm("image/cmdline", { 'dom': self.vm.getDomid(),
'read': True } )

self.device_model = vmConfig['platform'].get('device_model')
+ self.actmem = str(vmConfig['platform'].get('actmem'))
+ self.xenpaging_file = str(vmConfig['platform'].get('xenpaging_file'))
+ self.xenpaging_extra = vmConfig['platform'].get('xenpaging_extra')
+ self.xenpaging_pid = None

self.smbios_firmware
=(str(vmConfig['platform'].get('smbios_firmware')))
self.acpi_firmware =(str(vmConfig['platform'].get('acpi_firmware')))
@@ -394,6 +398,87 @@ class ImageHandler:
sentinel_fifos_inuse[sentinel_path_fifo] = 1
self.sentinel_path_fifo = sentinel_path_fifo

+ def createXenPaging(self):
+ if not self.vm.info.is_hvm():
+ return
+ if self.actmem == "0":
+ return
+ if self.xenpaging_pid:
+ return
+ xenpaging_bin = auxbin.pathTo("xenpaging")
+ args = [xenpaging_bin]
+ args = args + ([ "-f", "/var/lib/xen/xenpaging/%s.%d.paging" %
(str(self.vm.info['name_label']), self.vm.getDomid())])
+ if self.xenpaging_extra:
+ args = args + (self.xenpaging_extra)
+ args = args + ([ "-d", "%d" % self.vm.getDomid()])
+ self.xenpaging_logfile = "/var/log/xen/xenpaging-%s.log" %
str(self.vm.info['name_label'])
+ logfile_mode = os.O_WRONLY|os.O_CREAT|os.O_APPEND|os.O_TRUNC
+ null = os.open("/dev/null", os.O_RDONLY)
+ try:
+ os.unlink(self.xenpaging_logfile)
+ except:
+ pass
+ logfd = os.open(self.xenpaging_logfile, logfile_mode, 0644)
+ sys.stderr.flush()
+ contract = osdep.prefork("%s:%d" % (self.vm.getName(),
self.vm.getDomid()))
+ xenpaging_pid = os.fork()
+ if xenpaging_pid == 0: #child
+ try:
+ osdep.postfork(contract)
+ os.dup2(null, 0)
+ os.dup2(logfd, 1)
+ os.dup2(logfd, 2)
+ try:
+ env = dict(os.environ)
+ log.info("starting %s" % args)
+ os.execve(xenpaging_bin, args, env)
+ except Exception, e:
+ log.warn('failed to execute xenpaging: %s' %
utils.exception_string(e))
+ os._exit(126)
+ except:
+ log.warn("starting xenpaging failed")
+ os._exit(127)
+ else:
+ osdep.postfork(contract, abandon=True)
+ self.xenpaging_pid = xenpaging_pid
+ os.close(null)
+ os.close(logfd)
+ self.vm.storeDom("xenpaging/xenpaging-pid", self.xenpaging_pid)
+ self.vm.storeDom("memory/target-tot_pages", int(self.actmem) * 1024)
+
+ def destroyXenPaging(self):
+ if self.actmem == "0":
+ return
+ if self.xenpaging_pid:
+ try:
+ os.kill(self.xenpaging_pid, signal.SIGHUP)
+ except OSError, exn:
+ log.exception(exn)
+ for i in xrange(100):
+ try:
+ (p, rv) = os.waitpid(self.xenpaging_pid, os.WNOHANG)
+ if p == self.xenpaging_pid:
+ break
+ except OSError:
+ # This is expected if Xend has been restarted within
+ # the life of this domain. In this case, we can kill
+ # the process, but we can't wait for it because it's
+ # not our child. We continue this loop, and after it is
+ # terminated make really sure the process is going away
+ # (SIGKILL).
+ pass
+ time.sleep(0.1)
+ else:
+ log.warning("xenpaging %d took more than 10s "
+ "to terminate: sending SIGKILL" %
self.xenpaging_pid)
+ try:
+ os.kill(self.xenpaging_pid, signal.SIGKILL)
+ os.waitpid(self.xenpaging_pid, 0)
+ except OSError:
+ # This happens if the process doesn't exist.
+ pass
+ self.xenpaging_pid = None
+
def createDeviceModel(self, restore = False):
if self.device_model is None:
return
Index: xen-4.2.1-testing/tools/python/xen/xm/create.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xm/create.py
+++ xen-4.2.1-testing/tools/python/xen/xm/create.py
@@ -503,6 +503,18 @@ gopts.var('acpi_firmware', val='FILE',
fn=set_value, default=None,
use="Path to a file that contains extra ACPI firmware tables.")

+gopts.var('actmem', val='NUM',
+ fn=set_value, default='0',
+ use="Number of pages to swap.")
+
+gopts.var('xenpaging_file', val='PATH',
+ fn=set_value, default=None,
+ use="pagefile to use (optional)")
+
+gopts.var('xenpaging_extra', val='string1,string2',
+ fn=append_value, default=[],
+ use="additional args for xenpaging (optional)")
+
gopts.var('device_model', val='FILE',
fn=set_value, default=None,
use="Path to device model program.")
@@ -1108,6 +1120,9 @@ def configure_hvm(config_image, vals):
args = [ 'acpi', 'apic',
'boot',
'cpuid', 'cpuid_check',
+ 'actmem',
+ 'xenpaging_file',
+ 'xenpaging_extra',
'device_model', 'display',
'smbios_firmware', 'acpi_firmware',
'fda', 'fdb',
Index: xen-4.2.1-testing/tools/python/xen/xm/main.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xm/main.py
+++ xen-4.2.1-testing/tools/python/xen/xm/main.py
@@ -115,6 +115,8 @@ SUBCOMMAND_HELP = {
'Set the maximum amount reservation for a domain.'),
'mem-set' : ('<Domain> <Mem>',
'Set the current memory usage for a domain.'),
+ 'mem-swap-target' : ('<Domain> <Mem>',
+ 'Set the memory usage for a domain.'),
'migrate' : ('<Domain> <Host>',
'Migrate a domain to another machine.'),
'pause' : ('<Domain>', 'Pause execution of a domain.'),
@@ -1592,6 +1594,17 @@ def xm_mem_set(args):
mem_target = int_unit(args[1], 'm')
server.xend.domain.setMemoryTarget(dom, mem_target)

+def xm_mem_swap_target(args):
+ arg_check(args, "mem-swap-target", 2)
+
+ dom = args[0]
+
+ if serverType == SERVER_XEN_API:
+ err("xenapi not supported")
+ else:
+ swap_target = int_unit(args[1], 'm')
+ server.xend.domain.swaptarget_set(dom, swap_target)
+
def xm_usb_add(args):
arg_check(args, "usb-add", 2)
server.xend.domain.usb_add(args[0],args[1])
@@ -3847,6 +3860,7 @@ commands = {
# memory commands
"mem-max": xm_mem_max,
"mem-set": xm_mem_set,
+ "mem-swap-target": xm_mem_swap_target,
# cpu commands
"vcpu-pin": xm_vcpu_pin,
"vcpu-list": xm_vcpu_list,
Index: xen-4.2.1-testing/tools/python/xen/xm/xenapi_create.py
===================================================================
--- xen-4.2.1-testing.orig/tools/python/xen/xm/xenapi_create.py
+++ xen-4.2.1-testing/tools/python/xen/xm/xenapi_create.py
@@ -1085,6 +1085,9 @@ class sxp2xml:
'acpi',
'apic',
'boot',
+ 'actmem',
+ 'xenpaging_file',
+ 'xenpaging_extra',
'device_model',
'smbios_firmware',
'acpi_firmware',
++++++ xenpaging.doc.patch ++++++
---
docs/misc/xenpaging.txt | 49 +++++++++++++++++++++++++++++++++---------------
1 file changed, 34 insertions(+), 15 deletions(-)

Index: xen-4.2.0-testing/docs/misc/xenpaging.txt
===================================================================
--- xen-4.2.0-testing.orig/docs/misc/xenpaging.txt
+++ xen-4.2.0-testing/docs/misc/xenpaging.txt
@@ -22,22 +22,41 @@ functionality.

Usage:

-Up to now xenpaging is not integrated into libxl/xend, so it has to be
-started manually for each guest.
+Up to now xenpaging is only integrated into xm/xend.

-Once the guest is running, run xenpaging with the guest_id and the path
-to the pagefile:
-
- /usr/lib/xen/bin/xenpaging -f /path/to/page_file -d dom_id &
-
-Once xenpaging runs it needs a memory target, which is the memory
-footprint of the guest. This value (in KiB) must be written manually to
-xenstore. The following example sets the target to 512MB:
-
- xenstore-write /local/domain/<dom_id>/memory/target-tot_pages $((1024*512))
-
-Now xenpaging tries to page-out as many pages to keep the overall memory
-footprint of the guest at 512MB.
+To enable xenpaging for a guest add the option 'actmem=' to the guests
+config file and run 'xm new <vm_config_file>' to make the changes
+active. actmem= takes the amount of memory in MB which a guest is
+allowed to use at a given time. Everything above this limit will be
+paged out. This paging is transparent to the guest.
+
+Example:
+ memory=4096
+ actmem=1024
+In this example a guest gets the impression it has 4GB of memory and
+the guest OS has to configure itself for this amount of memory. But
+xenpaging will page-out 3072MB, leaving only 1024MB active at a time.
+
+At runtime the configured value of actmem= can be changed with the "xm
+mem-swap-target" command.
+ xm mem-swap-target <domain_name> 512
+
+Additional cmdline options for the xenpaging binary can be specified
+with the xenpaging_extra= config file option:
+
+ xenpaging_extra=[ '-f', '/dev/shm/pagefile-guest_name', '-v' ]
+
+To get a list of available options, run /usr/lib/xen/bin/xenpaging -h:
+
+ xenpaging [options] -f <pagefile> -d <domain_id>
+
+options:
+ -d <domid> --domain=<domid> numerical domain_id of guest. This
option is required.
+ -f <file> --pagefile=<file> pagefile to use. This option is
required.
+ -m <max_memkb> --max_memkb=<max_memkb> maximum amount of memory to handle.
+ -r <num> --mru_size=<num> number of paged-in pages to keep in
memory.
+ -v --verbose enable debug output.
+ -h --help this output.

Todo:
- integrate xenpaging into libxl
++++++ xm-create-maxmem.patch ++++++
Cast maxmem to int before computation

Reported in L3 bnc#732782

From: Dario Abatianni <dabatianni@xxxxxxxxxx>

Index: xen-4.2.0-testing/tools/python/xen/xm/xenapi_create.py
===================================================================
--- xen-4.2.0-testing.orig/tools/python/xen/xm/xenapi_create.py
+++ xen-4.2.0-testing/tools/python/xen/xm/xenapi_create.py
@@ -764,7 +764,7 @@ class sxp2xml:

if get_child_by_name(config, "maxmem"):
memory.attributes["static_max"] = \
- str(int(get_child_by_name(config, "maxmem")*1024*1024))
+ str(int(get_child_by_name(config, "maxmem"))*1024*1024)

vm.appendChild(memory)

++++++ xm-create-xflag.patch ++++++
Index: xen-4.2.0-testing/tools/python/xen/xm/create.py
===================================================================
--- xen-4.2.0-testing.orig/tools/python/xen/xm/create.py
+++ xen-4.2.0-testing/tools/python/xen/xm/create.py
@@ -36,7 +36,7 @@ from xen.xend.server.DevConstants import
from xen.util import blkif
from xen.util import vscsi_util
import xen.util.xsm.xsm as security
-from xen.xm.main import serverType, SERVER_XEN_API, get_single_vm
+from xen.xm.main import serverType, SERVER_XEN_API, SERVER_LEGACY_XMLRPC,
get_single_vm
from xen.util import utils, auxbin
from xen.util.pci import dev_dict_to_sxp, \
parse_pci_name_extended, PciDeviceParseError
@@ -1533,7 +1533,7 @@ def main(argv):
except IOError, exn:
raise OptionError("Cannot read file %s: %s" % (config, exn[1]))

- if serverType == SERVER_XEN_API:
+ if serverType == SERVER_XEN_API or serverType == SERVER_LEGACY_XMLRPC:
from xen.xm.xenapi_create import sxp2xml
sxp2xml_inst = sxp2xml()
doc = sxp2xml_inst.convert_sxp_to_xml(config, transient=True)
@@ -1541,7 +1541,7 @@ def main(argv):
if opts.vals.dryrun and not opts.is_xml:
SXPPrettyPrint.prettyprint(config)

- if opts.vals.xmldryrun and serverType == SERVER_XEN_API:
+ if opts.vals.xmldryrun:
print doc.toprettyxml()

if opts.vals.dryrun or opts.vals.xmldryrun:
Index: xen-4.2.0-testing/tools/python/xen/xend/XendAPIConstants.py
===================================================================
--- xen-4.2.0-testing.orig/tools/python/xen/xend/XendAPIConstants.py
+++ xen-4.2.0-testing/tools/python/xen/xend/XendAPIConstants.py
@@ -45,8 +45,10 @@ XEN_API_ON_NORMAL_EXIT = [
XEN_API_ON_CRASH_BEHAVIOUR = [
'destroy',
'coredump_and_destroy',
+ 'coredump_destroy',
'restart',
'coredump_and_restart',
+ 'coredump_restart',
'preserve',
'rename_restart'
]
++++++ xm-save-check-file.patch ++++++
Index: xen-4.2.0-testing/tools/python/xen/xend/XendAPI.py
===================================================================
--- xen-4.2.0-testing.orig/tools/python/xen/xend/XendAPI.py
+++ xen-4.2.0-testing/tools/python/xen/xend/XendAPI.py
@@ -1959,10 +1959,10 @@ class XendAPI(object):
bool(live), port, node, ssl, bool(chs))
return xen_api_success_void()

- def VM_save(self, _, vm_ref, dest, checkpoint):
+ def VM_save(self, _, vm_ref, dest, checkpoint, force):
xendom = XendDomain.instance()
xeninfo = xendom.get_vm_by_uuid(vm_ref)
- xendom.domain_save(xeninfo.getDomid(), dest, checkpoint)
+ xendom.domain_save(xeninfo.getDomid(), dest, checkpoint, force)
return xen_api_success_void()

def VM_restore(self, _, src, paused):
Index: xen-4.2.0-testing/tools/python/xen/xend/XendDomain.py
===================================================================
--- xen-4.2.0-testing.orig/tools/python/xen/xend/XendDomain.py
+++ xen-4.2.0-testing/tools/python/xen/xend/XendDomain.py
@@ -1505,7 +1505,7 @@ class XendDomain:
pass
sock.close()

- def domain_save(self, domid, dst, checkpoint=False):
+ def domain_save(self, domid, dst, checkpoint=False, force=False):
"""Start saving a domain to file.

@param domid: Domain ID or Name
@@ -1521,6 +1521,9 @@ class XendDomain:
if not dominfo:
raise XendInvalidDomain(str(domid))

+ if os.access(dst, os.F_OK) and not force:
+ raise XendError("Save file:%s exist!\n" % dst)
+
if dominfo.getDomid() == DOM0_ID:
raise XendError("Cannot save privileged domain %s" %
str(domid))
if dominfo._stateGet() != DOM_STATE_RUNNING:
Index: xen-4.2.0-testing/tools/python/xen/xm/main.py
===================================================================
--- xen-4.2.0-testing.orig/tools/python/xen/xm/main.py
+++ xen-4.2.0-testing/tools/python/xen/xm/main.py
@@ -121,7 +121,7 @@ SUBCOMMAND_HELP = {
'reset' : ('<Domain>', 'Reset a domain.'),
'restore' : ('<CheckpointFile> [-p]',
'Restore a domain from a saved state.'),
- 'save' : ('[-c] <Domain> <CheckpointFile>',
+ 'save' : ('[-c|-f] <Domain> <CheckpointFile>',
'Save a domain state to restore later.'),
'shutdown' : ('<Domain> [-waRH]', 'Shutdown a domain.'),
'top' : ('', 'Monitor a host and the domains in real time.'),
@@ -342,6 +342,7 @@ SUBCOMMAND_OPTIONS = {
),
'save': (
('-c', '--checkpoint', 'Leave domain running after creating snapshot'),
+ ('-f', '--force', 'Force to overwrite exist file'),
),
'restore': (
('-p', '--paused', 'Do not unpause domain after restoring it'),
@@ -864,18 +865,21 @@ def xm_event_monitor(args):

def xm_save(args):

- arg_check(args, "save", 2, 3)
+ arg_check(args, "save", 2, 4)

try:
- (options, params) = getopt.gnu_getopt(args, 'c', ['checkpoint'])
+ (options, params) = getopt.gnu_getopt(args, 'cf', ['checkpoint',
'force'])
except getopt.GetoptError, opterr:
err(opterr)
usage('save')

checkpoint = False
+ force = False
for (k, v) in options:
if k in ['-c', '--checkpoint']:
checkpoint = True
+ if k in ['-f', '--force']:
+ force = True

if len(params) != 2:
err("Wrong number of parameters")
@@ -889,9 +893,9 @@ def xm_save(args):
sys.exit(1)

if serverType == SERVER_XEN_API:
- server.xenapi.VM.save(get_single_vm(dom), savefile, checkpoint)
+ server.xenapi.VM.save(get_single_vm(dom), savefile, checkpoint, force)
else:
- server.xend.domain.save(dom, savefile, checkpoint)
+ server.xend.domain.save(dom, savefile, checkpoint, force)

def xm_restore(args):
arg_check(args, "restore", 1, 2)
++++++ xmclone.sh ++++++
++++ 795 lines (skipped)

++++++ xmexample.disks ++++++
# A VM's disks can be stored in a variety of ways.
# Here are some examples:
disk = [
# Block device
'phy:/dev/hdb,xvda,w',
# Raw format, accessed via loopback
'file:/var/lib/xen/images/disk-example/disk0,xvdb,w',
# Raw format, accessed via blocktap
'tap:aio:/var/lib/xen/images/disk-example/disk0,xvdc,w',
# QCOW format, accessed via blocktap
'tap:qcow:/var/lib/xen/images/disk-example/disk0.qcow,xvdd,w',
# NBD (network block device): IP and port are separated by space
'nbd:192.168.0.1 20004,xvde,w',
# iSCSI: The usual colon is replaced with '@'
'iscsi:iqn.2006-09.de.suse@0ac47ee2-216e-452a-a341-a12624cd0225,xvdf,w',
# Fibre Channel N_Port ID Virtualization
'npiv:210400e08b80c40f,xvdg,w' ]


# Remaining settings for the example VM:
name="disk-example"
memory=512
vcpus=1
on_crash="destroy"
on_poweroff="destroy"
on_reboot="restart"
localtime=0
builder="linux"
bootloader="/usr/lib/xen/boot/domUloader.py"
bootargs="--entry=xvda2:/boot/vmlinuz-xen,/boot/initrd-xen"
vif=[ 'mac=00:16:3e:00:01:02,bridge=xenbr0' ]
vfb=['type=vnc,vncunused=1']
++++++ xmexample.domUloader ++++++
# This is a bootloader used to boot paravirtualized domains. You can optionally
# plug in a different boot loader here, e.g., pygrub. There is usually no
# reason to change this. Don't explicitly specify kernel and ramdisk if you use
# a bootloader.
bootloader="/usr/lib/xen/boot/domUloader.py"

# The domUloader will pull the specified kernel and initrd out of the domU's
# disk, and use that to boot. This is easier to manage, compared to manually
# keeping a copy of the kernel and initrd in sync in dom0's filesystem and
# pointing to them with the "kernel" and "ramdisk" parameters. Syntax is
# "VDEV:KERNEL,INITRD", where VDEV is the block device (from domU's point of
# view) within which KERNEL and INITRD can be found.
bootentry = "hda1:/boot/vmlinuz-xen,/boot/initrd-xen"


# Remaining settings for the example VM:
name="domUloader-example"
memory=512
vcpus=1
on_crash="destroy"
on_poweroff="destroy"
on_reboot="restart"
localtime=0
builder="linux"
bootargs="--entry=xvda2:/boot/vmlinuz-xen,/boot/initrd-xen"
disk=[ 'file:/var/lib/xen/images/domUloader-example/disk0,xvda,w' ]
vif=[ 'mac=00:16:3e:00:01:02,bridge=xenbr0' ]
vfb=['type=vnc,vncunused=1']
++++++ xnloader.py ++++++
# NetWare-specific operations
#
# Copyright (c) 2013 Suse Linux Products.
# Author: Charles Arnold <carnold@xxxxxxxx>
#
# This software may be freely redistributed under the terms of the GNU
# general public license.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

# Binary patching of xnloader.sys
# For launching NetWare on Xen 4.2 and newer

import os, sys, base64

# Location and size of the code hunk inside xnloader.sys that gets replaced.
CODE_OFFSET = 0x49F5
NUMBER_OF_CODE_BYTES = 17
# Hex-encoded (base16, upper case) contents of the hunk before and after
# patching; each decodes to NUMBER_OF_CODE_BYTES bytes.
ORIGINAL_CODE = "BA00080000C786FC1F0000FFFFFFFF31C9"
PATCHED_CODE = "BAF8070000834C961CFFB9080000009090"
# MD5 digest (lower-case hex) of the only xnloader.sys image that is patched.
XNLOADER_SYS_MD5SUM = "eb76cce2a2d45928ea2bf26e01430af2"


def patch_netware_loader(loader):
    """Open the given xnloader.sys file and patch the relevant code hunk.

    The file is modified in place, and only when all of these hold:

    * its MD5 digest equals XNLOADER_SYS_MD5SUM,
    * it is at least CODE_OFFSET + NUMBER_OF_CODE_BYTES bytes long,
    * the bytes at CODE_OFFSET equal ORIGINAL_CODE.

    In every other case the file is left untouched and the function
    returns silently, so calling it on an already patched loader or on an
    unrelated kernel image is harmless.

    @param loader: path of the candidate xnloader.sys file
    @raise IOError/OSError: the file cannot be opened or read; a failure
        to open it for writing is also reported on stderr first
    @return: None
    """
    # Imported locally so the module-level import line stays untouched.
    import hashlib

    # domUloader calls this with all kernels so perhaps this is not the
    # NetWare loader.  The digest is computed in-process: the path never
    # reaches a shell, so names containing spaces or shell metacharacters
    # are handled correctly.
    digest = hashlib.md5()
    with open(loader, "rb") as image:
        for chunk in iter(lambda: image.read(65536), b""):
            digest.update(chunk)
    if digest.hexdigest() != XNLOADER_SYS_MD5SUM:
        return

    try:
        fd = os.open(loader, os.O_RDWR)
    except Exception as e:
        sys.stderr.write("%s\n" % (e,))
        raise

    try:
        # Validate minimum size for I/O
        if os.fstat(fd).st_size < CODE_OFFSET + NUMBER_OF_CODE_BYTES:
            return

        # Seek to location of code hunk and read the bytes found there
        os.lseek(fd, CODE_OFFSET, os.SEEK_SET)
        buf = os.read(fd, NUMBER_OF_CODE_BYTES)

        # Compare raw bytes rather than hex text so the check behaves the
        # same on Python 2 and Python 3.
        if buf == base64.b16decode(ORIGINAL_CODE):
            # Seek back to start location of the code hunk
            os.lseek(fd, CODE_OFFSET, os.SEEK_SET)
            # Write the patched code as raw binary
            os.write(fd, base64.b16decode(PATCHED_CODE))
    finally:
        # Always release the descriptor, even if an I/O call above raised.
        os.close(fd)

--
To unsubscribe, e-mail: opensuse-commit+unsubscribe@xxxxxxxxxxxx
For additional commands, e-mail: opensuse-commit+help@xxxxxxxxxxxx

< Previous Next >
This Thread
  • No further messages