Mailinglist Archive: opensuse-commit (1737 mails)
| < Previous | Next > |
commit xen for openSUSE:12.1:Update:Test
- From: root@xxxxxxxxxxxxxxx (h_root)
- Date: Wed, 08 Feb 2012 18:18:52 +0100
- Message-id: <20120208171852.BD0CE203B6@hilbert.suse.de>
Hello community,
here is the log from the commit of package xen for openSUSE:12.1:Update:Test
checked in at 2012-02-08 18:18:42
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:12.1:Update:Test/xen (Old)
and /work/SRC/openSUSE:12.1:Update:Test/.xen.new (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "xen", Maintainer is "CARNOLD@xxxxxxxx"
Changes:
--------
New Changes file:
--- /dev/null 2010-08-26 16:28:41.000000000 +0200
+++ /work/SRC/openSUSE:12.1:Update:Test/.xen.new/xen.changes 2012-02-08
18:18:48.000000000 +0100
@@ -0,0 +1,6025 @@
+-------------------------------------------------------------------
+Mon Jan 23 13:41:42 MST 2012 - carnold@xxxxxxxxxx
+
+- The xen kmp packages fail on the 09-check-packaged-twice script.
+ Rename xen_pvdrivers.conf to xen_pvdrivers-<kernel flavor>.conf
+
+-------------------------------------------------------------------
+Wed Jan 18 09:42:54 MST 2012 - carnold@xxxxxxxxxx
+
+- bnc#739585 - L3: Xen block-attach fails after repeated attach/detach
+ blktap-close-fifos.patch
+
+-------------------------------------------------------------------
+Fri Jan 13 10:37:53 MST 2012 - jfehlig@xxxxxxxx
+
+- bnc#741159 - Fix default setting of XENSTORED_ROOTDIR in
+ xencommons init script
+ xencommons-xenstored-root.patch
+
+-------------------------------------------------------------------
+Thu Jan 12 06:49:57 MST 2012 - carnold@xxxxxxxxxx
+
+- bnc#740625 - xen: cannot interact with xend after upgrade (SLES)
+- bnc#738694 - xen: cannot interact with xend after upgrade (os12.1)
+- Other README changes included.
+ README.SuSE
+
+-------------------------------------------------------------------
+Tue Jan 10 17:30:20 CET 2012 - ohering@xxxxxxx
+
+- bnc#694863 - kexec fails in xen
+ 24478-libxl_add_feature_flag_to_xenstore_for_XS_RESET_WATCHES.patch
+
+-------------------------------------------------------------------
+Mon Jan 9 16:10:19 CET 2012 - ohering@xxxxxxx
+
+- fate#310510 - fix xenpaging
+ xenpaging.speedup-page-out.patch
+
+-------------------------------------------------------------------
+Tue Jan 3 08:26:42 MST 2012 - carnold@xxxxxxxxxx
+
+- bnc#735806 - VF doesn't work after hot-plug for many times
+ 24448-x86-pt-irq-leak.patch
+- Upstream patches from Jan
+ 24261-x86-cpuidle-Westmere-EX.patch
+ 24417-amd-erratum-573.patch
+ 24429-mceinj-tool.patch
+ 24447-x86-TXT-INIT-SIPI-delay.patch
+ ioemu-9868-MSI-X.patch
+
+-------------------------------------------------------------------
+Mon Jan 2 10:05:57 CET 2012 - ohering@xxxxxxx
+
+- bnc#732884 - remove private runlevel 4 from init scripts
+ xen.no-default-runlevel-4.patch
+
+-------------------------------------------------------------------
+Mon Dec 19 15:22:13 MST 2011 - carnold@xxxxxxxxxx
+
+- bnc#727515 - Fragmented packets hang network boot of HVM guest
+ ipxe-gcc45-warnings.patch
+ ipxe-ipv4-fragment.patch
+ ipxe-enable-nics.patch
+
+-------------------------------------------------------------------
+Mon Dec 19 12:43:11 CET 2011 - ohering@xxxxxxx
+
+- fate#310510 - fix xenpaging
+ update xenpaging.autostart.patch, make changes with mem-swap-target
+ permanent
+ update xenpaging.doc.patch, mention issues with live migration
+
+-------------------------------------------------------------------
+Thu Dec 15 17:53:51 CET 2011 - ohering@xxxxxxx
+
+- fate#310510 - fix xenpaging
+ add xenpaging.evict_mmap_readonly.patch
+ update xenpaging.error-handling.patch, reduce debug output
+
+-------------------------------------------------------------------
+Thu Dec 15 08:35:27 MST 2011 - carnold@xxxxxxxxxx
+
+- bnc#736824 - Microcode patches for AMD's 15h processors panic the
+ system
+ 24189-x86-p2m-pod-locking.patch
+ 24412-x86-AMD-errata-model-shift.patch
+ 24411-x86-ucode-AMD-Fam15.patch
+
+-------------------------------------------------------------------
+Wed Dec 14 10:08:24 MST 2011 - carnold@xxxxxxxxxx
+
+- bnc#711219 - SR-IOV VF doesn't work in SLES11 sp2 guest
+ 24357-firmware-no-_PS0-_PS3.patch
+- Upstream patches from Jan
+ 24153-x86-emul-feature-checks.patch
+ 24275-x86-emul-lzcnt.patch
+ 24277-x86-dom0-features.patch
+ 24278-x86-dom0-no-PCID.patch
+ 24282-x86-log-dirty-bitmap-leak.patch
+ 24359-x86-domU-features.patch
+ 24360-x86-pv-domU-no-PCID.patch
+ 24389-amd-fam10-gart-tlb-walk-err.patch
+ 24391-x86-pcpu-version.patch
+
+-------------------------------------------------------------------
+Thu Dec 8 14:19:49 CET 2011 - ohering@xxxxxxx
+
+- bnc#729208 - xenpaging=-1 doesn't work
+ xenpaging.doc.patch
+
+-------------------------------------------------------------------
+Thu Dec 8 08:41:36 CET 2011 - ohering@xxxxxxx
+
+- fate#310510 - fix xenpaging
+ readd xenpaging.qemu.flush-cache.patch
+
+-------------------------------------------------------------------
+Wed Dec 7 11:01:43 MST 2011 - jfehlig@xxxxxxxx
+
+- bnc#732782 - L3: xm create hangs when maxmen value is enclosed
+ in "quotes"
+ xm-create-maxmem.patch
+
+-------------------------------------------------------------------
+Wed Dec 7 10:44:06 MST 2011 - carnold@xxxxxxxxxx
+
+- Upstream patches / changes from Jan
+ Added 24358-kexec-compat-overflow.patch
+ Removed
24341-x86-64-mmcfg_remove___initdata_annotation_overlooked_in_23749e8d1c8f074ba.patch
+ Removed
24345-tools-libxc_Fix_x86_32_build_breakage_in_previous_changeset..patch
+
+-------------------------------------------------------------------
+Wed Dec 7 16:42:44 CET 2011 - ohering@xxxxxxx
+
+- fate#310510 - fix xenpaging
+ 24178-debug_Add_domain-vcpu_pause_count_info_to_d_key..patch
+ Use wait queues for paging, improve foreign mappings.
+ xenpaging.versioned-interface.patch
+ xenpaging.mmap-before-nominate.patch
+ xenpaging.p2m_is_paged.patch
+ xenpaging.evict_fail_fast_forward.patch
+ xenpaging.error-handling.patch
+ xenpaging.mem_event-use-wait_queue.patch
+ xenpaging.waitqueue-paging.patch
+ Remove obsolete patch, not needed with wait queue usage
+ xenpaging.HVMCOPY_gfn_paged_out.patch
+
+-------------------------------------------------------------------
+Wed Dec 7 16:23:49 CET 2011 - ohering@xxxxxxx
+
+- fate#310510 - fix xenpaging
+ Fix incorrect backport, remove double memset, use xzalloc
+ 24171-x86waitqueue_Allocate_whole_page_for_shadow_stack..patch
+
+-------------------------------------------------------------------
+Wed Dec 7 12:08:31 CET 2011 - ohering@xxxxxxx
+
+- fate#310510 - fix xenpaging
+ fix typo in nominate, use lock instead of double unlock
+ 23905-xenpaging_fix_locking_in_p2m_mem_paging_functions.patch
+
+-------------------------------------------------------------------
+Wed Dec 7 11:07:23 CET 2011 - ohering@xxxxxxx
+
+- fate#310510 - fix xenpaging
+
24327-After_preparing_a_page_for_page-in_allow_immediate_fill-in_of_the_page_contents.patch
+
24328-Tools_Libxc_wrappers_to_automatically_fill_in_page_oud_page_contents_on_prepare.patch
+
24329-Teach_xenpaging_to_use_the_new_and_non-racy_xc_mem_paging_load_interface.patch
+
+-------------------------------------------------------------------
+Tue Dec 6 11:14:51 MST 2011 - jfehlig@xxxxxxxx
+
+- bnc#734826 - xm rename doesn't work anymore
+ Updated xend-migration-domname-fix.patch
+
+-------------------------------------------------------------------
+Fri Dec 2 20:35:29 CET 2011 - ohering@xxxxxxx
+
+- fate#310510 - fix xenpaging
+ 24269-mem_event_move_mem_event_domain_out_of_struct_domain.patch
+ 24270-Free_d-mem_event_on_domain_destruction..patch
+
+-------------------------------------------------------------------
+Fri Dec 2 20:25:24 CET 2011 - ohering@xxxxxxx
+
+- fate#310510 - fix xenpaging
+ 24318-x86-mm_Fix_checks_during_foreign_mapping_of_paged_pages.patch
+
+-------------------------------------------------------------------
+Fri Dec 2 20:21:48 CET 2011 - ohering@xxxxxxx
+
+- fate#310510 - fix xenpaging
+ 23949-constify_vcpu_set_affinitys_second_parameter.patch
+
+-------------------------------------------------------------------
+Fri Dec 2 19:36:33 CET 2011 - ohering@xxxxxxx
++++ 5828 more lines (skipped)
++++ between /dev/null
++++ and /work/SRC/openSUSE:12.1:Update:Test/.xen.new/xen.changes
New:
----
22998-x86-get_page_from_l1e-retcode.patch
22999-x86-mod_l1_entry-retcode.patch
23000-x86-mod_l2_entry-retcode.patch
23050-xentrace_dynamic_tracebuffer_allocation.patch
23074-pfn.h.patch
23091-xentrace_fix_t_info_pages_calculation..patch
23092-xentrace_print_calculated_numbers_in_calculate_tbuf_size.patch
23093-xentrace_remove_gdprintk_usage_since_they_are_not_in_guest_context.patch
23094-xentrace_update_comments.patch
23095-xentrace_use_consistent_printk_prefix.patch
23096-x86-hpet-no-cpumask_lock.patch
23099-x86-rwlock-scalability.patch
23103-x86-pirq-guest-eoi-check.patch
23127-vtd-bios-settings.patch
23128-xentrace_correct_formula_to_calculate_t_info_pages.patch
23129-xentrace_remove_unneeded_debug_printk.patch
23173-xentrace_Move_register_cpu_notifier_call_into_boot-time_init..patch
23199-amd-iommu-unmapped-intr-fault.patch
23233-hvm-cr-access.patch
23234-svm-decode-assist-base.patch
23235-svm-decode-assist-crs.patch
23236-svm-decode-assist-invlpg.patch
23238-svm-decode-assist-insn-fetch.patch
23239-xentrace_correct_overflow_check_for_number_of_per-cpu_trace_pages.patch
23246-x86-xsave-enable.patch
23303-cpufreq-misc.patch
23304-amd-oprofile-strings.patch
23305-amd-fam15-xenoprof.patch
23306-amd-fam15-vpmu.patch
23308-xentrace_Move_the_global_variable_t_info_first_offset_into_calculate_tbuf_size.patch
23309-xentrace_Mark_data_size___read_mostly_because_its_only_written_once.patch
23310-xentrace_Remove_unneeded_cast_when_assigning_pointer_value_to_dst.patch
23334-amd-fam12+14-vpmu.patch
23383-libxc-rm-static-vars.patch
23404-xentrace_reduce_trace_buffer_size_to_something_mfn_offset_can_reach.patch
23405-xentrace_fix_type_of_offset_to_avoid_ouf-of-bounds_access.patch
23406-xentrace_update___insert_record_to_copy_the_trace_record_to_individual_mfns.patch
23407-xentrace_allocate_non-contiguous_per-cpu_trace_buffers.patch
23437-amd-fam15-TSC-scaling.patch
23462-libxc-cpu-feature.patch
23506-x86_Disable_set_gpfn_from_mfn_until_m2p_table_is_allocated..patch
23507-xenpaging_update_machine_to_phys_mapping_during_page_deallocation.patch
23508-vmx-proc-based-ctls-probe.patch
23509-x86_32_Fix_build_Define_machine_to_phys_mapping_valid.patch
23511-amd-fam15-no-flush-for-C3.patch
23562-xenpaging_remove_unused_spinlock_in_pager.patch
23571-vtd-fault-verbosity.patch
23574-x86-dom0-compressed-ELF.patch
23575-x86-DMI.patch
23576-x86_show_page_walk_also_for_early_page_faults.patch
23577-tools_merge_several_bitop_functions_into_xc_bitops.h.patch
23578-xenpaging_add_xs_handle_to_struct_xenpaging.patch
23579-xenpaging_drop_xc.c_remove_ASSERT.patch
23580-xenpaging_drop_xc.c_remove_xc_platform_info_t.patch
23581-xenpaging_drop_xc.c_remove_xc_wait_for_event.patch
23582-xenpaging_drop_xc.c_move_xc_mem_paging_flush_ioemu_cache.patch
23583-xenpaging_drop_xc.c_move_xc_wait_for_event_or_timeout.patch
23584-xenpaging_drop_xc.c_remove_xc_files.patch
23585-xenpaging_correct_dropping_of_pages_to_avoid_full_ring_buffer.patch
23586-xenpaging_do_not_bounce_p2mt_back_to_the_hypervisor.patch
23587-xenpaging_remove_srand_call.patch
23588-xenpaging_remove_return_values_from_functions_that_can_not_fail.patch
23589-xenpaging_catch_xc_mem_paging_resume_errors.patch
23590-xenpaging_remove_local_domain_id_variable.patch
23591-xenpaging_move_num_pages_into_xenpaging_struct.patch
23592-xenpaging_start_paging_in_the_middle_of_gfn_range.patch
23593-xenpaging_pass_integer_to_xenpaging_populate_page.patch
23594-xenpaging_add_helper_function_for_unlinking_pagefile.patch
23595-xenpaging_add_watch_thread_to_catch_guest_shutdown.patch
23596-xenpaging_implement_stopping_of_pager_by_sending_SIGTERM-SIGINT.patch
23597-xenpaging_remove_private_mem_event.h.patch
23599-tools_fix_build_after_recent_xenpaging_changes.patch
23610-x86-topology-info.patch
23611-amd-fam15-topology.patch
23613-EFI-headers.patch
23614-x86_64-EFI-boot.patch
23615-x86_64-EFI-runtime.patch
23616-x86_64-EFI-MPS.patch
23643-xentrace_Allow_tracing_to_be_enabled_at_boot.patch
23676-x86_64-image-map-bounds.patch
23719-xentrace_update___trace_var_comment.patch
23723-x86-CMOS-lock.patch
23724-x86-smpboot-x2apic.patch
23726-x86-intel-flexmigration-v2.patch
23735-guest-dom0-cap.patch
23747-mmcfg-base-address.patch
23749-mmcfg-reservation.patch
23752-x86-shared-IRQ-vector-maps.patch
23754-AMD-perdev-vector-map.patch
23771-x86-ioapic-clear-pin.patch
23772-x86-trampoline.patch
23774-x86_64-EFI-EDD.patch
23781-pm-wide-ACPI-ids.patch
23782-x86-ioapic-clear-irr.patch
23783-ACPI-set-_PDC-bits.patch
23795-intel-ich10-quirk.patch
23800-x86_64-guest-addr-range.patch
23804-x86-IPI-counts.patch
23817-mem_event_add_ref_counting_for_free_requestslots.patch
23818-mem_event_use_mem_event_mark_and_pause_in_mem_event_check_ring.patch
23819-make-docs.patch
23827-xenpaging_use_batch_of_pages_during_final_page-in.patch
23841-mem_event_pass_mem_event_domain_pointer_to_mem_event_functions.patch
23842-mem_event_use_different_ringbuffers_for_share_paging_and_access.patch
23853-x86-pv-cpuid-xsave.patch
23874-xenpaging_track_number_of_paged_pages_in_struct_domain.patch
23897-x86-mce-offline-again.patch
23900-xzalloc.patch
23904-xenpaging_use_p2m-get_entry_in_p2m_mem_paging_functions.patch
23905-xenpaging_fix_locking_in_p2m_mem_paging_functions.patch
23906-xenpaging_remove_confusing_comment_from_p2m_mem_paging_populate.patch
23908-p2m_query-modify_p2mt_with_p2m_lock_held.patch
23925-x86-AMD-ARAT-Fam12.patch
23933-pt-bus2bridge-update.patch
23943-xenpaging_clear_page_content_after_evict.patch
23949-constify_vcpu_set_affinitys_second_parameter.patch
23953-xenpaging_handle_evict_failures.patch
23955-x86-pv-cpuid-xsave.patch
23957-cpufreq-error-paths.patch
23978-xenpaging_check_p2mt_in_p2m_mem_paging_functions.patch
23979-xenpaging_document_p2m_mem_paging_functions.patch
23980-xenpaging_disallow_paging_in_a_PoD_guest.patch
23993-x86-microcode-amd-fix-23871.patch
24104-waitqueue_Double_size_of_x86_shadow_stack..patch
24105-xenpaging_compare_domain_pointer_in_p2m_mem_paging_populate.patch
24106-mem_event_check_capabilities_only_once.patch
24116-x86-continuation-cancel.patch
24123-x86-cpuidle-quiesce.patch
24124-x86-microcode-amd-quiesce.patch
24137-revert-23666.patch
24138-xenpaging_munmap_all_pages_after_page-in.patch
24144-cpufreq-turbo-crash.patch
24148-shadow-pgt-dying-op-performance.patch
24153-x86-emul-feature-checks.patch
24155-x86-ioapic-EOI-after-migration.patch
24156-x86-ioapic-shared-vectors.patch
24157-x86-xstate-init.patch
24168-x86-vioapic-clear-remote_irr.patch
24171-x86waitqueue_Allocate_whole_page_for_shadow_stack..patch
24178-debug_Add_domain-vcpu_pause_count_info_to_d_key..patch
24189-x86-p2m-pod-locking.patch
24190-hap-log-dirty-disable-rc.patch
24193-hap-track-dirty-vram-rc.patch
24195-waitqueue_Detect_saved-stack_overflow_and_crash_the_guest..patch
24196-waitqueue_Reorder_prepare_to_wait_so_that_vcpu_is_definitely_on_the.patch
24197-x86-waitqueue_Because_we_have_per-cpu_stacks_we_must_wake_up_on_teh.patch
24201-x86-pcpu-platform-op.patch
24208-xenpaging_remove_filename_from_comment.patch
24209-xenpaging_remove_obsolete_comment_in_resume_path.patch
24210-xenpaging_use_PERROR_to_print_errno.patch
24211-xenpaging_simplify_file_op.patch
24212-xenpaging_print_gfn_in_failure_case.patch
24213-xenpaging_update_xenpaging_init.patch
24214-xenpaging_remove_xc_dominfo_t_from_paging_t.patch
24215-xenpaging_track_the_number_of_paged-out_pages.patch
24216-xenpaging_move_page_add-resume_loops_into_its_own_function..patch
24217-xenpaging_improve_mainloop_exit_handling.patch
24218-libxc_add_bitmap_clear_function.patch
24219-xenpaging_retry_unpageable_gfns.patch
24220-xenpaging_install_into_LIBEXEC_dir.patch
24221-xenpaging_add_XEN_PAGING_DIR_-_libxl_xenpaging_dir_path.patch
24222-xenpaging_use_guests_tot_pages_as_working_target.patch
24223-xenpaging_watch_the_guests_memory-target-tot_pages_xenstore_value.patch
24224-xenpaging_add_cmdline_interface_for_pager.patch
24225-xenpaging_improve_policy_mru_list_handling.patch
24226-xenpaging_add_debug_to_show_received_watch_event..patch
24227-xenpaging_restrict_pagefile_permissions.patch
24231-waitqueue_Implement_wake_up_nroneall..patch
24232-waitqueue_Hold_a_reference_to_a_domain_on_a_waitqueue..patch
24261-x86-cpuidle-Westmere-EX.patch
24269-mem_event_move_mem_event_domain_out_of_struct_domain.patch
24270-Free_d-mem_event_on_domain_destruction..patch
24272-xenpaging_Fix_c-s_235070a29c8c3ddf7_update_machine_to_phys_mapping_during_page_deallocation.patch
24275-x86-emul-lzcnt.patch
24277-x86-dom0-features.patch
24278-x86-dom0-no-PCID.patch
24282-x86-log-dirty-bitmap-leak.patch
24318-x86-mm_Fix_checks_during_foreign_mapping_of_paged_pages.patch
24327-After_preparing_a_page_for_page-in_allow_immediate_fill-in_of_the_page_contents.patch
24328-Tools_Libxc_wrappers_to_automatically_fill_in_page_oud_page_contents_on_prepare.patch
24329-Teach_xenpaging_to_use_the_new_and_non-racy_xc_mem_paging_load_interface.patch
24344-tools-x86_64_Fix_cpuid_inline_asm_to_not_clobber_stacks_red_zone.patch
24357-firmware-no-_PS0-_PS3.patch
24358-kexec-compat-overflow.patch
24359-x86-domU-features.patch
24360-x86-pv-domU-no-PCID.patch
24389-amd-fam10-gart-tlb-walk-err.patch
24391-x86-pcpu-version.patch
24411-x86-ucode-AMD-Fam15.patch
24412-x86-AMD-errata-model-shift.patch
24417-amd-erratum-573.patch
24429-mceinj-tool.patch
24447-x86-TXT-INIT-SIPI-delay.patch
24448-x86-pt-irq-leak.patch
24478-libxl_add_feature_flag_to_xenstore_for_XS_RESET_WATCHES.patch
2XXXX-vif-bridge.patch
32on64-extra-mem.patch
README.SuSE
_link
altgr_2.patch
baselibs.conf
bdrv_default_rwflag.patch
bdrv_open2_fix_flags.patch
bdrv_open2_flags_2.patch
blktap-close-fifos.patch
blktap-pv-cdrom.patch
blktap.patch
blktapctrl-default-to-ioemu.patch
block-dmmd
block-iscsi
block-nbd
block-npiv
block-npiv-common.sh
block-npiv-vport
boot.local.xenU
boot.xen
bridge-bonding.diff
bridge-opensuse.patch
bridge-record-creation.patch
bridge-vlan.diff
build-tapdisk-ioemu.patch
capslock_enable.patch
cdrom-removable.patch
change-vnc-passwd.patch
change_home_server.patch
check_device_status.patch
checkpoint-rename.patch
del_usb_xend_entry.patch
disable-xl-when-using-xend.patch
disable_emulated_device.diff
dom-print.patch
domUloader.py
domu-usb-controller.patch
etc_pam.d_xen-api
hibernate.patch
hotplug.losetup.patch
hv_extid_compatibility.patch
init.pciback
init.xen_loop
init.xend
init.xendomains
ioemu-7615-qcow2-fix-alloc_cluster_link_l2.patch
ioemu-9868-MSI-X.patch
ioemu-bdrv-open-CACHE_WB.patch
ioemu-blktap-barriers.patch
ioemu-blktap-fv-init.patch
ioemu-blktap-image-format.patch
ioemu-blktap-zero-size.patch
ioemu-debuginfo.patch
ioemu-disable-emulated-ide-if-pv.patch
ioemu-disable-scsi.patch
ioemu-vnc-resize.patch
ioemu-watchdog-ib700-timer.patch
ioemu-watchdog-linkage.patch
ioemu-watchdog-support.patch
ipxe-enable-nics.patch
ipxe-gcc45-warnings.patch
ipxe-ipv4-fragment.patch
kernel-boot-hvm.patch
kmp_filelist
libxen_permissive.patch
log-guest-console.patch
logrotate.conf
magic_ioport_compat.patch
minios-fixups.patch
multi-xvdp.patch
network-nat-open-SuSEfirewall2-FORWARD.patch
network-nat.patch
pv-driver-build.patch
pvdrv-import-shared-info.patch
pvdrv_emulation_control.patch
qemu-dm-segfault.patch
qemu-security-etch1.diff
serial-split.patch
snapshot-ioemu-delete.patch
snapshot-ioemu-restore.patch
snapshot-ioemu-save.patch
snapshot-without-pv-fix.patch
snapshot-xend.patch
stdvga-cache.patch
stubdom.tar.bz2
supported_module.diff
suspend_evtchn_lock.patch
sysconfig.pciback
tapdisk-ioemu-logfile.patch
tapdisk-ioemu-shutdown-fix.patch
tmp-initscript-modprobe.patch
tmp_build.patch
tools-kboot.diff
tools-watchdog-support.patch
tools-xc_kexec.diff
udev-rules.patch
usb-list.patch
vif-bridge-no-iptables.patch
vif-bridge.mtu.patch
vif-route-ifup.patch
x86-cpufreq-report.patch
x86-extra-trap-info.patch
x86-ioapic-ack-default.patch
xen-4.1.2-testing-src.tar.bz2
xen-api-auth.patch
xen-changeset.diff
xen-config.diff
xen-cpupool-xl-config-format.patch
xen-destdir.diff
xen-disable-qemu-monitor.diff
xen-domUloader.diff
xen-fixme-doc.diff
xen-hvm-default-bridge.diff
xen-hvm-default-pae.diff
xen-ioemu-hvm-pv-support.diff
xen-max-free-mem.diff
xen-minimum-restart-time.patch
xen-no-dummy-nfs-ip.diff
xen-paths.diff
xen-qemu-iscsi-fix.patch
xen-rpmoptflags.diff
xen-updown.sh
xen-utils-0.1.tar.bz2
xen-warnings-unused.diff
xen-warnings.diff
xen-xm-top-needs-root.diff
xen-xmexample-vti.diff
xen-xmexample.diff
xen.changes
xen.no-default-runlevel-4.patch
xen.sles11sp1.bug684297.xen_oldmem_pfn_is_ram.patch
xen.sles11sp1.fate311487.xen_platform_pci.dmistring.patch
xen.spec
xen_pvdrivers.conf
xenalyze.gcc46.patch
xenalyze.hg.tar.bz2
xenapi-console-protocol.patch
xenapiusers
xencommons-proc-xen.patch
xencommons-xenstored-root.patch
xenconsole-no-multiple-connections.patch
xend-config-enable-dump-comment.patch
xend-config.diff
xend-console-port-restore.patch
xend-core-dump-loc.diff
xend-devid-or-name.patch
xend-disable-internal-logrotate.patch
xend-domain-lock-sfex.patch
xend-domain-lock.patch
xend-migration-domname-fix.patch
xend-relocation-server.fw
xend-relocation.sh
xend-sysconfig.patch
xend-vcpu-affinity-fix.patch
xenpaging.autostart.patch
xenpaging.doc.patch
xenpaging.error-handling.patch
xenpaging.evict_fail_fast_forward.patch
xenpaging.evict_mmap_readonly.patch
xenpaging.guest-memusage.patch
xenpaging.mem_event-use-wait_queue.patch
xenpaging.mmap-before-nominate.patch
xenpaging.p2m_is_paged.patch
xenpaging.qemu.flush-cache.patch
xenpaging.speedup-page-out.patch
xenpaging.versioned-interface.patch
xenpaging.waitqueue-paging.patch
xenstored.XS_RESET_WATCHES.patch
xl-create-pv-with-qcow2-img.patch
xm-create-maxmem.patch
xm-create-xflag.patch
xm-save-check-file.patch
xm-test-cleanup.diff
xmclone.sh
xmexample.disks
xmexample.domUloader
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Other differences:
------------------
++++++ xen.spec ++++++
++++ 1623 lines (skipped)
++++++ 22998-x86-get_page_from_l1e-retcode.patch ++++++
References: bnc#675363
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1299687371 0
# Node ID e9fab50d7b61d151d51a4b1088930c9e1ca2da47
# Parent 5f28dcea13555f7ab948c9cb95de3e79e0fbfc4b
x86: make get_page_from_l1e() return a proper error code
... so that the guest can actually know the reason for the (hypercall)
failure.
ptwr_do_page_fault() could propagate the error indicator received from
get_page_from_l1e() back to the guest in the high half of the error
code (entry_vector), provided we're sure all existing guests can deal
with that (or indicate so by means of a to-be-added guest feature
flag). Alternatively, a second virtual status register (like CR2)
could be introduced.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/mm/shadow/multi.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/shadow/multi.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/shadow/multi.c
@@ -872,7 +872,7 @@ shadow_get_page_from_l1e(shadow_l1e_t sl
// If a privileged domain is attempting to install a map of a page it does
// not own, we let it succeed anyway.
//
- if ( unlikely(!res) &&
+ if ( unlikely(res < 0) &&
!shadow_mode_translate(d) &&
mfn_valid(mfn = shadow_l1e_get_mfn(sl1e)) &&
(owner = page_get_owner(mfn_to_page(mfn))) &&
@@ -883,11 +883,11 @@ shadow_get_page_from_l1e(shadow_l1e_t sl
SHADOW_PRINTK("privileged domain %d installs map of mfn %05lx "
"which is owned by domain %d: %s\n",
d->domain_id, mfn_x(mfn), owner->domain_id,
- res ? "success" : "failed");
+ res >= 0 ? "success" : "failed");
}
/* Okay, it might still be a grant mapping PTE. Try it. */
- if ( unlikely(!res) &&
+ if ( unlikely(res < 0) &&
(type == p2m_grant_map_rw ||
(type == p2m_grant_map_ro &&
!(shadow_l1e_get_flags(sl1e) & _PAGE_RW))) )
@@ -900,7 +900,7 @@ shadow_get_page_from_l1e(shadow_l1e_t sl
res = get_page_from_l1e(sl1e, d, page_get_owner(mfn_to_page(mfn)));
}
- if ( unlikely(!res) )
+ if ( unlikely(res < 0) )
{
perfc_incr(shadow_get_page_fail);
SHADOW_PRINTK("failed: l1e=" SH_PRI_pte "\n");
@@ -1229,15 +1229,15 @@ static int shadow_set_l1e(struct vcpu *v
TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_GET_REF);
switch ( shadow_get_page_from_l1e(new_sl1e, d, new_type) )
{
- case 0:
+ default:
/* Doesn't look like a pagetable. */
flags |= SHADOW_SET_ERROR;
new_sl1e = shadow_l1e_empty();
break;
- case -1:
+ case 1:
shadow_l1e_remove_flags(new_sl1e, _PAGE_RW);
/* fall through */
- default:
+ case 0:
shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d);
break;
}
Index: xen-4.1.2-testing/xen/arch/x86/mm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm.c
+++ xen-4.1.2-testing/xen/arch/x86/mm.c
@@ -799,12 +799,12 @@ get_page_from_l1e(
bool_t write;
if ( !(l1f & _PAGE_PRESENT) )
- return 1;
+ return 0;
if ( unlikely(l1f & l1_disallow_mask(l1e_owner)) )
{
MEM_LOG("Bad L1 flags %x", l1f & l1_disallow_mask(l1e_owner));
- return 0;
+ return -EINVAL;
}
if ( !mfn_valid(mfn) ||
@@ -821,18 +821,21 @@ get_page_from_l1e(
if ( !iomem_access_permitted(pg_owner, mfn, mfn) )
{
if ( mfn != (PADDR_MASK >> PAGE_SHIFT) ) /* INVALID_MFN? */
+ {
MEM_LOG("Non-privileged (%u) attempt to map I/O space %08lx",
pg_owner->domain_id, mfn);
- return 0;
+ return -EPERM;
+ }
+ return -EINVAL;
}
if ( !(l1f & _PAGE_RW) || IS_PRIV(pg_owner) ||
!rangeset_contains_singleton(mmio_ro_ranges, mfn) )
- return 1;
+ return 0;
dprintk(XENLOG_G_WARNING,
"d%d: Forcing read-only access to MFN %lx\n",
l1e_owner->domain_id, mfn);
- return -1;
+ return 1;
}
if ( unlikely(real_pg_owner != pg_owner) )
@@ -863,6 +866,7 @@ get_page_from_l1e(
{
unsigned long x, nx, y = page->count_info;
unsigned long cacheattr = pte_flags_to_cacheattr(l1f);
+ int err;
if ( is_xen_heap_page(page) )
{
@@ -870,7 +874,7 @@ get_page_from_l1e(
put_page_type(page);
put_page(page);
MEM_LOG("Attempt to change cache attributes of Xen heap page");
- return 0;
+ return -EACCES;
}
do {
@@ -878,7 +882,8 @@ get_page_from_l1e(
nx = (x & ~PGC_cacheattr_mask) | (cacheattr << PGC_cacheattr_base);
} while ( (y = cmpxchg(&page->count_info, x, nx)) != x );
- if ( unlikely(update_xen_mappings(mfn, cacheattr) != 0) )
+ err = update_xen_mappings(mfn, cacheattr);
+ if ( unlikely(err) )
{
cacheattr = y & PGC_cacheattr_mask;
do {
@@ -894,11 +899,11 @@ get_page_from_l1e(
" from L1 entry %" PRIpte ") for %d",
mfn, get_gpfn_from_mfn(mfn),
l1e_get_intpte(l1e), l1e_owner->domain_id);
- return 0;
+ return err;
}
}
- return 1;
+ return 0;
could_not_pin:
MEM_LOG("Error getting mfn %lx (pfn %lx) from L1 entry %" PRIpte
@@ -907,7 +912,7 @@ get_page_from_l1e(
l1e_get_intpte(l1e), l1e_owner->domain_id, pg_owner->domain_id);
if ( real_pg_owner != NULL )
put_page(page);
- return 0;
+ return -EBUSY;
}
@@ -1197,17 +1202,20 @@ static int alloc_l1_table(struct page_in
unsigned long pfn = page_to_mfn(page);
l1_pgentry_t *pl1e;
unsigned int i;
+ int ret = 0;
pl1e = map_domain_page(pfn);
for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
{
if ( is_guest_l1_slot(i) )
- switch ( get_page_from_l1e(pl1e[i], d, d) )
+ switch ( ret = get_page_from_l1e(pl1e[i], d, d) )
{
- case 0:
+ default:
goto fail;
- case -1:
+ case 0:
+ break;
+ case 1:
l1e_remove_flags(pl1e[i], _PAGE_RW);
break;
}
@@ -1225,7 +1233,7 @@ static int alloc_l1_table(struct page_in
put_page_from_l1e(pl1e[i], d);
unmap_domain_page(pl1e);
- return -EINVAL;
+ return ret;
}
static int create_pae_xen_mappings(struct domain *d, l3_pgentry_t *pl3e)
@@ -1794,11 +1802,13 @@ static int mod_l1_entry(l1_pgentry_t *pl
return rc;
}
- switch ( get_page_from_l1e(nl1e, pt_dom, pg_dom) )
+ switch ( rc = get_page_from_l1e(nl1e, pt_dom, pg_dom) )
{
- case 0:
+ default:
return 0;
- case -1:
+ case 0:
+ break;
+ case 1:
l1e_remove_flags(nl1e, _PAGE_RW);
break;
}
@@ -4948,7 +4958,7 @@ static int ptwr_emulated_update(
nl1e = l1e_from_intpte(val);
switch ( get_page_from_l1e(nl1e, d, d) )
{
- case 0:
+ default:
if ( is_pv_32bit_domain(d) && (bytes == 4) && (unaligned_addr & 4) &&
!do_cmpxchg && (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
{
@@ -4968,7 +4978,9 @@ static int ptwr_emulated_update(
return X86EMUL_UNHANDLEABLE;
}
break;
- case -1:
+ case 0:
+ break;
+ case 1:
l1e_remove_flags(nl1e, _PAGE_RW);
break;
}
++++++ 22999-x86-mod_l1_entry-retcode.patch ++++++
References: bnc#675363
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1299687409 0
# Node ID 82b5f8d12903e140f957ae8d13d66e44be076b05
# Parent e9fab50d7b61d151d51a4b1088930c9e1ca2da47
x86: make mod_l1_entry() return a proper error code
... again is so that the guest can actually know the reason for the
(hypercall) failure.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/mm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm.c
+++ xen-4.1.2-testing/xen/arch/x86/mm.c
@@ -1765,15 +1765,16 @@ static int mod_l1_entry(l1_pgentry_t *pl
struct domain *pt_dom = pt_vcpu->domain;
unsigned long mfn;
p2m_type_t p2mt;
- int rc = 1;
+ int rc = 0;
if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
- return 0;
+ return -EFAULT;
if ( unlikely(paging_mode_refcounts(pt_dom)) )
{
- rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu, preserve_ad);
- return rc;
+ if ( UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu, preserve_ad) )
+ return 0;
+ return -EBUSY;
}
if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
@@ -1782,7 +1783,7 @@ static int mod_l1_entry(l1_pgentry_t *pl
mfn = mfn_x(gfn_to_mfn(p2m_get_hostp2m(pg_dom),
l1e_get_pfn(nl1e), &p2mt));
if ( !p2m_is_ram(p2mt) || unlikely(mfn == INVALID_MFN) )
- return 0;
+ return -EINVAL;
ASSERT((mfn & ~(PADDR_MASK >> PAGE_SHIFT)) == 0);
nl1e = l1e_from_pfn(mfn, l1e_get_flags(nl1e));
@@ -1790,22 +1791,23 @@ static int mod_l1_entry(l1_pgentry_t *pl
{
MEM_LOG("Bad L1 flags %x",
l1e_get_flags(nl1e) & l1_disallow_mask(pt_dom));
- return 0;
+ return -EINVAL;
}
/* Fast path for identical mapping, r/w and presence. */
if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) )
{
adjust_guest_l1e(nl1e, pt_dom);
- rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu,
- preserve_ad);
- return rc;
+ if ( UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu,
+ preserve_ad) )
+ return 0;
+ return -EBUSY;
}
switch ( rc = get_page_from_l1e(nl1e, pt_dom, pg_dom) )
{
default:
- return 0;
+ return rc;
case 0:
break;
case 1:
@@ -1818,13 +1820,13 @@ static int mod_l1_entry(l1_pgentry_t *pl
preserve_ad)) )
{
ol1e = nl1e;
- rc = 0;
+ rc = -EBUSY;
}
}
else if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu,
preserve_ad)) )
{
- return 0;
+ return -EBUSY;
}
put_page_from_l1e(ol1e, pt_dom);
@@ -3516,9 +3518,10 @@ int do_mmu_update(
}
#endif
- okay = mod_l1_entry(va, l1e, mfn,
- cmd == MMU_PT_UPDATE_PRESERVE_AD, v,
- pg_owner);
+ rc = mod_l1_entry(va, l1e, mfn,
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, v,
+ pg_owner);
+ okay = !rc;
}
break;
case PGT_l2_page_table:
@@ -4300,7 +4303,7 @@ static int __do_update_va_mapping(
goto out;
}
- rc = mod_l1_entry(pl1e, val, gl1mfn, 0, v, pg_owner) ? 0 : -EINVAL;
+ rc = mod_l1_entry(pl1e, val, gl1mfn, 0, v, pg_owner);
page_unlock(gl1pg);
put_page(gl1pg);
++++++ 23000-x86-mod_l2_entry-retcode.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1299687446 0
# Node ID d428fa67abaa0db20b915a697f1d5ba16e554185
# Parent 82b5f8d12903e140f957ae8d13d66e44be076b05
x86: make mod_l2_entry() return a proper error code
... so that finally all mod_lN_entry() functions behave identically,
allowing some cleanup in do_mmu_update() (which no longer needs to
track both an okay status and an error code).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/mm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm.c
+++ xen-4.1.2-testing/xen/arch/x86/mm.c
@@ -1845,16 +1845,16 @@ static int mod_l2_entry(l2_pgentry_t *pl
struct domain *d = vcpu->domain;
struct page_info *l2pg = mfn_to_page(pfn);
unsigned long type = l2pg->u.inuse.type_info;
- int rc = 1;
+ int rc = 0;
if ( unlikely(!is_guest_l2_slot(d, type, pgentry_ptr_to_slot(pl2e))) )
{
MEM_LOG("Illegal L2 update attempt in Xen-private area %p", pl2e);
- return 0;
+ return -EPERM;
}
if ( unlikely(__copy_from_user(&ol2e, pl2e, sizeof(ol2e)) != 0) )
- return 0;
+ return -EFAULT;
if ( l2e_get_flags(nl2e) & _PAGE_PRESENT )
{
@@ -1862,32 +1862,33 @@ static int mod_l2_entry(l2_pgentry_t *pl
{
MEM_LOG("Bad L2 flags %x",
l2e_get_flags(nl2e) & L2_DISALLOW_MASK);
- return 0;
+ return -EINVAL;
}
/* Fast path for identical mapping and presence. */
if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT) )
{
adjust_guest_l2e(nl2e, d);
- rc = UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu, preserve_ad);
- return rc;
+ if ( UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu, preserve_ad) )
+ return 0;
+ return -EBUSY;
}
- if ( unlikely(get_page_from_l2e(nl2e, pfn, d) < 0) )
- return 0;
+ if ( unlikely((rc = get_page_from_l2e(nl2e, pfn, d)) < 0) )
+ return rc;
adjust_guest_l2e(nl2e, d);
if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu,
preserve_ad)) )
{
ol2e = nl2e;
- rc = 0;
+ rc = -EBUSY;
}
}
else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu,
preserve_ad)) )
{
- return 0;
+ return -EBUSY;
}
put_page_from_l2e(ol2e, pfn);
@@ -3367,7 +3368,7 @@ int do_mmu_update(
void *va;
unsigned long gpfn, gmfn, mfn;
struct page_info *page;
- int rc = 0, okay = 1, i = 0;
+ int rc = 0, i = 0;
unsigned int cmd, done = 0, pt_dom;
struct vcpu *v = current;
struct domain *d = v->domain, *pt_owner = d, *pg_owner;
@@ -3434,7 +3435,6 @@ int do_mmu_update(
}
cmd = req.ptr & (sizeof(l1_pgentry_t)-1);
- okay = 0;
switch ( cmd )
{
@@ -3451,6 +3451,7 @@ int do_mmu_update(
rc = xsm_mmu_normal_update(d, pg_owner, req.val);
if ( rc )
break;
+ rc = -EINVAL;
req.ptr -= cmd;
gmfn = req.ptr >> PAGE_SHIFT;
@@ -3521,7 +3522,6 @@ int do_mmu_update(
rc = mod_l1_entry(va, l1e, mfn,
cmd == MMU_PT_UPDATE_PRESERVE_AD, v,
pg_owner);
- okay = !rc;
}
break;
case PGT_l2_page_table:
@@ -3545,13 +3545,12 @@ int do_mmu_update(
else if ( p2m_ram_shared == l2e_p2mt )
{
MEM_LOG("Unexpected attempt to map shared page.\n");
- rc = -EINVAL;
break;
}
- okay = mod_l2_entry(va, l2e, mfn,
- cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
+ rc = mod_l2_entry(va, l2e, mfn,
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
}
break;
case PGT_l3_page_table:
@@ -3575,13 +3574,11 @@ int do_mmu_update(
else if ( p2m_ram_shared == l3e_p2mt )
{
MEM_LOG("Unexpected attempt to map shared page.\n");
- rc = -EINVAL;
break;
}
rc = mod_l3_entry(va, l3e, mfn,
cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, v);
- okay = !rc;
}
break;
#if CONFIG_PAGING_LEVELS >= 4
@@ -3607,20 +3604,18 @@ int do_mmu_update(
else if ( p2m_ram_shared == l4e_p2mt )
{
MEM_LOG("Unexpected attempt to map shared page.\n");
- rc = -EINVAL;
break;
}
rc = mod_l4_entry(va, l4e, mfn,
cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, v);
- okay = !rc;
}
break;
#endif
case PGT_writable_page:
perfc_incr(writable_mmu_updates);
- okay = paging_write_guest_entry(
- v, va, req.val, _mfn(mfn));
+ if ( paging_write_guest_entry(v, va, req.val, _mfn(mfn)) )
+ rc = 0;
break;
}
page_unlock(page);
@@ -3630,8 +3625,8 @@ int do_mmu_update(
else if ( get_page_type(page, PGT_writable_page) )
{
perfc_incr(writable_mmu_updates);
- okay = paging_write_guest_entry(
- v, va, req.val, _mfn(mfn));
+ if ( paging_write_guest_entry(v, va, req.val, _mfn(mfn)) )
+ rc = 0;
put_page_type(page);
}
@@ -3652,17 +3647,18 @@ int do_mmu_update(
if ( unlikely(!get_page_from_pagenr(mfn, pg_owner)) )
{
MEM_LOG("Could not get page for mach->phys update");
+ rc = -EINVAL;
break;
}
if ( unlikely(paging_mode_translate(pg_owner)) )
{
MEM_LOG("Mach-phys update on auto-translate guest");
+ rc = -EINVAL;
break;
}
set_gpfn_from_mfn(mfn, gpfn);
- okay = 1;
paging_mark_dirty(pg_owner, mfn);
@@ -3672,15 +3668,11 @@ int do_mmu_update(
default:
MEM_LOG("Invalid page update command %x", cmd);
rc = -ENOSYS;
- okay = 0;
break;
}
- if ( unlikely(!okay) )
- {
- rc = rc ? rc : -EINVAL;
+ if ( unlikely(rc) )
break;
- }
guest_handle_add_offset(ureqs, 1);
}
++++++ 23050-xentrace_dynamic_tracebuffer_allocation.patch ++++++
changeset: 23050:4ebba54b666f
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu Mar 17 13:29:01 2011 +0000
files: xen/common/trace.c
description:
xentrace: dynamic tracebuffer allocation
Allocate tracebuffers dynamically, based on the requested buffer size.
Calculate t_info_size from requested t_buf size.
Fix allocation failure path, free pages outside the spinlock.
Remove casts for rawbuf, it can be a void pointer since no math is
done.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
---
xen/common/trace.c | 249 ++++++++++++++++++++++-------------------------------
1 file changed, 104 insertions(+), 145 deletions(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -42,14 +42,14 @@ CHECK_t_buf;
#define compat_t_rec t_rec
#endif
-/* opt_tbuf_size: trace buffer size (in pages) */
-static unsigned int opt_tbuf_size = 0;
+/* opt_tbuf_size: trace buffer size (in pages) for each cpu */
+static unsigned int opt_tbuf_size;
integer_param("tbuf_size", opt_tbuf_size);
/* Pointers to the meta-data objects for all system trace buffers */
static struct t_info *t_info;
-#define T_INFO_PAGES 2 /* Size fixed at 2 pages for now. */
-#define T_INFO_SIZE ((T_INFO_PAGES)*(PAGE_SIZE))
+static unsigned int t_info_pages;
+
static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
@@ -78,6 +78,21 @@ static u32 tb_event_mask = TRC_ALL;
* i.e., sizeof(_type) * ans >= _x. */
#define fit_to_type(_type, _x) (((_x)+sizeof(_type)-1) / sizeof(_type))
+static int cpu_callback(
+ struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned long)hcpu;
+
+ if ( action == CPU_UP_PREPARE )
+ spin_lock_init(&per_cpu(t_lock, cpu));
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block cpu_nfb = {
+ .notifier_call = cpu_callback
+};
+
static void calc_tinfo_first_offset(void)
{
int offset_in_bytes = offsetof(struct t_info, mfn_offset[NR_CPUS]);
@@ -85,20 +100,34 @@ static void calc_tinfo_first_offset(void
}
/**
- * check_tbuf_size - check to make sure that the proposed size will fit
+ * calculate_tbuf_size - check to make sure that the proposed size will fit
* in the currently sized struct t_info and allows prod and cons to
* reach double the value without overflow.
+ * Initialize t_info_pages based on number of trace pages.
*/
-static int check_tbuf_size(u32 pages)
+static int calculate_tbuf_size(unsigned int pages)
{
struct t_buf dummy;
typeof(dummy.prod) size;
-
- size = ((typeof(dummy.prod))pages) * PAGE_SIZE;
-
- return (size / PAGE_SIZE != pages)
- || (size + size < size)
- || (num_online_cpus() * pages + t_info_first_offset > T_INFO_SIZE /
sizeof(uint32_t));
+
+ /* force maximum value for an unsigned type */
+ size = -1;
+
+ /* max size holds up to n pages */
+ size /= PAGE_SIZE;
+ if ( pages > size )
+ {
+ gdprintk(XENLOG_INFO, "%s: requested number of %u pages reduced to
%u\n",
+ __func__, pages, (unsigned int)size);
+ pages = size;
+ }
+
+ t_info_pages = num_online_cpus() * pages + t_info_first_offset;
+ t_info_pages *= sizeof(uint32_t);
+ t_info_pages /= PAGE_SIZE;
+ if ( t_info_pages % PAGE_SIZE )
+ t_info_pages++;
+ return pages;
}
/**
@@ -111,47 +140,28 @@ static int check_tbuf_size(u32 pages)
* This function may also be called later when enabling trace buffers
* via the SET_SIZE hypercall.
*/
-static int alloc_trace_bufs(void)
+static int alloc_trace_bufs(unsigned int pages)
{
- int i, cpu, order;
- unsigned long nr_pages;
+ int i, cpu, order;
/* Start after a fixed-size array of NR_CPUS */
uint32_t *t_info_mfn_list;
int offset;
- if ( opt_tbuf_size == 0 )
- return -EINVAL;
+ if ( t_info )
+ return -EBUSY;
- if ( check_tbuf_size(opt_tbuf_size) )
- {
- printk("Xen trace buffers: tb size %d too large. "
- "Tracing disabled.\n",
- opt_tbuf_size);
+ if ( pages == 0 )
return -EINVAL;
- }
- /* t_info size is fixed for now. Currently this works great, so there
- * seems to be no need to make it dynamic. */
- t_info = alloc_xenheap_pages(get_order_from_pages(T_INFO_PAGES), 0);
- if ( t_info == NULL )
- {
- printk("Xen trace buffers: t_info allocation failed! "
- "Tracing disabled.\n");
- return -ENOMEM;
- }
-
- for ( i = 0; i < T_INFO_PAGES; i++ )
- share_xen_page_with_privileged_guests(
- virt_to_page(t_info) + i, XENSHARE_readonly);
-
- t_info_mfn_list = (uint32_t *)t_info;
- offset = t_info_first_offset;
+ /* Calculate offset in u32 of first mfn */
+ calc_tinfo_first_offset();
- t_info->tbuf_size = opt_tbuf_size;
- printk(XENLOG_INFO "tbuf_size %d\n", t_info->tbuf_size);
+ pages = calculate_tbuf_size(pages);
+ order = get_order_from_pages(pages);
- nr_pages = opt_tbuf_size;
- order = get_order_from_pages(nr_pages);
+ t_info = alloc_xenheap_pages(get_order_from_pages(t_info_pages), 0);
+ if ( t_info == NULL )
+ goto out_dealloc;
/*
* First, allocate buffers for all of the cpus. If any
@@ -159,27 +169,29 @@ static int alloc_trace_bufs(void)
*/
for_each_online_cpu(cpu)
{
- int flags;
- char *rawbuf;
+ void *rawbuf;
struct t_buf *buf;
if ( (rawbuf = alloc_xenheap_pages(
order, MEMF_bits(32 + PAGE_SHIFT))) == NULL )
{
- printk("Xen trace buffers: memory allocation failed\n");
- opt_tbuf_size = 0;
+ printk("Xen trace buffers: memory allocation failed on cpu %d\n",
cpu);
goto out_dealloc;
}
- spin_lock_irqsave(&per_cpu(t_lock, cpu), flags);
-
- per_cpu(t_bufs, cpu) = buf = (struct t_buf *)rawbuf;
+ per_cpu(t_bufs, cpu) = buf = rawbuf;
buf->cons = buf->prod = 0;
per_cpu(t_data, cpu) = (unsigned char *)(buf + 1);
+ }
- spin_unlock_irqrestore(&per_cpu(t_lock, cpu), flags);
+ offset = t_info_first_offset;
+ t_info_mfn_list = (uint32_t *)t_info;
- }
+ for(i = 0; i < t_info_pages; i++)
+ share_xen_page_with_privileged_guests(
+ virt_to_page(t_info) + i, XENSHARE_readonly);
+
+ t_info->tbuf_size = pages;
/*
* Now share the pages to xentrace can map them, and write them in
@@ -188,89 +200,75 @@ static int alloc_trace_bufs(void)
for_each_online_cpu(cpu)
{
/* Share pages so that xentrace can map them. */
- char *rawbuf;
+ void *rawbuf = per_cpu(t_bufs, cpu);
+ struct page_info *p = virt_to_page(rawbuf);
+ uint32_t mfn = virt_to_mfn(rawbuf);
- if ( (rawbuf = (char *)per_cpu(t_bufs, cpu)) )
+ for ( i = 0; i < pages; i++ )
{
- struct page_info *p = virt_to_page(rawbuf);
- uint32_t mfn = virt_to_mfn(rawbuf);
+ share_xen_page_with_privileged_guests(p + i, XENSHARE_writable);
- for ( i = 0; i < nr_pages; i++ )
- {
- share_xen_page_with_privileged_guests(
- p + i, XENSHARE_writable);
-
- t_info_mfn_list[offset + i]=mfn + i;
- }
- /* Write list first, then write per-cpu offset. */
- wmb();
- t_info->mfn_offset[cpu]=offset;
- printk(XENLOG_INFO "p%d mfn %"PRIx32" offset %d\n",
- cpu, mfn, offset);
- offset+=i;
+ t_info_mfn_list[offset + i]=mfn + i;
}
+ t_info->mfn_offset[cpu]=offset;
+ printk(XENLOG_INFO "p%d mfn %"PRIx32" offset %d\n",
+ cpu, mfn, offset);
+ offset+=i;
+
+ spin_lock_init(&per_cpu(t_lock, cpu));
}
- data_size = (opt_tbuf_size * PAGE_SIZE - sizeof(struct t_buf));
+ data_size = (pages * PAGE_SIZE - sizeof(struct t_buf));
t_buf_highwater = data_size >> 1; /* 50% high water */
+ opt_tbuf_size = pages;
+
+ register_cpu_notifier(&cpu_nfb);
+
+ printk("Xen trace buffers: initialised\n");
+ wmb(); /* above must be visible before tb_init_done flag set */
+ tb_init_done = 1;
return 0;
+
out_dealloc:
for_each_online_cpu(cpu)
{
- int flags;
- char * rawbuf;
-
- spin_lock_irqsave(&per_cpu(t_lock, cpu), flags);
- if ( (rawbuf = (char *)per_cpu(t_bufs, cpu)) )
+ void *rawbuf = per_cpu(t_bufs, cpu);
+ per_cpu(t_bufs, cpu) = NULL;
+ printk("Xen trace buffers: cpu %d p %p\n", cpu, rawbuf);
+ if ( rawbuf )
{
- per_cpu(t_bufs, cpu) = NULL;
ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated));
free_xenheap_pages(rawbuf, order);
}
- spin_unlock_irqrestore(&per_cpu(t_lock, cpu), flags);
}
-
+ free_xenheap_pages(t_info, get_order_from_pages(t_info_pages));
+ t_info = NULL;
+ printk("Xen trace buffers: allocation failed! Tracing disabled.\n");
return -ENOMEM;
}
/**
- * tb_set_size - handle the logic involved with dynamically
- * allocating and deallocating tbufs
+ * tb_set_size - handle the logic involved with dynamically allocating tbufs
*
* This function is called when the SET_SIZE hypercall is done.
*/
-static int tb_set_size(int size)
+static int tb_set_size(unsigned int pages)
{
/*
* Setting size is a one-shot operation. It can be done either at
* boot time or via control tools, but not by both. Once buffers
* are created they cannot be destroyed.
*/
- int ret = 0;
-
- if ( opt_tbuf_size != 0 )
+ if ( opt_tbuf_size && pages != opt_tbuf_size )
{
- if ( size != opt_tbuf_size )
- gdprintk(XENLOG_INFO, "tb_set_size from %d to %d not
implemented\n",
- opt_tbuf_size, size);
+ gdprintk(XENLOG_INFO, "tb_set_size from %d to %d not implemented\n",
+ opt_tbuf_size, pages);
return -EINVAL;
}
- if ( size <= 0 )
- return -EINVAL;
-
- opt_tbuf_size = size;
-
- if ( (ret = alloc_trace_bufs()) != 0 )
- {
- opt_tbuf_size = 0;
- return ret;
- }
-
- printk("Xen trace buffers: initialized\n");
- return 0;
+ return alloc_trace_bufs(pages);
}
int trace_will_trace_event(u32 event)
@@ -299,21 +297,6 @@ int trace_will_trace_event(u32 event)
return 1;
}
-static int cpu_callback(
- struct notifier_block *nfb, unsigned long action, void *hcpu)
-{
- unsigned int cpu = (unsigned long)hcpu;
-
- if ( action == CPU_UP_PREPARE )
- spin_lock_init(&per_cpu(t_lock, cpu));
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block cpu_nfb = {
- .notifier_call = cpu_callback
-};
-
/**
* init_trace_bufs - performs initialization of the per-cpu trace buffers.
*
@@ -323,37 +306,13 @@ static struct notifier_block cpu_nfb = {
*/
void __init init_trace_bufs(void)
{
- int i;
-
- /* Calculate offset in u32 of first mfn */
- calc_tinfo_first_offset();
-
- /* Per-cpu t_lock initialisation. */
- for_each_online_cpu ( i )
- spin_lock_init(&per_cpu(t_lock, i));
- register_cpu_notifier(&cpu_nfb);
-
- if ( opt_tbuf_size == 0 )
- {
- printk("Xen trace buffers: disabled\n");
- goto fail;
- }
-
- if ( alloc_trace_bufs() != 0 )
+ if ( opt_tbuf_size && alloc_trace_bufs(opt_tbuf_size) )
{
- dprintk(XENLOG_INFO, "Xen trace buffers: "
- "allocation size %d failed, disabling\n",
- opt_tbuf_size);
- goto fail;
+ gdprintk(XENLOG_INFO, "Xen trace buffers: "
+ "allocation size %d failed, disabling\n",
+ opt_tbuf_size);
+ opt_tbuf_size = 0;
}
-
- printk("Xen trace buffers: initialised\n");
- wmb(); /* above must be visible before tb_init_done flag set */
- tb_init_done = 1;
- return;
-
- fail:
- opt_tbuf_size = 0;
}
/**
@@ -372,7 +331,7 @@ int tb_control(xen_sysctl_tbuf_op_t *tbc
case XEN_SYSCTL_TBUFOP_get_info:
tbc->evt_mask = tb_event_mask;
tbc->buffer_mfn = t_info ? virt_to_mfn(t_info) : 0;
- tbc->size = T_INFO_PAGES * PAGE_SIZE;
+ tbc->size = t_info_pages * PAGE_SIZE;
break;
case XEN_SYSCTL_TBUFOP_set_cpu_mask:
rc = xenctl_cpumap_to_cpumask(&tb_cpu_mask, &tbc->cpu_mask);
++++++ 23074-pfn.h.patch ++++++
References: fate#311376, fate#311529, bnc#578927, bnc#628554
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1300887295 0
# Node ID c80e0fb4fe932b4d8379ea5739af93ae22a30ea5
# Parent 3831bd253e02aa0536ed32e936777d026abb955e
Define new <pfn.h> header for PFN_{DOWN,UP} macros.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/domain_build.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/domain_build.c
+++ xen-4.1.2-testing/xen/arch/x86/domain_build.c
@@ -21,6 +21,7 @@
#include <xen/bitops.h>
#include <xen/compat.h>
#include <xen/libelf.h>
+#include <xen/pfn.h>
#include <asm/regs.h>
#include <asm/system.h>
#include <asm/io.h>
Index: xen-4.1.2-testing/xen/arch/x86/e820.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/e820.c
+++ xen-4.1.2-testing/xen/arch/x86/e820.c
@@ -4,6 +4,7 @@
#include <xen/mm.h>
#include <xen/compat.h>
#include <xen/dmi.h>
+#include <xen/pfn.h>
#include <asm/e820.h>
#include <asm/page.h>
#include <asm/processor.h>
Index: xen-4.1.2-testing/xen/arch/x86/mm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm.c
+++ xen-4.1.2-testing/xen/arch/x86/mm.c
@@ -99,6 +99,7 @@
#include <xen/event.h>
#include <xen/iocap.h>
#include <xen/guest_access.h>
+#include <xen/pfn.h>
#include <asm/paging.h>
#include <asm/shadow.h>
#include <asm/page.h>
Index: xen-4.1.2-testing/xen/arch/x86/msi.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/msi.c
+++ xen-4.1.2-testing/xen/arch/x86/msi.c
@@ -18,6 +18,7 @@
#include <xen/pci_regs.h>
#include <xen/iocap.h>
#include <xen/keyhandler.h>
+#include <xen/pfn.h>
#include <asm/io.h>
#include <asm/smp.h>
#include <asm/desc.h>
Index: xen-4.1.2-testing/xen/arch/x86/numa.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/numa.c
+++ xen-4.1.2-testing/xen/arch/x86/numa.c
@@ -13,6 +13,7 @@
#include <xen/keyhandler.h>
#include <xen/time.h>
#include <xen/smp.h>
+#include <xen/pfn.h>
#include <asm/acpi.h>
#include <xen/sched.h>
Index: xen-4.1.2-testing/xen/arch/x86/setup.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/setup.c
+++ xen-4.1.2-testing/xen/arch/x86/setup.c
@@ -21,6 +21,7 @@
#include <xen/rcupdate.h>
#include <xen/vga.h>
#include <xen/dmi.h>
+#include <xen/pfn.h>
#include <xen/nodemask.h>
#include <public/version.h>
#ifdef CONFIG_COMPAT
Index: xen-4.1.2-testing/xen/arch/x86/srat.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/srat.c
+++ xen-4.1.2-testing/xen/arch/x86/srat.c
@@ -17,6 +17,7 @@
#include <xen/nodemask.h>
#include <xen/acpi.h>
#include <xen/numa.h>
+#include <xen/pfn.h>
#include <asm/e820.h>
#include <asm/page.h>
Index: xen-4.1.2-testing/xen/arch/x86/tboot.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/tboot.c
+++ xen-4.1.2-testing/xen/arch/x86/tboot.c
@@ -6,6 +6,7 @@
#include <xen/domain_page.h>
#include <xen/iommu.h>
#include <xen/acpi.h>
+#include <xen/pfn.h>
#include <asm/fixmap.h>
#include <asm/page.h>
#include <asm/processor.h>
Index: xen-4.1.2-testing/xen/include/asm-x86/page.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/page.h
+++ xen-4.1.2-testing/xen/include/asm-x86/page.h
@@ -396,8 +396,6 @@ static inline uint32_t cacheattr_to_pte_
#endif /* !__ASSEMBLY__ */
-#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
-#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & PAGE_MASK)
#endif /* __X86_PAGE_H__ */
Index: xen-4.1.2-testing/xen/include/xen/pfn.h
===================================================================
--- /dev/null
+++ xen-4.1.2-testing/xen/include/xen/pfn.h
@@ -0,0 +1,9 @@
+#ifndef __XEN_PFN_H__
+#define __XEN_PFN_H__
+
+#include <asm/page.h>
+
+#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
+#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
+
+#endif /* __XEN_PFN_H__ */
++++++ 23091-xentrace_fix_t_info_pages_calculation..patch ++++++
changeset: 23091:67632e5cf652
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Mar 25 08:56:33 2011 +0000
files: xen/common/trace.c
description:
xentrace: fix t_info_pages calculation.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
xen/common/trace.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -29,6 +29,7 @@
#include <xen/init.h>
#include <xen/mm.h>
#include <xen/percpu.h>
+#include <xen/pfn.h>
#include <xen/cpu.h>
#include <asm/atomic.h>
#include <public/sysctl.h>
@@ -109,6 +110,7 @@ static int calculate_tbuf_size(unsigned
{
struct t_buf dummy;
typeof(dummy.prod) size;
+ unsigned int t_info_words, t_info_bytes;
/* force maximum value for an unsigned type */
size = -1;
@@ -122,11 +124,9 @@ static int calculate_tbuf_size(unsigned
pages = size;
}
- t_info_pages = num_online_cpus() * pages + t_info_first_offset;
- t_info_pages *= sizeof(uint32_t);
- t_info_pages /= PAGE_SIZE;
- if ( t_info_pages % PAGE_SIZE )
- t_info_pages++;
+ t_info_words = num_online_cpus() * pages + t_info_first_offset;
+ t_info_bytes = t_info_words * sizeof(uint32_t);
+ t_info_pages = PFN_UP(t_info_bytes);
return pages;
}
++++++ 23092-xentrace_print_calculated_numbers_in_calculate_tbuf_size.patch
++++++
changeset: 23092:45dafa422812
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Mar 25 08:57:28 2011 +0000
files: xen/common/trace.c
description:
xentrace: print calculated numbers in calculate_tbuf_size()
Print number of pages to allocate for per-cpu tracebuffer and metadata
to ease debugging when allocation fails.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
xen/common/trace.c | 2 ++
1 file changed, 2 insertions(+)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -127,6 +127,8 @@ static int calculate_tbuf_size(unsigned
t_info_words = num_online_cpus() * pages + t_info_first_offset;
t_info_bytes = t_info_words * sizeof(uint32_t);
t_info_pages = PFN_UP(t_info_bytes);
+ printk(XENLOG_INFO "xentrace: requesting %u t_info pages for %u trace
pages on %u cpus\n",
+ t_info_pages, pages, num_online_cpus());
return pages;
}
++++++
23093-xentrace_remove_gdprintk_usage_since_they_are_not_in_guest_context.patch
++++++
changeset: 23093:4b784605b089
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Mar 25 08:57:47 2011 +0000
files: xen/common/trace.c
description:
xentrace: remove gdprintk usage since they are not in guest context
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
xen/common/trace.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -119,7 +119,7 @@ static int calculate_tbuf_size(unsigned
size /= PAGE_SIZE;
if ( pages > size )
{
- gdprintk(XENLOG_INFO, "%s: requested number of %u pages reduced to
%u\n",
+ printk(XENLOG_INFO "%s: requested number of %u pages reduced to %u\n",
__func__, pages, (unsigned int)size);
pages = size;
}
@@ -265,7 +265,7 @@ static int tb_set_size(unsigned int page
*/
if ( opt_tbuf_size && pages != opt_tbuf_size )
{
- gdprintk(XENLOG_INFO, "tb_set_size from %d to %d not implemented\n",
+ printk(XENLOG_INFO "tb_set_size from %d to %d not implemented\n",
opt_tbuf_size, pages);
return -EINVAL;
}
@@ -310,7 +310,7 @@ void __init init_trace_bufs(void)
{
if ( opt_tbuf_size && alloc_trace_bufs(opt_tbuf_size) )
{
- gdprintk(XENLOG_INFO, "Xen trace buffers: "
+ printk(XENLOG_INFO "Xen trace buffers: "
"allocation size %d failed, disabling\n",
opt_tbuf_size);
opt_tbuf_size = 0;
++++++ 23094-xentrace_update_comments.patch ++++++
changeset: 23094:d09e8885bc82
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Mar 25 08:58:04 2011 +0000
files: xen/common/trace.c
description:
xentrace: update comments
Fix a typo, remove redundant comment.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
xen/common/trace.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -196,12 +196,11 @@ static int alloc_trace_bufs(unsigned int
t_info->tbuf_size = pages;
/*
- * Now share the pages to xentrace can map them, and write them in
+ * Now share the pages so xentrace can map them, and write them in
* the global t_info structure.
*/
for_each_online_cpu(cpu)
{
- /* Share pages so that xentrace can map them. */
void *rawbuf = per_cpu(t_bufs, cpu);
struct page_info *p = virt_to_page(rawbuf);
uint32_t mfn = virt_to_mfn(rawbuf);
++++++ 23095-xentrace_use_consistent_printk_prefix.patch ++++++
changeset: 23095:941119d58655
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Mar 25 09:01:37 2011 +0000
files: xen/common/trace.c
description:
xentrace: use consistent printk prefix
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
xen/common/trace.c | 31 +++++++++++++++++--------------
1 file changed, 17 insertions(+), 14 deletions(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -119,16 +119,18 @@ static int calculate_tbuf_size(unsigned
size /= PAGE_SIZE;
if ( pages > size )
{
- printk(XENLOG_INFO "%s: requested number of %u pages reduced to %u\n",
- __func__, pages, (unsigned int)size);
+ printk(XENLOG_INFO "xentrace: requested number of %u pages "
+ "reduced to %u\n",
+ pages, (unsigned int)size);
pages = size;
}
t_info_words = num_online_cpus() * pages + t_info_first_offset;
t_info_bytes = t_info_words * sizeof(uint32_t);
t_info_pages = PFN_UP(t_info_bytes);
- printk(XENLOG_INFO "xentrace: requesting %u t_info pages for %u trace
pages on %u cpus\n",
- t_info_pages, pages, num_online_cpus());
+ printk(XENLOG_INFO "xentrace: requesting %u t_info pages "
+ "for %u trace pages on %u cpus\n",
+ t_info_pages, pages, num_online_cpus());
return pages;
}
@@ -177,7 +179,8 @@ static int alloc_trace_bufs(unsigned int
if ( (rawbuf = alloc_xenheap_pages(
order, MEMF_bits(32 + PAGE_SHIFT))) == NULL )
{
- printk("Xen trace buffers: memory allocation failed on cpu %d\n",
cpu);
+ printk(XENLOG_INFO "xentrace: memory allocation failed "
+ "on cpu %d\n", cpu);
goto out_dealloc;
}
@@ -212,7 +215,7 @@ static int alloc_trace_bufs(unsigned int
t_info_mfn_list[offset + i]=mfn + i;
}
t_info->mfn_offset[cpu]=offset;
- printk(XENLOG_INFO "p%d mfn %"PRIx32" offset %d\n",
+ printk(XENLOG_INFO "xentrace: p%d mfn %"PRIx32" offset %d\n",
cpu, mfn, offset);
offset+=i;
@@ -225,7 +228,7 @@ static int alloc_trace_bufs(unsigned int
register_cpu_notifier(&cpu_nfb);
- printk("Xen trace buffers: initialised\n");
+ printk("xentrace: initialised\n");
wmb(); /* above must be visible before tb_init_done flag set */
tb_init_done = 1;
@@ -236,7 +239,7 @@ out_dealloc:
{
void *rawbuf = per_cpu(t_bufs, cpu);
per_cpu(t_bufs, cpu) = NULL;
- printk("Xen trace buffers: cpu %d p %p\n", cpu, rawbuf);
+ printk(XENLOG_DEBUG "xentrace: cpu %d p %p\n", cpu, rawbuf);
if ( rawbuf )
{
ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated));
@@ -245,7 +248,7 @@ out_dealloc:
}
free_xenheap_pages(t_info, get_order_from_pages(t_info_pages));
t_info = NULL;
- printk("Xen trace buffers: allocation failed! Tracing disabled.\n");
+ printk(XENLOG_WARNING "xentrace: allocation failed! Tracing disabled.\n");
return -ENOMEM;
}
@@ -264,8 +267,9 @@ static int tb_set_size(unsigned int page
*/
if ( opt_tbuf_size && pages != opt_tbuf_size )
{
- printk(XENLOG_INFO "tb_set_size from %d to %d not implemented\n",
- opt_tbuf_size, pages);
+ printk(XENLOG_INFO "xentrace: tb_set_size from %d to %d "
+ "not implemented\n",
+ opt_tbuf_size, pages);
return -EINVAL;
}
@@ -309,9 +313,8 @@ void __init init_trace_bufs(void)
{
if ( opt_tbuf_size && alloc_trace_bufs(opt_tbuf_size) )
{
- printk(XENLOG_INFO "Xen trace buffers: "
- "allocation size %d failed, disabling\n",
- opt_tbuf_size);
+ printk(XENLOG_INFO "xentrace: allocation size %d failed, disabling\n",
+ opt_tbuf_size);
opt_tbuf_size = 0;
}
}
++++++ 23096-x86-hpet-no-cpumask_lock.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1301043797 0
# Node ID a65612bcbb921e98a8843157bf365e4ab16e8144
# Parent 941119d58655f2b2df86d9ecc4cb502bbc5e783c
x86/hpet: eliminate cpumask_lock
According to the (now getting removed) comment in struct
hpet_event_channel, this was to prevent accessing a CPU's
timer_deadline after it got cleared from cpumask. This can be done
without a lock altogether - hpet_broadcast_exit() can simply clear
the bit, and handle_hpet_broadcast() can read timer_deadline before
looking at the mask a second time (the cpumask bit was already
found set by the surrounding loop).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Acked-by: Gang Wei <gang.wei@xxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/hpet.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hpet.c
+++ xen-4.1.2-testing/xen/arch/x86/hpet.c
@@ -34,18 +34,6 @@ struct hpet_event_channel
int shift;
s_time_t next_event;
cpumask_t cpumask;
- /*
- * cpumask_lock is used to prevent hpet intr handler from accessing other
- * cpu's timer_deadline after the other cpu's mask was cleared --
- * mask cleared means cpu waken up, then accessing timer_deadline from
- * other cpu is not safe.
- * It is not used for protecting cpumask, so set ops needn't take it.
- * Multiple cpus clear cpumask simultaneously is ok due to the atomic
- * feature of cpu_clear, so hpet_broadcast_exit() can take read lock for
- * clearing cpumask, and handle_hpet_broadcast() have to take write lock
- * for read cpumask & access timer_deadline.
- */
- rwlock_t cpumask_lock;
spinlock_t lock;
void (*event_handler)(struct hpet_event_channel *);
@@ -208,17 +196,18 @@ again:
/* find all expired events */
for_each_cpu_mask(cpu, ch->cpumask)
{
- write_lock_irq(&ch->cpumask_lock);
+ s_time_t deadline;
- if ( cpu_isset(cpu, ch->cpumask) )
- {
- if ( per_cpu(timer_deadline, cpu) <= now )
- cpu_set(cpu, mask);
- else if ( per_cpu(timer_deadline, cpu) < next_event )
- next_event = per_cpu(timer_deadline, cpu);
- }
+ rmb();
+ deadline = per_cpu(timer_deadline, cpu);
+ rmb();
+ if ( !cpu_isset(cpu, ch->cpumask) )
+ continue;
- write_unlock_irq(&ch->cpumask_lock);
+ if ( deadline <= now )
+ cpu_set(cpu, mask);
+ else if ( deadline < next_event )
+ next_event = deadline;
}
/* wakeup the cpus which have an expired event. */
@@ -598,7 +587,6 @@ void hpet_broadcast_init(void)
hpet_events[i].shift = 32;
hpet_events[i].next_event = STIME_MAX;
spin_lock_init(&hpet_events[i].lock);
- rwlock_init(&hpet_events[i].cpumask_lock);
wmb();
hpet_events[i].event_handler = handle_hpet_broadcast;
}
@@ -634,7 +622,6 @@ void hpet_broadcast_init(void)
legacy_hpet_event.idx = 0;
legacy_hpet_event.flags = 0;
spin_lock_init(&legacy_hpet_event.lock);
- rwlock_init(&legacy_hpet_event.cpumask_lock);
wmb();
legacy_hpet_event.event_handler = handle_hpet_broadcast;
@@ -716,9 +703,7 @@ void hpet_broadcast_exit(void)
if ( !reprogram_timer(this_cpu(timer_deadline)) )
raise_softirq(TIMER_SOFTIRQ);
- read_lock_irq(&ch->cpumask_lock);
cpu_clear(cpu, ch->cpumask);
- read_unlock_irq(&ch->cpumask_lock);
if ( ch != &legacy_hpet_event )
{
++++++ 23099-x86-rwlock-scalability.patch ++++++
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1301126601 0
# Node ID 612171ff82ea51aaf65d98fd1a551eb8d50fb481
# Parent c9f745c153ec8c3775e2ee03adc3cb30370b84f6
rwlock: Allow to scale to 2^31-1 readers on x86.
Also rework to match the 'trylock' style of raw function used for
spinlocks.
Inspired by Jan Beulich's patch to do similar improved scaling.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1301214635 -3600
# Node ID 0bc1c4746c8939337f693a513fd837fc03477db1
# Parent 48dac730a93b27ff60a340564e9a7afd7f9385f4
x86_32: Fix _raw_read_trylock() build on some gcc versions.
Was broken by 23099:612171ff82ea.
A bool_t is a single byte, and needs a 'q' register constraint. Avoid
the whole issue by changing the variable to an int, and explicitly
specify the operand suffix as 'l' for good measure.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
Index: xen-4.1.2-testing/xen/common/spinlock.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/spinlock.c
+++ xen-4.1.2-testing/xen/common/spinlock.c
@@ -234,7 +234,11 @@ void _spin_unlock_recursive(spinlock_t *
void _read_lock(rwlock_t *lock)
{
check_lock(&lock->debug);
- _raw_read_lock(&lock->raw);
+ while ( unlikely(!_raw_read_trylock(&lock->raw)) )
+ {
+ while ( likely(_raw_rw_is_write_locked(&lock->raw)) )
+ cpu_relax();
+ }
preempt_disable();
}
@@ -243,7 +247,13 @@ void _read_lock_irq(rwlock_t *lock)
ASSERT(local_irq_is_enabled());
local_irq_disable();
check_lock(&lock->debug);
- _raw_read_lock(&lock->raw);
+ while ( unlikely(!_raw_read_trylock(&lock->raw)) )
+ {
+ local_irq_enable();
+ while ( likely(_raw_rw_is_write_locked(&lock->raw)) )
+ cpu_relax();
+ local_irq_disable();
+ }
preempt_disable();
}
@@ -252,11 +262,26 @@ unsigned long _read_lock_irqsave(rwlock_
unsigned long flags;
local_irq_save(flags);
check_lock(&lock->debug);
- _raw_read_lock(&lock->raw);
+ while ( unlikely(!_raw_read_trylock(&lock->raw)) )
+ {
+ local_irq_restore(flags);
+ while ( likely(_raw_rw_is_write_locked(&lock->raw)) )
+ cpu_relax();
+ local_irq_save(flags);
+ }
preempt_disable();
return flags;
}
+int _read_trylock(rwlock_t *lock)
+{
+ check_lock(&lock->debug);
+ if ( !_raw_read_trylock(&lock->raw) )
+ return 0;
+ preempt_disable();
+ return 1;
+}
+
void _read_unlock(rwlock_t *lock)
{
preempt_enable();
@@ -280,7 +305,11 @@ void _read_unlock_irqrestore(rwlock_t *l
void _write_lock(rwlock_t *lock)
{
check_lock(&lock->debug);
- _raw_write_lock(&lock->raw);
+ while ( unlikely(!_raw_write_trylock(&lock->raw)) )
+ {
+ while ( likely(_raw_rw_is_locked(&lock->raw)) )
+ cpu_relax();
+ }
preempt_disable();
}
@@ -289,7 +318,13 @@ void _write_lock_irq(rwlock_t *lock)
ASSERT(local_irq_is_enabled());
local_irq_disable();
check_lock(&lock->debug);
- _raw_write_lock(&lock->raw);
+ while ( unlikely(!_raw_write_trylock(&lock->raw)) )
+ {
+ local_irq_enable();
+ while ( likely(_raw_rw_is_locked(&lock->raw)) )
+ cpu_relax();
+ local_irq_disable();
+ }
preempt_disable();
}
@@ -298,7 +333,13 @@ unsigned long _write_lock_irqsave(rwlock
unsigned long flags;
local_irq_save(flags);
check_lock(&lock->debug);
- _raw_write_lock(&lock->raw);
+ while ( unlikely(!_raw_write_trylock(&lock->raw)) )
+ {
+ local_irq_restore(flags);
+ while ( likely(_raw_rw_is_locked(&lock->raw)) )
+ cpu_relax();
+ local_irq_save(flags);
+ }
preempt_disable();
return flags;
}
Index: xen-4.1.2-testing/xen/include/asm-ia64/linux-xen/asm/spinlock.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-ia64/linux-xen/asm/spinlock.h
+++ xen-4.1.2-testing/xen/include/asm-ia64/linux-xen/asm/spinlock.h
@@ -35,17 +35,6 @@ typedef struct {
} raw_rwlock_t;
#define _RAW_RW_LOCK_UNLOCKED /*(raw_rwlock_t)*/ { 0, 0 }
-#define _raw_read_lock(rw)
\
-do {
\
- raw_rwlock_t *__read_lock_ptr = (rw);
\
-
\
- while (unlikely(ia64_fetchadd(1, (int *) __read_lock_ptr, acq) < 0)) {
\
- ia64_fetchadd(-1, (int *) __read_lock_ptr, rel);
\
- while (*(volatile int *)__read_lock_ptr < 0)
\
- cpu_relax();
\
- }
\
-} while (0)
-
#define _raw_read_unlock(rw) \
do { \
raw_rwlock_t *__read_lock_ptr = (rw); \
@@ -53,20 +42,6 @@ do {
\
} while (0)
#ifdef ASM_SUPPORTED
-#define _raw_write_lock(rw)
\
-do {
\
- __asm__ __volatile__ (
\
- "mov ar.ccv = r0\n"
\
- "dep r29 = -1, r0, 31, 1;;\n"
\
- "1:\n"
\
- "ld4 r2 = [%0];;\n"
\
- "cmp4.eq p0,p7 = r0,r2\n"
\
- "(p7) br.cond.spnt.few 1b \n"
\
- "cmpxchg4.acq r2 = [%0], r29, ar.ccv;;\n"
\
- "cmp4.eq p0,p7 = r0, r2\n"
\
- "(p7) br.cond.spnt.few 1b;;\n"
\
- :: "r"(rw) : "ar.ccv", "p7", "r2", "r29", "memory");
\
-} while(0)
#define _raw_write_trylock(rw)
\
({
\
@@ -82,16 +57,6 @@ do {
\
#else /* !ASM_SUPPORTED */
-#define _raw_write_lock(l)
\
-({
\
- __u64 ia64_val, ia64_set_val = ia64_dep_mi(-1, 0, 31, 1);
\
- __u32 *ia64_write_lock_ptr = (__u32 *) (l);
\
- do {
\
- while (*ia64_write_lock_ptr)
\
- ia64_barrier();
\
- ia64_val = ia64_cmpxchg4_acq(ia64_write_lock_ptr, ia64_set_val,
0); \
- } while (ia64_val);
\
-})
#define _raw_write_trylock(rw) \
({ \
Index: xen-4.1.2-testing/xen/include/asm-x86/spinlock.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/spinlock.h
+++ xen-4.1.2-testing/xen/include/asm-x86/spinlock.h
@@ -35,51 +35,29 @@ typedef struct {
volatile int lock;
} raw_rwlock_t;
-#define RW_LOCK_BIAS 0x01000000
-#define _RAW_RW_LOCK_UNLOCKED /*(raw_rwlock_t)*/ { RW_LOCK_BIAS }
+#define RW_WRITE_BIAS 0x7fffffff
+#define _RAW_RW_LOCK_UNLOCKED /*(raw_rwlock_t)*/ { 0 }
-static always_inline void _raw_read_lock(raw_rwlock_t *rw)
+static always_inline int _raw_read_trylock(raw_rwlock_t *rw)
{
- asm volatile (
- "1: lock; decl %0 \n"
- " jns 3f \n"
- " lock; incl %0 \n"
- "2: rep; nop \n"
- " cmpl $1,%0 \n"
- " js 2b \n"
- " jmp 1b \n"
- "3:"
- : "=m" (rw->lock) : : "memory" );
-}
+ int acquired;
-static always_inline void _raw_write_lock(raw_rwlock_t *rw)
-{
asm volatile (
- "1: lock; subl %1,%0 \n"
- " jz 3f \n"
- " lock; addl %1,%0 \n"
- "2: rep; nop \n"
- " cmpl %1,%0 \n"
- " jne 2b \n"
+ " lock; decl %0 \n"
+ " jns 2f \n"
+ "1: .subsection 1 \n"
+ "2: lock; incl %0 \n"
+ " decl %1 \n"
" jmp 1b \n"
- "3:"
- : "=m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory" );
+ " .subsection 0 \n"
+ : "=m" (rw->lock), "=r" (acquired) : "1" (1) : "memory" );
+
+ return acquired;
}
static always_inline int _raw_write_trylock(raw_rwlock_t *rw)
{
- int rc;
-
- asm volatile (
- " lock; subl %2,%0 \n"
- " jz 1f \n"
- " lock; addl %2,%0 \n"
- " dec %1 \n"
- "1:"
- : "=m" (rw->lock), "=r" (rc) : "i" (RW_LOCK_BIAS), "1" (1)
- : "memory" );
-
- return rc;
+ return (cmpxchg(&rw->lock, 0, RW_WRITE_BIAS) == 0);
}
static always_inline void _raw_read_unlock(raw_rwlock_t *rw)
@@ -92,11 +70,11 @@ static always_inline void _raw_read_unlo
static always_inline void _raw_write_unlock(raw_rwlock_t *rw)
{
asm volatile (
- "lock ; addl %1,%0"
- : "=m" ((rw)->lock) : "i" (RW_LOCK_BIAS) : "memory" );
+ "lock ; subl %1,%0"
+ : "=m" ((rw)->lock) : "i" (RW_WRITE_BIAS) : "memory" );
}
-#define _raw_rw_is_locked(x) ((x)->lock < RW_LOCK_BIAS)
-#define _raw_rw_is_write_locked(x) ((x)->lock <= 0)
+#define _raw_rw_is_locked(x) ((x)->lock != 0)
+#define _raw_rw_is_write_locked(x) ((x)->lock > 0)
#endif /* __ASM_SPINLOCK_H */
Index: xen-4.1.2-testing/xen/include/xen/spinlock.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/xen/spinlock.h
+++ xen-4.1.2-testing/xen/include/xen/spinlock.h
@@ -157,6 +157,7 @@ unsigned long _read_lock_irqsave(rwlock_
void _read_unlock(rwlock_t *lock);
void _read_unlock_irq(rwlock_t *lock);
void _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags);
+int _read_trylock(rwlock_t *lock);
void _write_lock(rwlock_t *lock);
void _write_lock_irq(rwlock_t *lock);
@@ -210,6 +211,7 @@ int _rw_is_write_locked(rwlock_t *lock);
#define read_unlock(l) _read_unlock(l)
#define read_unlock_irq(l) _read_unlock_irq(l)
#define read_unlock_irqrestore(l, f) _read_unlock_irqrestore(l, f)
+#define read_trylock(l) _read_trylock(l)
#define write_lock(l) _write_lock(l)
#define write_lock_irq(l) _write_lock_irq(l)
++++++ 23103-x86-pirq-guest-eoi-check.patch ++++++
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1301132521 0
# Node ID 48dac730a93b27ff60a340564e9a7afd7f9385f4
# Parent 8f001d864fefac689b7662bc9979eaddf4fd6e9c
x86: __pirq_guest_eoi() must check it is called for a fully
guest-bound irq before accessing desc->action.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/irq.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/irq.c
+++ xen-4.1.2-testing/xen/arch/x86/irq.c
@@ -1033,6 +1033,12 @@ static void __pirq_guest_eoi(struct doma
return;
}
+ if ( !(desc->status & IRQ_GUEST) )
+ {
+ spin_unlock_irq(&desc->lock);
+ return;
+ }
+
action = (irq_guest_action_t *)desc->action;
irq = desc - irq_desc;
++++++ 23127-vtd-bios-settings.patch ++++++
# HG changeset patch
# User Allen Kay <allen.m.kay@xxxxxxxxx>
# Date 1301755765 -3600
# Node ID 1046830079376a4b29fcad0cd037a834e808ed06
# Parent 89c23f58aa986092da0c9a7dfac1c41befbe1f3f
[VTD] check BIOS settings before enabling interrupt remapping or x2apic
Check flags field in ACPI DMAR structure before enabling interrupt
remapping or x2apic. This allows platform vendors to disable
interrupt remapping or x2apic features if on board BIOS does not
support them.
Signed-off-by: Allen Kay <allen.m.kay@xxxxxxxxx>
# HG changeset patch
# User Allen Kay <allen.m.kay@xxxxxxxxx>
# Date 1302077462 -3600
# Node ID c7916d6f4dfba9d6c7eeb0fc2796068d75e2fb4a
# Parent 42fa70e0761bbb0596618ca5323664f31a2faa76
[VTD] Fixes to ACPI DMAR flag checks.
* platform_supports_{intremap,x2apic} should not be marked __init as
they are used during S3 resume.
* DMAR flags should be taken from the table passed to
acpi_parse_dmar() -- this is the trusted copy of the DMAR, when
running in TXT mode.
Signed-off-by: Allen Kay <allen.m.kay@xxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/apic.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/apic.c
+++ xen-4.1.2-testing/xen/arch/x86/apic.c
@@ -566,7 +566,7 @@ static void resume_x2apic(void)
mask_8259A();
mask_IO_APIC_setup(ioapic_entries);
- iommu_enable_IR();
+ iommu_enable_x2apic_IR();
__enable_x2apic();
restore_IO_APIC_setup(ioapic_entries);
@@ -783,7 +783,7 @@ int lapic_suspend(void)
local_irq_save(flags);
disable_local_APIC();
- iommu_disable_IR();
+ iommu_disable_x2apic_IR();
local_irq_restore(flags);
return 0;
}
@@ -1029,7 +1029,7 @@ void __init x2apic_bsp_setup(void)
mask_8259A();
mask_IO_APIC_setup(ioapic_entries);
- if ( iommu_enable_IR() )
+ if ( iommu_enable_x2apic_IR() )
{
if ( x2apic_enabled )
panic("Interrupt remapping could not be enabled while "
Index: xen-4.1.2-testing/xen/drivers/passthrough/vtd/dmar.c
===================================================================
--- xen-4.1.2-testing.orig/xen/drivers/passthrough/vtd/dmar.c
+++ xen-4.1.2-testing/xen/drivers/passthrough/vtd/dmar.c
@@ -46,6 +46,7 @@ LIST_HEAD(acpi_rmrr_units);
LIST_HEAD(acpi_atsr_units);
LIST_HEAD(acpi_rhsa_units);
+static int __read_mostly dmar_flags;
static u64 igd_drhd_address;
u8 dmar_host_address_width;
@@ -684,6 +685,7 @@ static int __init acpi_parse_dmar(struct
int ret = 0;
dmar = (struct acpi_table_dmar *)table;
+ dmar_flags = dmar->flags;
if ( !iommu_enabled )
{
@@ -804,3 +806,22 @@ void acpi_dmar_zap(void)
dmar_table->signature[0] = 'X';
dmar_table->checksum -= 'X'-'D';
}
+
+int platform_supports_intremap(void)
+{
+ unsigned int flags = 0;
+
+ flags = DMAR_INTR_REMAP;
+ return ((dmar_flags & flags) == DMAR_INTR_REMAP);
+}
+
+int platform_supports_x2apic(void)
+{
+ unsigned int flags = 0;
+
+ if (!cpu_has_x2apic)
+ return 0;
+
+ flags = DMAR_INTR_REMAP | DMAR_X2APIC_OPT_OUT;
+ return ((dmar_flags & flags) == DMAR_INTR_REMAP);
+}
Index: xen-4.1.2-testing/xen/drivers/passthrough/vtd/extern.h
===================================================================
--- xen-4.1.2-testing.orig/xen/drivers/passthrough/vtd/extern.h
+++ xen-4.1.2-testing/xen/drivers/passthrough/vtd/extern.h
@@ -87,5 +87,7 @@ void vtd_ops_preamble_quirk(struct iommu
void vtd_ops_postamble_quirk(struct iommu* iommu);
void me_wifi_quirk(struct domain *domain, u8 bus, u8 devfn, int map);
void pci_vtd_quirk(struct pci_dev *pdev);
+int platform_supports_intremap(void);
+int platform_supports_x2apic(void);
#endif // _VTD_EXTERN_H_
Index: xen-4.1.2-testing/xen/drivers/passthrough/vtd/intremap.c
===================================================================
--- xen-4.1.2-testing.orig/xen/drivers/passthrough/vtd/intremap.c
+++ xen-4.1.2-testing/xen/drivers/passthrough/vtd/intremap.c
@@ -741,6 +741,13 @@ int enable_intremap(struct iommu *iommu,
ASSERT(ecap_intr_remap(iommu->ecap) && iommu_intremap);
+ if ( !platform_supports_intremap() )
+ {
+ dprintk(XENLOG_ERR VTDPREFIX,
+ "Platform firmware does not support interrupt remapping\n");
+ return -EINVAL;
+ }
+
ir_ctrl = iommu_ir_ctrl(iommu);
sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
@@ -847,10 +854,10 @@ out:
}
/*
- * This function is used to enable Interrutp remapping when
+ * This function is used to enable Interrupt remapping when
* enable x2apic
*/
-int iommu_enable_IR(void)
+int iommu_enable_x2apic_IR(void)
{
struct acpi_drhd_unit *drhd;
struct iommu *iommu;
@@ -858,6 +865,9 @@ int iommu_enable_IR(void)
if ( !iommu_supports_eim() )
return -1;
+ if ( !platform_supports_x2apic() )
+ return -1;
+
for_each_drhd_unit ( drhd )
{
struct qi_ctrl *qi_ctrl = NULL;
@@ -907,7 +917,7 @@ int iommu_enable_IR(void)
* This function is used to disable Interrutp remapping when
* suspend local apic
*/
-void iommu_disable_IR(void)
+void iommu_disable_x2apic_IR(void)
{
struct acpi_drhd_unit *drhd;
Index: xen-4.1.2-testing/xen/drivers/passthrough/vtd/iommu.c
===================================================================
--- xen-4.1.2-testing.orig/xen/drivers/passthrough/vtd/iommu.c
+++ xen-4.1.2-testing/xen/drivers/passthrough/vtd/iommu.c
@@ -2006,7 +2006,7 @@ static int init_vtd_hw(void)
{
iommu_intremap = 0;
dprintk(XENLOG_WARNING VTDPREFIX,
- "Failed to enable Interrupt Remapping!\n");
+ "Interrupt Remapping not enabled\n");
break;
}
}
Index: xen-4.1.2-testing/xen/drivers/passthrough/vtd/iommu.h
===================================================================
--- xen-4.1.2-testing.orig/xen/drivers/passthrough/vtd/iommu.h
+++ xen-4.1.2-testing/xen/drivers/passthrough/vtd/iommu.h
@@ -22,6 +22,10 @@
#include <xen/types.h>
+/* DMAR Flags bits */
+#define DMAR_INTR_REMAP 0x1
+#define DMAR_X2APIC_OPT_OUT 0x2
+
/*
* Intel IOMMU register specification per version 1.0 public spec.
*/
Index: xen-4.1.2-testing/xen/include/xen/iommu.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/xen/iommu.h
+++ xen-4.1.2-testing/xen/include/xen/iommu.h
@@ -66,8 +66,8 @@ struct iommu {
int iommu_setup(void);
int iommu_supports_eim(void);
-int iommu_enable_IR(void);
-void iommu_disable_IR(void);
+int iommu_enable_x2apic_IR(void);
+void iommu_disable_x2apic_IR(void);
int iommu_add_device(struct pci_dev *pdev);
int iommu_remove_device(struct pci_dev *pdev);
++++++ 23128-xentrace_correct_formula_to_calculate_t_info_pages.patch ++++++
changeset: 23128:4a335f1000ea
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sat Apr 02 15:50:19 2011 +0100
files: xen/common/trace.c
description:
xentrace: correct formula to calculate t_info_pages
The current formula to calculate t_info_pages, based on the initial
code, is slightly incorrect. It may allocate more than needed.
Each cpu has some pages/mfns stored as uint32_t.
That list is stored with an offset at tinfo.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
---
xen/common/trace.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -110,7 +110,7 @@ static int calculate_tbuf_size(unsigned
{
struct t_buf dummy;
typeof(dummy.prod) size;
- unsigned int t_info_words, t_info_bytes;
+ unsigned int t_info_words;
/* force maximum value for an unsigned type */
size = -1;
@@ -125,9 +125,8 @@ static int calculate_tbuf_size(unsigned
pages = size;
}
- t_info_words = num_online_cpus() * pages + t_info_first_offset;
- t_info_bytes = t_info_words * sizeof(uint32_t);
- t_info_pages = PFN_UP(t_info_bytes);
+ t_info_words = num_online_cpus() * pages * sizeof(uint32_t);
+ t_info_pages = PFN_UP(t_info_first_offset + t_info_words);
printk(XENLOG_INFO "xentrace: requesting %u t_info pages "
"for %u trace pages on %u cpus\n",
t_info_pages, pages, num_online_cpus());
++++++ 23129-xentrace_remove_unneeded_debug_printk.patch ++++++
changeset: 23129:219ba19aedeb
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sat Apr 02 15:50:47 2011 +0100
files: xen/common/trace.c
description:
xentrace: remove unneeded debug printk
The pointer value in case of an allocation failure is rather
uninteresting.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
---
xen/common/trace.c | 1 -
1 file changed, 1 deletion(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -238,7 +238,6 @@ out_dealloc:
{
void *rawbuf = per_cpu(t_bufs, cpu);
per_cpu(t_bufs, cpu) = NULL;
- printk(XENLOG_DEBUG "xentrace: cpu %d p %p\n", cpu, rawbuf);
if ( rawbuf )
{
ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated));
++++++
23173-xentrace_Move_register_cpu_notifier_call_into_boot-time_init..patch ++++++
changeset: 23173:94cef9aaf0cd
user: Keir Fraser <keir@xxxxxxx>
date: Wed Apr 06 15:52:50 2011 +0100
files: xen/common/trace.c
description:
xentrace: Move register_cpu_notifier() call into boot-time init.
We can't do it lazily from alloc_trace_bufs() as that gets called
later if tracing is enabled later by dom0.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
---
xen/common/trace.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -225,8 +225,6 @@ static int alloc_trace_bufs(unsigned int
t_buf_highwater = data_size >> 1; /* 50% high water */
opt_tbuf_size = pages;
- register_cpu_notifier(&cpu_nfb);
-
printk("xentrace: initialised\n");
wmb(); /* above must be visible before tb_init_done flag set */
tb_init_done = 1;
@@ -309,6 +307,8 @@ int trace_will_trace_event(u32 event)
*/
void __init init_trace_bufs(void)
{
+ register_cpu_notifier(&cpu_nfb);
+
if ( opt_tbuf_size && alloc_trace_bufs(opt_tbuf_size) )
{
printk(XENLOG_INFO "xentrace: allocation size %d failed, disabling\n",
++++++ 23199-amd-iommu-unmapped-intr-fault.patch ++++++
# HG changeset patch
# User Wei Wang <wei.wang2@xxxxxxx>
# Date 1302610857 -3600
# Node ID dbd98ab2f87facba8117bb881fa2ea5dfdb92960
# Parent 697ac895c11c6d5d82524de56796cee98fded2a5
amd iommu: Unmapped interrupt should generate IO page faults.
This helps us to debug interrupt issues.
Signed-off-by: Wei Wang <wei.wang2@xxxxxxx>
Index: xen-4.1.2-testing/xen/drivers/passthrough/amd/iommu_map.c
===================================================================
--- xen-4.1.2-testing.orig/xen/drivers/passthrough/amd/iommu_map.c
+++ xen-4.1.2-testing/xen/drivers/passthrough/amd/iommu_map.c
@@ -327,8 +327,9 @@ void amd_iommu_set_intremap_table(u32 *d
set_field_in_reg_u32(0xB, entry,
IOMMU_DEV_TABLE_INT_TABLE_LENGTH_MASK,
IOMMU_DEV_TABLE_INT_TABLE_LENGTH_SHIFT, &entry);
- /* ignore unmapped interrupts */
- set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
+
+ /* unmapped interrupt results io page faults*/
+ set_field_in_reg_u32(IOMMU_CONTROL_DISABLED, entry,
IOMMU_DEV_TABLE_INT_TABLE_IGN_UNMAPPED_MASK,
IOMMU_DEV_TABLE_INT_TABLE_IGN_UNMAPPED_SHIFT, &entry);
set_field_in_reg_u32(int_valid ? IOMMU_CONTROL_ENABLED :
++++++ 23233-hvm-cr-access.patch ++++++
References: FATE#309900
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1303116432 -3600
# Node ID 1276926e3795b11ef6ac2f59df900d8e0ba9f54b
# Parent 07d832ad23021445bc56fafaeb2843c94d868005
vmx/hvm: move mov-cr handling functions to generic HVM code
Currently the handling of CR accesses intercepts is done much
differently in SVM and VMX. For future usage move the VMX part
into the generic HVM path and use the exported functions.
Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx>
Signed-off-by: Keir Fraser <keir@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/hvm/hvm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/hvm.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/hvm.c
@@ -1298,6 +1298,86 @@ static void hvm_set_uc_mode(struct vcpu
return hvm_funcs.set_uc_mode(v);
}
+int hvm_mov_to_cr(unsigned int cr, unsigned int gpr)
+{
+ struct vcpu *curr = current;
+ unsigned long val, *reg;
+
+ if ( (reg = get_x86_gpr(guest_cpu_user_regs(), gpr)) == NULL )
+ {
+ gdprintk(XENLOG_ERR, "invalid gpr: %u\n", gpr);
+ goto exit_and_crash;
+ }
+
+ val = *reg;
+ HVMTRACE_LONG_2D(CR_WRITE, cr, TRC_PAR_LONG(val));
+ HVM_DBG_LOG(DBG_LEVEL_1, "CR%u, value = %lx", cr, val);
+
+ switch ( cr )
+ {
+ case 0:
+ return hvm_set_cr0(val);
+
+ case 3:
+ return hvm_set_cr3(val);
+
+ case 4:
+ return hvm_set_cr4(val);
+
+ case 8:
+ vlapic_set_reg(vcpu_vlapic(curr), APIC_TASKPRI, ((val & 0x0f) << 4));
+ break;
+
+ default:
+ gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
+ goto exit_and_crash;
+ }
+
+ return X86EMUL_OKAY;
+
+ exit_and_crash:
+ domain_crash(curr->domain);
+ return X86EMUL_UNHANDLEABLE;
+}
+
+int hvm_mov_from_cr(unsigned int cr, unsigned int gpr)
+{
+ struct vcpu *curr = current;
+ unsigned long val = 0, *reg;
+
+ if ( (reg = get_x86_gpr(guest_cpu_user_regs(), gpr)) == NULL )
+ {
+ gdprintk(XENLOG_ERR, "invalid gpr: %u\n", gpr);
+ goto exit_and_crash;
+ }
+
+ switch ( cr )
+ {
+ case 0:
+ case 2:
+ case 3:
+ case 4:
+ val = curr->arch.hvm_vcpu.guest_cr[cr];
+ break;
+ case 8:
+ val = (vlapic_get_reg(vcpu_vlapic(curr), APIC_TASKPRI) & 0xf0) >> 4;
+ break;
+ default:
+ gdprintk(XENLOG_ERR, "invalid cr: %u\n", cr);
+ goto exit_and_crash;
+ }
+
+ *reg = val;
+ HVMTRACE_LONG_2D(CR_READ, cr, TRC_PAR_LONG(val));
+ HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR%u, value = %lx", cr, val);
+
+ return X86EMUL_OKAY;
+
+ exit_and_crash:
+ domain_crash(curr->domain);
+ return X86EMUL_UNHANDLEABLE;
+}
+
int hvm_set_cr0(unsigned long value)
{
struct vcpu *v = current;
Index: xen-4.1.2-testing/xen/arch/x86/hvm/vmx/vmx.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/vmx/vmx.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/vmx/vmx.c
@@ -1545,182 +1545,42 @@ static void vmx_invlpg_intercept(unsigne
vpid_sync_vcpu_gva(curr, vaddr);
}
-#define CASE_SET_REG(REG, reg) \
- case VMX_CONTROL_REG_ACCESS_GPR_ ## REG: regs->reg = value; break
-#define CASE_GET_REG(REG, reg) \
- case VMX_CONTROL_REG_ACCESS_GPR_ ## REG: value = regs->reg; break
-
-#define CASE_EXTEND_SET_REG \
- CASE_EXTEND_REG(S)
-#define CASE_EXTEND_GET_REG \
- CASE_EXTEND_REG(G)
-
-#ifdef __i386__
-#define CASE_EXTEND_REG(T)
-#else
-#define CASE_EXTEND_REG(T) \
- CASE_ ## T ## ET_REG(R8, r8); \
- CASE_ ## T ## ET_REG(R9, r9); \
- CASE_ ## T ## ET_REG(R10, r10); \
- CASE_ ## T ## ET_REG(R11, r11); \
- CASE_ ## T ## ET_REG(R12, r12); \
- CASE_ ## T ## ET_REG(R13, r13); \
- CASE_ ## T ## ET_REG(R14, r14); \
- CASE_ ## T ## ET_REG(R15, r15)
-#endif
-
-static int mov_to_cr(int gp, int cr, struct cpu_user_regs *regs)
-{
- unsigned long value;
- struct vcpu *v = current;
- struct vlapic *vlapic = vcpu_vlapic(v);
- int rc = 0;
- unsigned long old;
-
- switch ( gp )
- {
- CASE_GET_REG(EAX, eax);
- CASE_GET_REG(ECX, ecx);
- CASE_GET_REG(EDX, edx);
- CASE_GET_REG(EBX, ebx);
- CASE_GET_REG(EBP, ebp);
- CASE_GET_REG(ESI, esi);
- CASE_GET_REG(EDI, edi);
- CASE_GET_REG(ESP, esp);
- CASE_EXTEND_GET_REG;
- default:
- gdprintk(XENLOG_ERR, "invalid gp: %d\n", gp);
- goto exit_and_crash;
- }
-
- HVMTRACE_LONG_2D(CR_WRITE, cr, TRC_PAR_LONG(value));
-
- HVM_DBG_LOG(DBG_LEVEL_1, "CR%d, value = %lx", cr, value);
-
- switch ( cr )
- {
- case 0:
- old = v->arch.hvm_vcpu.guest_cr[0];
- rc = !hvm_set_cr0(value);
- if (rc)
- hvm_memory_event_cr0(value, old);
- return rc;
-
- case 3:
- old = v->arch.hvm_vcpu.guest_cr[3];
- rc = !hvm_set_cr3(value);
- if (rc)
- hvm_memory_event_cr3(value, old);
- return rc;
-
- case 4:
- old = v->arch.hvm_vcpu.guest_cr[4];
- rc = !hvm_set_cr4(value);
- if (rc)
- hvm_memory_event_cr4(value, old);
- return rc;
-
- case 8:
- vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4));
- break;
-
- default:
- gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
- goto exit_and_crash;
- }
-
- return 1;
-
- exit_and_crash:
- domain_crash(v->domain);
- return 0;
-}
-
-/*
- * Read from control registers. CR0 and CR4 are read from the shadow.
- */
-static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
+static int vmx_cr_access(unsigned long exit_qualification)
{
- unsigned long value = 0;
- struct vcpu *v = current;
- struct vlapic *vlapic = vcpu_vlapic(v);
-
- switch ( cr )
- {
- case 3:
- value = (unsigned long)v->arch.hvm_vcpu.guest_cr[3];
- break;
- case 8:
- value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI);
- value = (value & 0xF0) >> 4;
- break;
- default:
- gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
- domain_crash(v->domain);
- break;
- }
-
- switch ( gp ) {
- CASE_SET_REG(EAX, eax);
- CASE_SET_REG(ECX, ecx);
- CASE_SET_REG(EDX, edx);
- CASE_SET_REG(EBX, ebx);
- CASE_SET_REG(EBP, ebp);
- CASE_SET_REG(ESI, esi);
- CASE_SET_REG(EDI, edi);
- CASE_SET_REG(ESP, esp);
- CASE_EXTEND_SET_REG;
- default:
- printk("invalid gp: %d\n", gp);
- domain_crash(v->domain);
- break;
- }
-
- HVMTRACE_LONG_2D(CR_READ, cr, TRC_PAR_LONG(value));
-
- HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR%d, value = %lx", cr, value);
-}
-
-static int vmx_cr_access(unsigned long exit_qualification,
- struct cpu_user_regs *regs)
-{
- unsigned int gp, cr;
- unsigned long value;
- struct vcpu *v = current;
+ struct vcpu *curr = current;
- switch ( exit_qualification & VMX_CONTROL_REG_ACCESS_TYPE )
+ switch ( VMX_CONTROL_REG_ACCESS_TYPE(exit_qualification) )
{
- case VMX_CONTROL_REG_ACCESS_TYPE_MOV_TO_CR:
- gp = exit_qualification & VMX_CONTROL_REG_ACCESS_GPR;
- cr = exit_qualification & VMX_CONTROL_REG_ACCESS_NUM;
- return mov_to_cr(gp, cr, regs);
- case VMX_CONTROL_REG_ACCESS_TYPE_MOV_FROM_CR:
- gp = exit_qualification & VMX_CONTROL_REG_ACCESS_GPR;
- cr = exit_qualification & VMX_CONTROL_REG_ACCESS_NUM;
- mov_from_cr(cr, gp, regs);
- break;
- case VMX_CONTROL_REG_ACCESS_TYPE_CLTS:
- {
- unsigned long old = v->arch.hvm_vcpu.guest_cr[0];
- v->arch.hvm_vcpu.guest_cr[0] &= ~X86_CR0_TS;
- vmx_update_guest_cr(v, 0);
-
- hvm_memory_event_cr0(v->arch.hvm_vcpu.guest_cr[0], old);
-
+ case VMX_CONTROL_REG_ACCESS_TYPE_MOV_TO_CR: {
+ unsigned long gp = VMX_CONTROL_REG_ACCESS_GPR(exit_qualification);
+ unsigned long cr = VMX_CONTROL_REG_ACCESS_NUM(exit_qualification);
+ return hvm_mov_to_cr(cr, gp);
+ }
+ case VMX_CONTROL_REG_ACCESS_TYPE_MOV_FROM_CR: {
+ unsigned long gp = VMX_CONTROL_REG_ACCESS_GPR(exit_qualification);
+ unsigned long cr = VMX_CONTROL_REG_ACCESS_NUM(exit_qualification);
+ return hvm_mov_from_cr(cr, gp);
+ }
+ case VMX_CONTROL_REG_ACCESS_TYPE_CLTS: {
+ unsigned long old = curr->arch.hvm_vcpu.guest_cr[0];
+ curr->arch.hvm_vcpu.guest_cr[0] &= ~X86_CR0_TS;
+ vmx_update_guest_cr(curr, 0);
+ hvm_memory_event_cr0(curr->arch.hvm_vcpu.guest_cr[0], old);
HVMTRACE_0D(CLTS);
break;
}
- case VMX_CONTROL_REG_ACCESS_TYPE_LMSW:
- value = v->arch.hvm_vcpu.guest_cr[0];
+ case VMX_CONTROL_REG_ACCESS_TYPE_LMSW: {
+ unsigned long value = curr->arch.hvm_vcpu.guest_cr[0];
/* LMSW can: (1) set bits 0-3; (2) clear bits 1-3. */
value = (value & ~0xe) | ((exit_qualification >> 16) & 0xf);
HVMTRACE_LONG_1D(LMSW, value);
- return !hvm_set_cr0(value);
+ return hvm_set_cr0(value);
+ }
default:
BUG();
}
- return 1;
+ return X86EMUL_OKAY;
}
static const struct lbr_info {
@@ -2525,7 +2385,7 @@ asmlinkage void vmx_vmexit_handler(struc
case EXIT_REASON_CR_ACCESS:
{
exit_qualification = __vmread(EXIT_QUALIFICATION);
- if ( vmx_cr_access(exit_qualification, regs) )
+ if ( vmx_cr_access(exit_qualification) == X86EMUL_OKAY )
update_guest_eip(); /* Safe: MOV Cn, LMSW, CLTS */
break;
}
Index: xen-4.1.2-testing/xen/arch/x86/traps.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/traps.c
+++ xen-4.1.2-testing/xen/arch/x86/traps.c
@@ -368,6 +368,36 @@ void vcpu_show_execution_state(struct vc
vcpu_unpause(v);
}
+unsigned long *get_x86_gpr(struct cpu_user_regs *regs, unsigned int modrm_reg)
+{
+ void *p;
+
+ switch ( modrm_reg )
+ {
+ case 0: p = ®s->eax; break;
+ case 1: p = ®s->ecx; break;
+ case 2: p = ®s->edx; break;
+ case 3: p = ®s->ebx; break;
+ case 4: p = ®s->esp; break;
+ case 5: p = ®s->ebp; break;
+ case 6: p = ®s->esi; break;
+ case 7: p = ®s->edi; break;
+#if defined(__x86_64__)
+ case 8: p = ®s->r8; break;
+ case 9: p = ®s->r9; break;
+ case 10: p = ®s->r10; break;
+ case 11: p = ®s->r11; break;
+ case 12: p = ®s->r12; break;
+ case 13: p = ®s->r13; break;
+ case 14: p = ®s->r14; break;
+ case 15: p = ®s->r15; break;
+#endif
+ default: p = NULL; break;
+ }
+
+ return p;
+}
+
static char *trapstr(int trapnr)
{
static char *strings[] = {
Index: xen-4.1.2-testing/xen/include/asm-x86/hvm/support.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/hvm/support.h
+++ xen-4.1.2-testing/xen/include/asm-x86/hvm/support.h
@@ -137,5 +137,7 @@ int hvm_set_cr3(unsigned long value);
int hvm_set_cr4(unsigned long value);
int hvm_msr_read_intercept(unsigned int msr, uint64_t *msr_content);
int hvm_msr_write_intercept(unsigned int msr, uint64_t msr_content);
+int hvm_mov_to_cr(unsigned int cr, unsigned int gpr);
+int hvm_mov_from_cr(unsigned int cr, unsigned int gpr);
#endif /* __ASM_X86_HVM_SUPPORT_H__ */
Index: xen-4.1.2-testing/xen/include/asm-x86/hvm/vmx/vmx.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/hvm/vmx/vmx.h
+++ xen-4.1.2-testing/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -144,31 +144,15 @@ void vmx_update_cpu_exec_control(struct
* Exit Qualifications for MOV for Control Register Access
*/
/* 3:0 - control register number (CRn) */
-#define VMX_CONTROL_REG_ACCESS_NUM 0xf
+#define VMX_CONTROL_REG_ACCESS_NUM(eq) ((eq) & 0xf)
/* 5:4 - access type (CR write, CR read, CLTS, LMSW) */
-#define VMX_CONTROL_REG_ACCESS_TYPE 0x30
+#define VMX_CONTROL_REG_ACCESS_TYPE(eq) (((eq) >> 4) & 0x3)
+# define VMX_CONTROL_REG_ACCESS_TYPE_MOV_TO_CR 0
+# define VMX_CONTROL_REG_ACCESS_TYPE_MOV_FROM_CR 1
+# define VMX_CONTROL_REG_ACCESS_TYPE_CLTS 2
+# define VMX_CONTROL_REG_ACCESS_TYPE_LMSW 3
/* 10:8 - general purpose register operand */
-#define VMX_CONTROL_REG_ACCESS_GPR 0xf00
-#define VMX_CONTROL_REG_ACCESS_TYPE_MOV_TO_CR (0 << 4)
-#define VMX_CONTROL_REG_ACCESS_TYPE_MOV_FROM_CR (1 << 4)
-#define VMX_CONTROL_REG_ACCESS_TYPE_CLTS (2 << 4)
-#define VMX_CONTROL_REG_ACCESS_TYPE_LMSW (3 << 4)
-#define VMX_CONTROL_REG_ACCESS_GPR_EAX (0 << 8)
-#define VMX_CONTROL_REG_ACCESS_GPR_ECX (1 << 8)
-#define VMX_CONTROL_REG_ACCESS_GPR_EDX (2 << 8)
-#define VMX_CONTROL_REG_ACCESS_GPR_EBX (3 << 8)
-#define VMX_CONTROL_REG_ACCESS_GPR_ESP (4 << 8)
-#define VMX_CONTROL_REG_ACCESS_GPR_EBP (5 << 8)
-#define VMX_CONTROL_REG_ACCESS_GPR_ESI (6 << 8)
-#define VMX_CONTROL_REG_ACCESS_GPR_EDI (7 << 8)
-#define VMX_CONTROL_REG_ACCESS_GPR_R8 (8 << 8)
-#define VMX_CONTROL_REG_ACCESS_GPR_R9 (9 << 8)
-#define VMX_CONTROL_REG_ACCESS_GPR_R10 (10 << 8)
-#define VMX_CONTROL_REG_ACCESS_GPR_R11 (11 << 8)
-#define VMX_CONTROL_REG_ACCESS_GPR_R12 (12 << 8)
-#define VMX_CONTROL_REG_ACCESS_GPR_R13 (13 << 8)
-#define VMX_CONTROL_REG_ACCESS_GPR_R14 (14 << 8)
-#define VMX_CONTROL_REG_ACCESS_GPR_R15 (15 << 8)
+#define VMX_CONTROL_REG_ACCESS_GPR(eq) (((eq) >> 8) & 0xf)
/*
* Access Rights
Index: xen-4.1.2-testing/xen/include/asm-x86/processor.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/processor.h
+++ xen-4.1.2-testing/xen/include/asm-x86/processor.h
@@ -592,6 +592,8 @@ int wrmsr_hypervisor_regs(uint32_t idx,
int microcode_update(XEN_GUEST_HANDLE(const_void), unsigned long len);
int microcode_resume_cpu(int cpu);
+unsigned long *get_x86_gpr(struct cpu_user_regs *regs, unsigned int modrm_reg);
+
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_X86_PROCESSOR_H */
++++++ 23234-svm-decode-assist-base.patch ++++++
References: FATE#309900
# HG changeset patch
# User Andre Przywara <andre.przywara@xxxxxxx>
# Date 1303116553 -3600
# Node ID bf7afd48339a18cd86d89337f3c055045fb78d3b
# Parent 1276926e3795b11ef6ac2f59df900d8e0ba9f54b
svm: add bit definitions for SVM DecodeAssist
Chapter 15.33 of recent APM Vol.2 manuals describe some additions
to SVM called DecodeAssist. Add the newly added fields to the VMCB
structure and name the associated CPUID bit.
Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx>
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1303117802 -3600
# Node ID 381ab77db71a4739b8a4f4fdad4ef3504999f998
# Parent e324c4d1dd6eeb9417fec513640ca795bd0f5dd4
svm: decode-assists feature must depend on nextrip feature.
...since the decode-assist fast paths assume nextrip vmcb field is
valid.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/hvm/svm/svm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/svm/svm.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/svm/svm.c
@@ -928,11 +928,16 @@ struct hvm_function_table * __init start
printk("SVM: Supported advanced features:\n");
+ /* DecodeAssists fast paths assume nextrip is valid for fast rIP update. */
+ if ( !cpu_has_svm_nrips )
+ clear_bit(SVM_FEATURE_DECODEASSISTS, &svm_feature_flags);
+
#define P(p,s) if ( p ) { printk(" - %s\n", s); printed = 1; }
P(cpu_has_svm_npt, "Nested Page Tables (NPT)");
P(cpu_has_svm_lbrv, "Last Branch Record (LBR) Virtualisation");
P(cpu_has_svm_nrips, "Next-RIP Saved on #VMEXIT");
P(cpu_has_svm_cleanbits, "VMCB Clean Bits");
+ P(cpu_has_svm_decode, "DecodeAssists");
P(cpu_has_pause_filter, "Pause-Intercept Filter");
#undef P
Index: xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/svm.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/hvm/svm/svm.h
+++ xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/svm.h
@@ -80,6 +80,7 @@ extern u32 svm_feature_flags;
#define cpu_has_svm_svml cpu_has_svm_feature(SVM_FEATURE_SVML)
#define cpu_has_svm_nrips cpu_has_svm_feature(SVM_FEATURE_NRIPS)
#define cpu_has_svm_cleanbits cpu_has_svm_feature(SVM_FEATURE_VMCBCLEAN)
+#define cpu_has_svm_decode cpu_has_svm_feature(SVM_FEATURE_DECODEASSISTS)
#define cpu_has_pause_filter cpu_has_svm_feature(SVM_FEATURE_PAUSEFILTER)
#endif /* __ASM_X86_HVM_SVM_H__ */
Index: xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/vmcb.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/hvm/svm/vmcb.h
+++ xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/vmcb.h
@@ -432,7 +432,9 @@ struct vmcb_struct {
vmcbcleanbits_t cleanbits; /* offset 0xC0 */
u32 res09; /* offset 0xC4 */
u64 nextrip; /* offset 0xC8 */
- u64 res10a[102]; /* offset 0xD0 pad to save area */
+ u8 guest_ins_len; /* offset 0xD0 */
+ u8 guest_ins[15]; /* offset 0xD1 */
+ u64 res10a[100]; /* offset 0xE0 pad to save area */
svm_segment_register_t es; /* offset 1024 - cleanbit 8 */
svm_segment_register_t cs; /* cleanbit 8 */
++++++ 23235-svm-decode-assist-crs.patch ++++++
References: FATE#309900
# HG changeset patch
# User Andre Przywara <andre.przywara@xxxxxxx>
# Date 1303117266 -3600
# Node ID 2c8ad607ece18b4740b9fc4ffe267a0e0893c141
# Parent bf7afd48339a18cd86d89337f3c055045fb78d3b
svm: implement CR access part of DecodeAssist
Newer SVM implementations (Bulldozer) now give the used general
purpose register on a MOV-CR intercept explictly. This avoids
fetching and decoding the instruction from guest's memory and speeds
up some Windows guest, which exercise CR8 quite often.
Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx>
Signed-off-by: Keir Fraser <keir@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/hvm/svm/svm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/svm/svm.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/svm/svm.c
@@ -1039,6 +1039,22 @@ static void svm_vmexit_do_cpuid(struct c
__update_guest_eip(regs, inst_len);
}
+static void svm_vmexit_do_cr_access(
+ struct vmcb_struct *vmcb, struct cpu_user_regs *regs)
+{
+ int gp, cr, dir, rc;
+
+ cr = vmcb->exitcode - VMEXIT_CR0_READ;
+ dir = (cr > 15);
+ cr &= 0xf;
+ gp = vmcb->exitinfo1 & 0xf;
+
+ rc = dir ? hvm_mov_to_cr(cr, gp) : hvm_mov_from_cr(cr, gp);
+
+ if ( rc == X86EMUL_OKAY )
+ __update_guest_eip(regs, vmcb->nextrip - vmcb->rip);
+}
+
static void svm_dr_access(struct vcpu *v, struct cpu_user_regs *regs)
{
HVMTRACE_0D(DR_WRITE);
@@ -1620,11 +1636,19 @@ asmlinkage void svm_vmexit_handler(struc
int dir = (vmcb->exitinfo1 & 1) ? IOREQ_READ : IOREQ_WRITE;
if ( handle_pio(port, bytes, dir) )
__update_guest_eip(regs, vmcb->exitinfo2 - vmcb->rip);
- break;
}
- /* fallthrough to emulation if a string instruction */
+ else if ( !handle_mmio() )
+ hvm_inject_exception(TRAP_gp_fault, 0, 0);
+ break;
+
case VMEXIT_CR0_READ ... VMEXIT_CR15_READ:
case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE:
+ if ( cpu_has_svm_decode && (vmcb->exitinfo1 & (1ULL << 63)) )
+ svm_vmexit_do_cr_access(vmcb, regs);
+ else if ( !handle_mmio() )
+ hvm_inject_exception(TRAP_gp_fault, 0, 0);
+ break;
+
case VMEXIT_INVLPG:
case VMEXIT_INVLPGA:
if ( !handle_mmio() )
++++++ 23236-svm-decode-assist-invlpg.patch ++++++
References: FATE#309900
# HG changeset patch
# User Christoph Egger <Christoph.Egger@xxxxxxx>
# Date 1302700499 -3600
# Node ID 3b2182100ba2fa5c4a3a450e473717e2300aa8f1
# Parent 2284c79b606ac14ef5c5bc2c1cce62188b5bd9ee
x86/svm/asid: Introduce svm_invlpga()
Signed-off-by: Christoph Egger <Christoph.Egger@xxxxxxx>
# HG changeset patch
# User Andre Przywara <andre.przywara@xxxxxxx>
# Date 1303117597 -3600
# Node ID e324c4d1dd6eeb9417fec513640ca795bd0f5dd4
# Parent 2c8ad607ece18b4740b9fc4ffe267a0e0893c141
svm: implement INVLPG part of DecodeAssist
Newer SVM implementations (Bulldozer) give the desired address on
a INVLPG intercept explicitly in the EXITINFO1 field of the VMCB.
Use this address to avoid a costly instruction fetch and decode
cycle.
Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx>
# HG changeset patch
# User Christoph Egger <Christoph.Egger@xxxxxxx>
# Date 1305187246 -3600
# Node ID 19d6541c4abec3486c83de76102ec46d7fe22a16
# Parent b6e8e916ed2827fb1329de0de2e23ee5b6b78662
nestedsvm: update rip on invlpga intercept
Fixes endless loop.
Signed-off-by: Christoph Egger <Christoph.Egger@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/hvm/svm/emulate.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/svm/emulate.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/svm/emulate.c
@@ -102,6 +102,7 @@ MAKE_INSTR(INT3, 1, 0xcc);
MAKE_INSTR(RDTSC, 2, 0x0f, 0x31);
MAKE_INSTR(PAUSE, 1, 0x90);
MAKE_INSTR(XSETBV, 3, 0x0f, 0x01, 0xd1);
+MAKE_INSTR(INVLPGA,3, 0x0f, 0x01, 0xdf);
static const u8 *opc_bytes[INSTR_MAX_COUNT] =
{
@@ -116,6 +117,7 @@ static const u8 *opc_bytes[INSTR_MAX_COU
[INSTR_RDTSC] = OPCODE_RDTSC,
[INSTR_PAUSE] = OPCODE_PAUSE,
[INSTR_XSETBV] = OPCODE_XSETBV,
+ [INSTR_INVLPGA] = OPCODE_INVLPGA,
};
static int fetch(struct vcpu *v, u8 *buf, unsigned long addr, int len)
Index: xen-4.1.2-testing/xen/arch/x86/hvm/svm/svm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/svm/svm.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/svm/svm.c
@@ -1650,11 +1650,22 @@ asmlinkage void svm_vmexit_handler(struc
break;
case VMEXIT_INVLPG:
- case VMEXIT_INVLPGA:
- if ( !handle_mmio() )
+ if ( cpu_has_svm_decode )
+ {
+ svm_invlpg_intercept(vmcb->exitinfo1);
+ __update_guest_eip(regs, vmcb->nextrip - vmcb->rip);
+ }
+ else if ( !handle_mmio() )
hvm_inject_exception(TRAP_gp_fault, 0, 0);
break;
+ case VMEXIT_INVLPGA:
+ if ( (inst_len = __get_instruction_length(v, INSTR_INVLPGA)) == 0 )
+ break;
+ svm_invlpga(regs->eax, v->arch.hvm_vcpu.asid);
+ __update_guest_eip(regs, inst_len);
+ break;
+
case VMEXIT_VMMCALL:
if ( (inst_len = __get_instruction_length(v, INSTR_VMCALL)) == 0 )
break;
Index: xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/asid.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/hvm/svm/asid.h
+++ xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/asid.h
@@ -34,10 +34,7 @@ static inline void svm_asid_g_invlpg(str
{
#if 0
/* Optimization? */
- asm volatile (".byte 0x0F,0x01,0xDF \n"
- : /* output */
- : /* input */
- "a" (g_vaddr), "c"(v->arch.hvm_svm.vmcb->guest_asid) );
+ svm_invlpga(g_vaddr, v->arch.hvm_svm.vmcb->guest_asid);
#endif
/* Safe fallback. Take a new ASID. */
Index: xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/emulate.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/hvm/svm/emulate.h
+++ xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/emulate.h
@@ -33,6 +33,7 @@ enum instruction_index {
INSTR_RDTSC,
INSTR_PAUSE,
INSTR_XSETBV,
+ INSTR_INVLPGA,
INSTR_MAX_COUNT /* Must be last - Number of instructions supported */
};
Index: xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/svm.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/hvm/svm/svm.h
+++ xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/svm.h
@@ -62,6 +62,15 @@ static inline void svm_vmsave(void *vmcb
: : "a" (__pa(vmcb)) : "memory" );
}
+static inline void svm_invlpga(unsigned long vaddr, uint32_t asid)
+{
+ asm volatile (
+ ".byte 0x0f,0x01,0xdf"
+ : /* output */
+ : /* input */
+ "a" (vaddr), "c" (asid));
+}
+
extern u32 svm_feature_flags;
#define SVM_FEATURE_NPT 0 /* Nested page table support */
++++++ 23238-svm-decode-assist-insn-fetch.patch ++++++
References: FATE#309900
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1303130170 -3600
# Node ID 60f5df2afcbbe1e8d8438c2b7b8223d9d2102e06
# Parent 381ab77db71a4739b8a4f4fdad4ef3504999f998
svm: implement instruction fetch part of DecodeAssist (on #PF/#NPF)
Newer SVM implementations (Bulldozer) copy up to 15 bytes from the
instruction stream into the VMCB when a #PF or #NPF exception is
intercepted. This patch makes use of this information if available.
This saves us from a) traversing the guest's page tables, b) mapping
the guest's memory and c) copy the instructions from there into the
hypervisor's address space.
This speeds up #NPF intercepts quite a lot and avoids cache and TLB
trashing.
Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx>
Signed-off-by: Keir Fraser <keir@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/hvm/emulate.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/emulate.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/emulate.c
@@ -996,6 +996,8 @@ int hvm_emulate_one(
hvmemul_ctxt->insn_buf_eip = regs->eip;
hvmemul_ctxt->insn_buf_bytes =
+ hvm_get_insn_bytes(curr, hvmemul_ctxt->insn_buf)
+ ? :
(hvm_virtual_to_linear_addr(
x86_seg_cs, &hvmemul_ctxt->seg_reg[x86_seg_cs],
regs->eip, sizeof(hvmemul_ctxt->insn_buf),
Index: xen-4.1.2-testing/xen/arch/x86/hvm/svm/svm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/svm/svm.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/svm/svm.c
@@ -603,6 +603,21 @@ static void svm_set_rdtsc_exiting(struct
vmcb_set_general1_intercepts(vmcb, general1_intercepts);
}
+static unsigned int svm_get_insn_bytes(struct vcpu *v, uint8_t *buf)
+{
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+ unsigned int len = v->arch.hvm_svm.cached_insn_len;
+
+ if ( len != 0 )
+ {
+ /* Latch and clear the cached instruction. */
+ memcpy(buf, vmcb->guest_ins, 15);
+ v->arch.hvm_svm.cached_insn_len = 0;
+ }
+
+ return len;
+}
+
static void svm_init_hypercall_page(struct domain *d, void *hypercall_page)
{
char *p;
@@ -1448,7 +1463,8 @@ static struct hvm_function_table __read_
.msr_read_intercept = svm_msr_read_intercept,
.msr_write_intercept = svm_msr_write_intercept,
.invlpg_intercept = svm_invlpg_intercept,
- .set_rdtsc_exiting = svm_set_rdtsc_exiting
+ .set_rdtsc_exiting = svm_set_rdtsc_exiting,
+ .get_insn_bytes = svm_get_insn_bytes
};
asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
@@ -1554,7 +1570,12 @@ asmlinkage void svm_vmexit_handler(struc
(unsigned long)regs->ecx, (unsigned long)regs->edx,
(unsigned long)regs->esi, (unsigned long)regs->edi);
- if ( paging_fault(va, regs) )
+ if ( cpu_has_svm_decode )
+ v->arch.hvm_svm.cached_insn_len = vmcb->guest_ins_len & 0xf;
+ rc = paging_fault(va, regs);
+ v->arch.hvm_svm.cached_insn_len = 0;
+
+ if ( rc )
{
if ( trace_will_trace_event(TRC_SHADOW) )
break;
@@ -1720,7 +1741,10 @@ asmlinkage void svm_vmexit_handler(struc
case VMEXIT_NPF:
perfc_incra(svmexits, VMEXIT_NPF_PERFC);
regs->error_code = vmcb->exitinfo1;
+ if ( cpu_has_svm_decode )
+ v->arch.hvm_svm.cached_insn_len = vmcb->guest_ins_len & 0xf;
svm_do_nested_pgfault(vmcb->exitinfo2);
+ v->arch.hvm_svm.cached_insn_len = 0;
break;
case VMEXIT_IRET: {
Index: xen-4.1.2-testing/xen/include/asm-x86/hvm/hvm.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/hvm/hvm.h
+++ xen-4.1.2-testing/xen/include/asm-x86/hvm/hvm.h
@@ -132,6 +132,9 @@ struct hvm_function_table {
int (*cpu_up)(void);
void (*cpu_down)(void);
+ /* Copy up to 15 bytes from cached instruction bytes at current rIP. */
+ unsigned int (*get_insn_bytes)(struct vcpu *v, uint8_t *buf);
+
/* Instruction intercepts: non-void return values are X86EMUL codes. */
void (*cpuid_intercept)(
unsigned int *eax, unsigned int *ebx,
@@ -328,6 +331,11 @@ static inline void hvm_cpu_down(void)
hvm_funcs.cpu_down();
}
+static inline unsigned int hvm_get_insn_bytes(struct vcpu *v, uint8_t *buf)
+{
+ return (hvm_funcs.get_insn_bytes ? hvm_funcs.get_insn_bytes(v, buf) : 0);
+}
+
enum hvm_task_switch_reason { TSW_jmp, TSW_iret, TSW_call_or_int };
void hvm_task_switch(
uint16_t tss_sel, enum hvm_task_switch_reason taskswitch_reason,
Index: xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/vmcb.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/hvm/svm/vmcb.h
+++ xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/vmcb.h
@@ -498,6 +498,9 @@ struct arch_svm_struct {
int launch_core;
bool_t vmcb_in_sync; /* VMCB sync'ed with VMSAVE? */
+ /* VMCB has a cached instruction from #PF/#NPF Decode Assist? */
+ uint8_t cached_insn_len; /* Zero if no cached instruction. */
+
/* Upper four bytes are undefined in the VMCB, therefore we can't
* use the fields in the VMCB. Write a 64bit value and then read a 64bit
* value is fine unless there's a VMRUN/VMEXIT in between which clears
++++++
23239-xentrace_correct_overflow_check_for_number_of_per-cpu_trace_pages.patch
++++++
changeset: 23239:51d89366c859
user: Olaf Hering <olaf@xxxxxxxxx>
date: Mon Apr 18 15:12:04 2011 +0100
files: xen/common/trace.c
description:
xentrace: correct overflow check for number of per-cpu trace pages
The calculated number of per-cpu trace pages is stored in t_info and
shared with tools like xentrace. Since its an u16 the value may
overflow because the current check is based on u32. Using the u16
means each cpu could in theory use up to 256MB as trace
buffer. However such a large allocation will currently fail on x86 due
to the MAX_ORDER limit. Check both max theoretical number of pages
per cpu and max number of pages reachable by struct t_buf->prod/cons
variables with requested number of pages.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
---
xen/common/trace.c | 22 +++++++++++++++-------
1 file changed, 15 insertions(+), 7 deletions(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -104,25 +104,33 @@ static void calc_tinfo_first_offset(void
* calculate_tbuf_size - check to make sure that the proposed size will fit
* in the currently sized struct t_info and allows prod and cons to
* reach double the value without overflow.
+ * The t_info layout is fixed and cant be changed without breaking xentrace.
* Initialize t_info_pages based on number of trace pages.
*/
static int calculate_tbuf_size(unsigned int pages)
{
- struct t_buf dummy;
- typeof(dummy.prod) size;
+ struct t_buf dummy_size;
+ typeof(dummy_size.prod) max_size;
+ struct t_info dummy_pages;
+ typeof(dummy_pages.tbuf_size) max_pages;
unsigned int t_info_words;
/* force maximum value for an unsigned type */
- size = -1;
+ max_size = -1;
+ max_pages = -1;
/* max size holds up to n pages */
- size /= PAGE_SIZE;
- if ( pages > size )
+ max_size /= PAGE_SIZE;
+
+ if ( max_size < max_pages )
+ max_pages = max_size;
+
+ if ( pages > max_pages )
{
printk(XENLOG_INFO "xentrace: requested number of %u pages "
"reduced to %u\n",
- pages, (unsigned int)size);
- pages = size;
+ pages, max_pages);
+ pages = max_pages;
}
t_info_words = num_online_cpus() * pages * sizeof(uint32_t);
++++++ 23246-x86-xsave-enable.patch ++++++
References: bnc#718668
# HG changeset patch
# User Tim Deegan <Tim.Deegan@xxxxxxxxxx>
# Date 1303297371 -3600
# Node ID eb4505f8dd97f894ee4b4e1b55ea1272c05e6759
# Parent 3539ef956a378ad7fe39654ff9aca5b0e7bf8843
xen/x86: re-enable xsave by default now that it supports live migration.
Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxx>
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -25,7 +25,7 @@ boolean_param("nofxsr", disable_x86_fxsr
static bool_t __cpuinitdata disable_x86_serial_nr;
boolean_param("noserialnumber", disable_x86_serial_nr);
-static bool_t __cpuinitdata use_xsave;
+static bool_t __cpuinitdata use_xsave = 1;
boolean_param("xsave", use_xsave);
unsigned int __devinitdata opt_cpuid_mask_ecx = ~0u;
integer_param("cpuid_mask_ecx", opt_cpuid_mask_ecx);
++++++ 23303-cpufreq-misc.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1304930762 -3600
# Node ID 82180954eda9cfe279e7ecf8c9ed4ffa29796bfb
# Parent c822888f36568f26e95f9844c7f0c5e06df7aa20
misc cpufreq cleanup
- proper handling of governor command line options when using the
default governor
- warning message for unrecognized command line options
- replacing a NR_CPUS sized array with per-CPU data
- a couple of __read_mostly annotations
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/drivers/cpufreq/cpufreq.c
===================================================================
--- xen-4.1.2-testing.orig/xen/drivers/cpufreq/cpufreq.c
+++ xen-4.1.2-testing/xen/drivers/cpufreq/cpufreq.c
@@ -47,7 +47,8 @@
#include <acpi/acpi.h>
#include <acpi/cpufreq/cpufreq.h>
-static unsigned int usr_max_freq, usr_min_freq;
+static unsigned int __read_mostly usr_min_freq;
+static unsigned int __read_mostly usr_max_freq;
static void cpufreq_cmdline_common_para(struct cpufreq_policy *new_policy);
struct cpufreq_dom {
@@ -57,7 +58,7 @@ struct cpufreq_dom {
};
static LIST_HEAD(cpufreq_dom_list_head);
-struct cpufreq_governor *cpufreq_opt_governor;
+struct cpufreq_governor *__read_mostly cpufreq_opt_governor;
LIST_HEAD(cpufreq_governor_list);
bool_t __read_mostly cpufreq_verbose;
@@ -543,6 +544,7 @@ void __init cpufreq_cmdline_parse(char *
{
static struct cpufreq_governor *__initdata cpufreq_governors[] =
{
+ CPUFREQ_DEFAULT_GOVERNOR,
&cpufreq_gov_userspace,
&cpufreq_gov_dbs,
&cpufreq_gov_performance,
@@ -576,8 +578,10 @@ void __init cpufreq_cmdline_parse(char *
}
if (str && !cpufreq_handle_common_option(str, val) &&
- cpufreq_governors[gov_index]->handle_option)
- cpufreq_governors[gov_index]->handle_option(str, val);
+ (!cpufreq_governors[gov_index]->handle_option ||
+ !cpufreq_governors[gov_index]->handle_option(str, val)))
+ printk(XENLOG_WARNING "cpufreq/%s: option '%s' not recognized\n",
+ cpufreq_governors[gov_index]->name, str);
str = end;
} while (str);
Index: xen-4.1.2-testing/xen/drivers/cpufreq/cpufreq_misc_governors.c
===================================================================
--- xen-4.1.2-testing.orig/xen/drivers/cpufreq/cpufreq_misc_governors.c
+++ xen-4.1.2-testing/xen/drivers/cpufreq/cpufreq_misc_governors.c
@@ -14,14 +14,17 @@
*
*/
+#include <xen/cpu.h>
#include <xen/init.h>
+#include <xen/percpu.h>
#include <xen/sched.h>
#include <acpi/cpufreq/cpufreq.h>
/*
* cpufreq userspace governor
*/
-static unsigned int cpu_set_freq[NR_CPUS];
+static unsigned int __read_mostly userspace_cmdline_freq;
+static DEFINE_PER_CPU(unsigned int, cpu_set_freq);
static int cpufreq_governor_userspace(struct cpufreq_policy *policy,
unsigned int event)
@@ -35,21 +38,21 @@ static int cpufreq_governor_userspace(st
switch (event) {
case CPUFREQ_GOV_START:
- if (!cpu_set_freq[cpu])
- cpu_set_freq[cpu] = policy->cur;
+ if (!per_cpu(cpu_set_freq, cpu))
+ per_cpu(cpu_set_freq, cpu) = policy->cur;
break;
case CPUFREQ_GOV_STOP:
- cpu_set_freq[cpu] = 0;
+ per_cpu(cpu_set_freq, cpu) = 0;
break;
case CPUFREQ_GOV_LIMITS:
- if (policy->max < cpu_set_freq[cpu])
+ if (policy->max < per_cpu(cpu_set_freq, cpu))
ret = __cpufreq_driver_target(policy, policy->max,
CPUFREQ_RELATION_H);
- else if (policy->min > cpu_set_freq[cpu])
+ else if (policy->min > per_cpu(cpu_set_freq, cpu))
ret = __cpufreq_driver_target(policy, policy->min,
CPUFREQ_RELATION_L);
else
- ret = __cpufreq_driver_target(policy, cpu_set_freq[cpu],
+ ret = __cpufreq_driver_target(policy, per_cpu(cpu_set_freq, cpu),
CPUFREQ_RELATION_L);
break;
@@ -68,7 +71,7 @@ int write_userspace_scaling_setspeed(uns
if (!cpu_online(cpu) || !(policy = per_cpu(cpufreq_cpu_policy, cpu)))
return -EINVAL;
- cpu_set_freq[cpu] = freq;
+ per_cpu(cpu_set_freq, cpu) = freq;
if (freq < policy->min)
freq = policy->min;
@@ -78,19 +81,35 @@ int write_userspace_scaling_setspeed(uns
return __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);
}
-static void __init
+static bool_t __init
cpufreq_userspace_handle_option(const char *name, const char *val)
{
if (!strcmp(name, "speed") && val) {
- unsigned int usr_cmdline_freq;
- unsigned int cpu;
+ userspace_cmdline_freq = simple_strtoul(val, NULL, 0);
+ return 1;
+ }
+ return 0;
+}
- usr_cmdline_freq = simple_strtoul(val, NULL, 0);
- for (cpu = 0; cpu < NR_CPUS; cpu++)
- cpu_set_freq[cpu] = usr_cmdline_freq;
+static int cpufreq_userspace_cpu_callback(
+ struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned long)hcpu;
+
+ switch (action)
+ {
+ case CPU_UP_PREPARE:
+ per_cpu(cpu_set_freq, cpu) = userspace_cmdline_freq;
+ break;
}
+
+ return NOTIFY_DONE;
}
+static struct notifier_block cpufreq_userspace_cpu_nfb = {
+ .notifier_call = cpufreq_userspace_cpu_callback
+};
+
struct cpufreq_governor cpufreq_gov_userspace = {
.name = "userspace",
.governor = cpufreq_governor_userspace,
@@ -99,6 +118,11 @@ struct cpufreq_governor cpufreq_gov_user
static int __init cpufreq_gov_userspace_init(void)
{
+ unsigned int cpu;
+
+ for_each_online_cpu(cpu)
+ per_cpu(cpu_set_freq, cpu) = userspace_cmdline_freq;
+ register_cpu_notifier(&cpufreq_userspace_cpu_nfb);
return cpufreq_register_governor(&cpufreq_gov_userspace);
}
__initcall(cpufreq_gov_userspace_init);
Index: xen-4.1.2-testing/xen/drivers/cpufreq/cpufreq_ondemand.c
===================================================================
--- xen-4.1.2-testing.orig/xen/drivers/cpufreq/cpufreq_ondemand.c
+++ xen-4.1.2-testing/xen/drivers/cpufreq/cpufreq_ondemand.c
@@ -296,7 +296,7 @@ int cpufreq_governor_dbs(struct cpufreq_
return 0;
}
-static void __init cpufreq_dbs_handle_option(const char *name, const char *val)
+static bool_t __init cpufreq_dbs_handle_option(const char *name, const char
*val)
{
if ( !strcmp(name, "rate") && val )
{
@@ -334,6 +334,9 @@ static void __init cpufreq_dbs_handle_op
}
dbs_tuners_ins.powersave_bias = tmp;
}
+ else
+ return 0;
+ return 1;
}
struct cpufreq_governor cpufreq_gov_dbs = {
Index: xen-4.1.2-testing/xen/include/acpi/cpufreq/cpufreq.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/acpi/cpufreq/cpufreq.h
+++ xen-4.1.2-testing/xen/include/acpi/cpufreq/cpufreq.h
@@ -93,7 +93,7 @@ struct cpufreq_governor {
char name[CPUFREQ_NAME_LEN];
int (*governor)(struct cpufreq_policy *policy,
unsigned int event);
- void (*handle_option)(const char *name, const char *value);
+ bool_t (*handle_option)(const char *name, const char *value);
struct list_head governor_list;
};
++++++ 23304-amd-oprofile-strings.patch ++++++
References: FATE#309893, FATE#309902, FATE#309903, FATE#309906
# HG changeset patch
# User Jacob Shin <jacob.shin@xxxxxxx>
# Date 1304930954 -3600
# Node ID 8981b582be3e2f6647ef5ff3d93e167436ed357a
# Parent 82180954eda9cfe279e7ecf8c9ed4ffa29796bfb
xenoprof: Update cpu_type to sync with upstream oprofile
Update xenoprof's cpu_type to match upstream oprofile. Currently AMD
Family 11h ~ Family 15h are broken due to string mismatches.
Signed-off-by: Jacob Shin <jacob.shin@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/oprofile/nmi_int.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/oprofile/nmi_int.c
+++ xen-4.1.2-testing/xen/arch/x86/oprofile/nmi_int.c
@@ -435,19 +435,19 @@ static int __init nmi_init(void)
break;
case 0x11:
model = &op_athlon_spec;
- cpu_type = "x86-64/family11";
+ cpu_type = "x86-64/family11h";
break;
case 0x12:
model = &op_athlon_spec;
- cpu_type = "x86-64/family12";
+ cpu_type = "x86-64/family12h";
break;
case 0x14:
model = &op_athlon_spec;
- cpu_type = "x86-64/family14";
+ cpu_type = "x86-64/family14h";
break;
case 0x15:
model = &op_athlon_spec;
- cpu_type = "x86-64/family15";
+ cpu_type = "x86-64/family15h";
break;
}
break;
++++++ 23305-amd-fam15-xenoprof.patch ++++++
References: FATE#309893, FATE#309906
# HG changeset patch
# User Jacob Shin <jacob.shin@xxxxxxx>
# Date 1304931187 -3600
# Node ID 014ee4e09644bd3ae55919d267f742c1d60c337a
# Parent 8981b582be3e2f6647ef5ff3d93e167436ed357a
xenoprof: Add support for AMD Family 15h processors
AMD Family 15h CPU mirrors legacy K7 performance monitor counters to
a new location, and adds 2 new counters. This patch updates xenoprof
to take advantage of the new counters.
Signed-off-by: Jacob Shin <jacob.shin@xxxxxxx>
Rename fam15h -> amd_fam15h in a few places, as suggested by Jan
Beulich.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/oprofile/nmi_int.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/oprofile/nmi_int.c
+++ xen-4.1.2-testing/xen/arch/x86/oprofile/nmi_int.c
@@ -30,7 +30,7 @@
struct op_counter_config counter_config[OP_MAX_COUNTER];
struct op_ibs_config ibs_config;
-static struct op_x86_model_spec const *__read_mostly model;
+struct op_x86_model_spec const *__read_mostly model;
static struct op_msrs cpu_msrs[NR_CPUS];
static unsigned long saved_lvtpc[NR_CPUS];
@@ -446,7 +446,7 @@ static int __init nmi_init(void)
cpu_type = "x86-64/family14h";
break;
case 0x15:
- model = &op_athlon_spec;
+ model = &op_amd_fam15h_spec;
cpu_type = "x86-64/family15h";
break;
}
Index: xen-4.1.2-testing/xen/arch/x86/oprofile/op_model_athlon.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/oprofile/op_model_athlon.c
+++ xen-4.1.2-testing/xen/arch/x86/oprofile/op_model_athlon.c
@@ -24,8 +24,13 @@
#include "op_x86_model.h"
#include "op_counter.h"
-#define NUM_COUNTERS 4
-#define NUM_CONTROLS 4
+#define K7_NUM_COUNTERS 4
+#define K7_NUM_CONTROLS 4
+
+#define FAM15H_NUM_COUNTERS 6
+#define FAM15H_NUM_CONTROLS 6
+
+#define MAX_COUNTERS FAM15H_NUM_COUNTERS
#define CTR_READ(msr_content,msrs,c) do {rdmsrl(msrs->counters[(c)].addr,
(msr_content));} while (0)
#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned
int)(l), -1);} while (0)
@@ -44,9 +49,10 @@
#define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 0x1ULL) << 41))
#define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 0x1ULL) << 40))
-static unsigned long reset_value[NUM_COUNTERS];
+static unsigned long reset_value[MAX_COUNTERS];
extern char svm_stgi_label[];
+extern struct op_x86_model_spec const *__read_mostly model;
#ifdef CONFIG_X86_64
u32 ibs_caps = 0;
@@ -175,26 +181,44 @@ static void athlon_fill_in_addresses(str
msrs->controls[3].addr = MSR_K7_EVNTSEL3;
}
-
+static void fam15h_fill_in_addresses(struct op_msrs * const msrs)
+{
+ msrs->counters[0].addr = MSR_AMD_FAM15H_PERFCTR0;
+ msrs->counters[1].addr = MSR_AMD_FAM15H_PERFCTR1;
+ msrs->counters[2].addr = MSR_AMD_FAM15H_PERFCTR2;
+ msrs->counters[3].addr = MSR_AMD_FAM15H_PERFCTR3;
+ msrs->counters[4].addr = MSR_AMD_FAM15H_PERFCTR4;
+ msrs->counters[5].addr = MSR_AMD_FAM15H_PERFCTR5;
+
+ msrs->controls[0].addr = MSR_AMD_FAM15H_EVNTSEL0;
+ msrs->controls[1].addr = MSR_AMD_FAM15H_EVNTSEL1;
+ msrs->controls[2].addr = MSR_AMD_FAM15H_EVNTSEL2;
+ msrs->controls[3].addr = MSR_AMD_FAM15H_EVNTSEL3;
+ msrs->controls[4].addr = MSR_AMD_FAM15H_EVNTSEL4;
+ msrs->controls[5].addr = MSR_AMD_FAM15H_EVNTSEL5;
+}
+
static void athlon_setup_ctrs(struct op_msrs const * const msrs)
{
uint64_t msr_content;
int i;
+ unsigned int const nr_ctrs = model->num_counters;
+ unsigned int const nr_ctrls = model->num_controls;
/* clear all counters */
- for (i = 0 ; i < NUM_CONTROLS; ++i) {
+ for (i = 0 ; i < nr_ctrls; ++i) {
CTRL_READ(msr_content, msrs, i);
CTRL_CLEAR(msr_content);
CTRL_WRITE(msr_content, msrs, i);
}
/* avoid a false detection of ctr overflows in NMI handler */
- for (i = 0; i < NUM_COUNTERS; ++i) {
+ for (i = 0; i < nr_ctrs; ++i) {
CTR_WRITE(1, msrs, i);
}
/* enable active counters */
- for (i = 0; i < NUM_COUNTERS; ++i) {
+ for (i = 0; i < nr_ctrs; ++i) {
if (counter_config[i].enabled) {
reset_value[i] = counter_config[i].count;
@@ -300,6 +324,7 @@ static int athlon_check_ctrs(unsigned in
int mode = 0;
struct vcpu *v = current;
struct cpu_user_regs *guest_regs = guest_cpu_user_regs();
+ unsigned int const nr_ctrs = model->num_counters;
if (!guest_mode(regs) &&
(regs->eip == (unsigned long)svm_stgi_label)) {
@@ -312,7 +337,7 @@ static int athlon_check_ctrs(unsigned in
mode = xenoprofile_get_mode(v, regs);
}
- for (i = 0 ; i < NUM_COUNTERS; ++i) {
+ for (i = 0 ; i < nr_ctrs; ++i) {
CTR_READ(msr_content, msrs, i);
if (CTR_OVERFLOWED(msr_content)) {
xenoprof_log_event(current, regs, eip, mode, i);
@@ -373,7 +398,8 @@ static void athlon_start(struct op_msrs
{
uint64_t msr_content;
int i;
- for (i = 0 ; i < NUM_COUNTERS ; ++i) {
+ unsigned int const nr_ctrs = model->num_counters;
+ for (i = 0 ; i < nr_ctrs ; ++i) {
if (reset_value[i]) {
CTRL_READ(msr_content, msrs, i);
CTRL_SET_ACTIVE(msr_content);
@@ -401,10 +427,11 @@ static void athlon_stop(struct op_msrs c
{
uint64_t msr_content;
int i;
+ unsigned int const nr_ctrs = model->num_counters;
/* Subtle: stop on all counters to avoid race with
* setting our pm callback */
- for (i = 0 ; i < NUM_COUNTERS ; ++i) {
+ for (i = 0 ; i < nr_ctrs ; ++i) {
CTRL_READ(msr_content, msrs, i);
CTRL_SET_INACTIVE(msr_content);
CTRL_WRITE(msr_content, msrs, i);
@@ -512,11 +539,21 @@ void __init ibs_init(void)
#endif /* CONFIG_X86_64 */
struct op_x86_model_spec const op_athlon_spec = {
- .num_counters = NUM_COUNTERS,
- .num_controls = NUM_CONTROLS,
+ .num_counters = K7_NUM_COUNTERS,
+ .num_controls = K7_NUM_CONTROLS,
.fill_in_addresses = &athlon_fill_in_addresses,
.setup_ctrs = &athlon_setup_ctrs,
.check_ctrs = &athlon_check_ctrs,
.start = &athlon_start,
+ .stop = &athlon_stop
+};
+
+struct op_x86_model_spec const op_amd_fam15h_spec = {
+ .num_counters = FAM15H_NUM_COUNTERS,
+ .num_controls = FAM15H_NUM_CONTROLS,
+ .fill_in_addresses = &fam15h_fill_in_addresses,
+ .setup_ctrs = &athlon_setup_ctrs,
+ .check_ctrs = &athlon_check_ctrs,
+ .start = &athlon_start,
.stop = &athlon_stop
};
Index: xen-4.1.2-testing/xen/arch/x86/oprofile/op_x86_model.h
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/oprofile/op_x86_model.h
+++ xen-4.1.2-testing/xen/arch/x86/oprofile/op_x86_model.h
@@ -48,6 +48,7 @@ extern struct op_x86_model_spec op_arch_
extern struct op_x86_model_spec const op_p4_spec;
extern struct op_x86_model_spec const op_p4_ht2_spec;
extern struct op_x86_model_spec const op_athlon_spec;
+extern struct op_x86_model_spec const op_amd_fam15h_spec;
void arch_perfmon_setup_counters(void);
#endif /* OP_X86_MODEL_H */
Index: xen-4.1.2-testing/xen/include/asm-x86/msr-index.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/msr-index.h
+++ xen-4.1.2-testing/xen/include/asm-x86/msr-index.h
@@ -219,6 +219,19 @@
#define MSR_K8_VM_CR 0xc0010114
#define MSR_K8_VM_HSAVE_PA 0xc0010117
+#define MSR_AMD_FAM15H_EVNTSEL0 0xc0010200
+#define MSR_AMD_FAM15H_PERFCTR0 0xc0010201
+#define MSR_AMD_FAM15H_EVNTSEL1 0xc0010202
+#define MSR_AMD_FAM15H_PERFCTR1 0xc0010203
+#define MSR_AMD_FAM15H_EVNTSEL2 0xc0010204
+#define MSR_AMD_FAM15H_PERFCTR2 0xc0010205
+#define MSR_AMD_FAM15H_EVNTSEL3 0xc0010206
+#define MSR_AMD_FAM15H_PERFCTR3 0xc0010207
+#define MSR_AMD_FAM15H_EVNTSEL4 0xc0010208
+#define MSR_AMD_FAM15H_PERFCTR4 0xc0010209
+#define MSR_AMD_FAM15H_EVNTSEL5 0xc001020a
+#define MSR_AMD_FAM15H_PERFCTR5 0xc001020b
+
#define MSR_K8_FEATURE_MASK 0xc0011004
#define MSR_K8_EXT_FEATURE_MASK 0xc0011005
++++++ 23306-amd-fam15-vpmu.patch ++++++
References: FATE#309893, FATE#309906
# HG changeset patch
# User Jacob Shin <jacob.shin@xxxxxxx>
# Date 1304931286 -3600
# Node ID e787d4f2e5acdba48728a9390710de800315a540
# Parent 014ee4e09644bd3ae55919d267f742c1d60c337a
hvm: vpmu: Add support for AMD Family 15h processors
AMD Family 15h CPU mirrors legacy K7 performance monitor counters to
a new location, and adds 2 new counters. This patch updates HVM VPMU
to take advantage of the new counters.
Signed-off-by: Jacob Shin <jacob.shin@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/hvm/svm/svm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/svm/svm.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/svm/svm.c
@@ -1142,6 +1142,18 @@ static int svm_msr_read_intercept(unsign
case MSR_K7_EVNTSEL1:
case MSR_K7_EVNTSEL2:
case MSR_K7_EVNTSEL3:
+ case MSR_AMD_FAM15H_PERFCTR0:
+ case MSR_AMD_FAM15H_PERFCTR1:
+ case MSR_AMD_FAM15H_PERFCTR2:
+ case MSR_AMD_FAM15H_PERFCTR3:
+ case MSR_AMD_FAM15H_PERFCTR4:
+ case MSR_AMD_FAM15H_PERFCTR5:
+ case MSR_AMD_FAM15H_EVNTSEL0:
+ case MSR_AMD_FAM15H_EVNTSEL1:
+ case MSR_AMD_FAM15H_EVNTSEL2:
+ case MSR_AMD_FAM15H_EVNTSEL3:
+ case MSR_AMD_FAM15H_EVNTSEL4:
+ case MSR_AMD_FAM15H_EVNTSEL5:
vpmu_do_rdmsr(msr, msr_content);
break;
@@ -1237,6 +1249,18 @@ static int svm_msr_write_intercept(unsig
case MSR_K7_EVNTSEL1:
case MSR_K7_EVNTSEL2:
case MSR_K7_EVNTSEL3:
+ case MSR_AMD_FAM15H_PERFCTR0:
+ case MSR_AMD_FAM15H_PERFCTR1:
+ case MSR_AMD_FAM15H_PERFCTR2:
+ case MSR_AMD_FAM15H_PERFCTR3:
+ case MSR_AMD_FAM15H_PERFCTR4:
+ case MSR_AMD_FAM15H_PERFCTR5:
+ case MSR_AMD_FAM15H_EVNTSEL0:
+ case MSR_AMD_FAM15H_EVNTSEL1:
+ case MSR_AMD_FAM15H_EVNTSEL2:
+ case MSR_AMD_FAM15H_EVNTSEL3:
+ case MSR_AMD_FAM15H_EVNTSEL4:
+ case MSR_AMD_FAM15H_EVNTSEL5:
vpmu_do_wrmsr(msr, msr_content);
break;
Index: xen-4.1.2-testing/xen/arch/x86/hvm/svm/vpmu.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/svm/vpmu.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/svm/vpmu.c
@@ -36,7 +36,9 @@
#include <public/hvm/save.h>
#include <asm/hvm/vpmu.h>
-#define NUM_COUNTERS 4
+#define F10H_NUM_COUNTERS 4
+#define F15H_NUM_COUNTERS 6
+#define MAX_NUM_COUNTERS F15H_NUM_COUNTERS
#define MSR_F10H_EVNTSEL_GO_SHIFT 40
#define MSR_F10H_EVNTSEL_EN_SHIFT 22
@@ -47,6 +49,11 @@
#define set_guest_mode(msr) (msr |= (1ULL << MSR_F10H_EVNTSEL_GO_SHIFT))
#define is_overflowed(msr) (!((msr) & (1ULL << (MSR_F10H_COUNTER_LENGTH-1))))
+static int __read_mostly num_counters = 0;
+static u32 __read_mostly *counters = NULL;
+static u32 __read_mostly *ctrls = NULL;
+static bool_t __read_mostly k7_counters_mirrored = 0;
+
/* PMU Counter MSRs. */
u32 AMD_F10H_COUNTERS[] = {
MSR_K7_PERFCTR0,
@@ -63,10 +70,28 @@ u32 AMD_F10H_CTRLS[] = {
MSR_K7_EVNTSEL3
};
+u32 AMD_F15H_COUNTERS[] = {
+ MSR_AMD_FAM15H_PERFCTR0,
+ MSR_AMD_FAM15H_PERFCTR1,
+ MSR_AMD_FAM15H_PERFCTR2,
+ MSR_AMD_FAM15H_PERFCTR3,
+ MSR_AMD_FAM15H_PERFCTR4,
+ MSR_AMD_FAM15H_PERFCTR5
+};
+
+u32 AMD_F15H_CTRLS[] = {
+ MSR_AMD_FAM15H_EVNTSEL0,
+ MSR_AMD_FAM15H_EVNTSEL1,
+ MSR_AMD_FAM15H_EVNTSEL2,
+ MSR_AMD_FAM15H_EVNTSEL3,
+ MSR_AMD_FAM15H_EVNTSEL4,
+ MSR_AMD_FAM15H_EVNTSEL5
+};
+
/* storage for context switching */
struct amd_vpmu_context {
- u64 counters[NUM_COUNTERS];
- u64 ctrls[NUM_COUNTERS];
+ u64 counters[MAX_NUM_COUNTERS];
+ u64 ctrls[MAX_NUM_COUNTERS];
u32 hw_lapic_lvtpc;
};
@@ -78,10 +103,45 @@ static inline int get_pmu_reg_type(u32 a
if ( (addr >= MSR_K7_PERFCTR0) && (addr <= MSR_K7_PERFCTR3) )
return MSR_TYPE_COUNTER;
+ if ( (addr >= MSR_AMD_FAM15H_EVNTSEL0) &&
+ (addr <= MSR_AMD_FAM15H_PERFCTR5 ) )
+ {
+ if (addr & 1)
+ return MSR_TYPE_COUNTER;
+ else
+ return MSR_TYPE_CTRL;
+ }
+
/* unsupported registers */
return -1;
}
+static inline u32 get_fam15h_addr(u32 addr)
+{
+ switch ( addr )
+ {
+ case MSR_K7_PERFCTR0:
+ return MSR_AMD_FAM15H_PERFCTR0;
+ case MSR_K7_PERFCTR1:
+ return MSR_AMD_FAM15H_PERFCTR1;
+ case MSR_K7_PERFCTR2:
+ return MSR_AMD_FAM15H_PERFCTR2;
+ case MSR_K7_PERFCTR3:
+ return MSR_AMD_FAM15H_PERFCTR3;
+ case MSR_K7_EVNTSEL0:
+ return MSR_AMD_FAM15H_EVNTSEL0;
+ case MSR_K7_EVNTSEL1:
+ return MSR_AMD_FAM15H_EVNTSEL1;
+ case MSR_K7_EVNTSEL2:
+ return MSR_AMD_FAM15H_EVNTSEL2;
+ case MSR_K7_EVNTSEL3:
+ return MSR_AMD_FAM15H_EVNTSEL3;
+ default:
+ break;
+ }
+
+ return addr;
+}
static int amd_vpmu_do_interrupt(struct cpu_user_regs *regs)
{
@@ -110,12 +170,12 @@ static inline void context_restore(struc
struct vpmu_struct *vpmu = vcpu_vpmu(v);
struct amd_vpmu_context *ctxt = vpmu->context;
- for ( i = 0; i < NUM_COUNTERS; i++ )
- wrmsrl(AMD_F10H_CTRLS[i], ctxt->ctrls[i]);
+ for ( i = 0; i < num_counters; i++ )
+ wrmsrl(ctrls[i], ctxt->ctrls[i]);
- for ( i = 0; i < NUM_COUNTERS; i++ )
+ for ( i = 0; i < num_counters; i++ )
{
- wrmsrl(AMD_F10H_COUNTERS[i], ctxt->counters[i]);
+ wrmsrl(counters[i], ctxt->counters[i]);
/* Force an interrupt to allow guest reset the counter,
if the value is positive */
@@ -147,11 +207,11 @@ static inline void context_save(struct v
struct vpmu_struct *vpmu = vcpu_vpmu(v);
struct amd_vpmu_context *ctxt = vpmu->context;
- for ( i = 0; i < NUM_COUNTERS; i++ )
- rdmsrl(AMD_F10H_COUNTERS[i], ctxt->counters[i]);
+ for ( i = 0; i < num_counters; i++ )
+ rdmsrl(counters[i], ctxt->counters[i]);
- for ( i = 0; i < NUM_COUNTERS; i++ )
- rdmsrl(AMD_F10H_CTRLS[i], ctxt->ctrls[i]);
+ for ( i = 0; i < num_counters; i++ )
+ rdmsrl(ctrls[i], ctxt->ctrls[i]);
}
static void amd_vpmu_save(struct vcpu *v)
@@ -175,12 +235,18 @@ static void context_update(unsigned int
struct vpmu_struct *vpmu = vcpu_vpmu(v);
struct amd_vpmu_context *ctxt = vpmu->context;
- for ( i = 0; i < NUM_COUNTERS; i++ )
- if ( msr == AMD_F10H_COUNTERS[i] )
+ if ( k7_counters_mirrored &&
+ ((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3)) )
+ {
+ msr = get_fam15h_addr(msr);
+ }
+
+ for ( i = 0; i < num_counters; i++ )
+ if ( msr == counters[i] )
ctxt->counters[i] = msr_content;
- for ( i = 0; i < NUM_COUNTERS; i++ )
- if ( msr == AMD_F10H_CTRLS[i] )
+ for ( i = 0; i < num_counters; i++ )
+ if ( msr == ctrls[i] )
ctxt->ctrls[i] = msr_content;
ctxt->hw_lapic_lvtpc = apic_read(APIC_LVTPC);
@@ -235,10 +301,31 @@ static void amd_vpmu_initialise(struct v
{
struct amd_vpmu_context *ctxt = NULL;
struct vpmu_struct *vpmu = vcpu_vpmu(v);
+ __u8 family = current_cpu_data.x86;
if ( vpmu->flags & VPMU_CONTEXT_ALLOCATED )
return;
+ if ( counters == NULL )
+ {
+ switch ( family )
+ {
+ case 0x15:
+ num_counters = F15H_NUM_COUNTERS;
+ counters = AMD_F15H_COUNTERS;
+ ctrls = AMD_F15H_CTRLS;
+ k7_counters_mirrored = 1;
+ break;
+ case 0x10:
+ default:
+ num_counters = F10H_NUM_COUNTERS;
+ counters = AMD_F10H_COUNTERS;
+ ctrls = AMD_F10H_CTRLS;
+ k7_counters_mirrored = 0;
+ break;
+ }
+ }
+
ctxt = xmalloc_bytes(sizeof(struct amd_vpmu_context));
if ( !ctxt )
Index: xen-4.1.2-testing/xen/arch/x86/hvm/vpmu.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/vpmu.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/vpmu.c
@@ -101,6 +101,7 @@ void vpmu_initialise(struct vcpu *v)
switch ( family )
{
case 0x10:
+ case 0x15:
vpmu->arch_vpmu_ops = &amd_vpmu_ops;
break;
default:
++++++
23308-xentrace_Move_the_global_variable_t_info_first_offset_into_calculate_tbuf_size.patch
++++++
changeset: 23308:fb5313e64335
user: Olaf Hering <olaf@xxxxxxxxx>
date: Mon May 09 09:58:36 2011 +0100
files: xen/common/trace.c
description:
xentrace: Move the global variable t_info_first_offset into
calculate_tbuf_size()
Move the global variable t_info_first_offset into
calculate_tbuf_size() because it is only used there. Change the type
from u32 to uint32_t to match type in other places.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
xen/common/trace.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -55,7 +55,6 @@ static DEFINE_PER_CPU_READ_MOSTLY(struct
static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
static u32 data_size;
-static u32 t_info_first_offset __read_mostly;
/* High water mark for trace buffers; */
/* Send virtual interrupt when buffer level reaches this point */
@@ -94,10 +93,10 @@ static struct notifier_block cpu_nfb = {
.notifier_call = cpu_callback
};
-static void calc_tinfo_first_offset(void)
+static uint32_t calc_tinfo_first_offset(void)
{
int offset_in_bytes = offsetof(struct t_info, mfn_offset[NR_CPUS]);
- t_info_first_offset = fit_to_type(uint32_t, offset_in_bytes);
+ return fit_to_type(uint32_t, offset_in_bytes);
}
/**
@@ -107,7 +106,7 @@ static void calc_tinfo_first_offset(void
* The t_info layout is fixed and cant be changed without breaking xentrace.
* Initialize t_info_pages based on number of trace pages.
*/
-static int calculate_tbuf_size(unsigned int pages)
+static int calculate_tbuf_size(unsigned int pages, uint32_t
t_info_first_offset)
{
struct t_buf dummy_size;
typeof(dummy_size.prod) max_size;
@@ -156,6 +155,7 @@ static int alloc_trace_bufs(unsigned int
int i, cpu, order;
/* Start after a fixed-size array of NR_CPUS */
uint32_t *t_info_mfn_list;
+ uint32_t t_info_first_offset;
int offset;
if ( t_info )
@@ -165,9 +165,9 @@ static int alloc_trace_bufs(unsigned int
return -EINVAL;
/* Calculate offset in u32 of first mfn */
- calc_tinfo_first_offset();
+ t_info_first_offset = calc_tinfo_first_offset();
- pages = calculate_tbuf_size(pages);
+ pages = calculate_tbuf_size(pages, t_info_first_offset);
order = get_order_from_pages(pages);
t_info = alloc_xenheap_pages(get_order_from_pages(t_info_pages), 0);
++++++
23309-xentrace_Mark_data_size___read_mostly_because_its_only_written_once.patch
++++++
changeset: 23309:0ddcc8063690
user: Olaf Hering <olaf@xxxxxxxxx>
date: Mon May 09 09:59:13 2011 +0100
files: xen/common/trace.c
description:
xentrace: Mark data_size __read_mostly because it's only written once
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
xen/common/trace.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -54,7 +54,7 @@ static unsigned int t_info_pages;
static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
-static u32 data_size;
+static u32 data_size __read_mostly;
/* High water mark for trace buffers; */
/* Send virtual interrupt when buffer level reaches this point */
++++++
23310-xentrace_Remove_unneeded_cast_when_assigning_pointer_value_to_dst.patch
++++++
changeset: 23310:b7ca55907bd3
user: Olaf Hering <olaf@xxxxxxxxx>
date: Mon May 09 09:59:50 2011 +0100
files: xen/common/trace.c
description:
xentrace: Remove unneeded cast when assigning pointer value to dst
Remove unneeded cast when assigning pointer value to dst.
Both arrays are uint32_t and memcpy takes a void pointer.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
xen/common/trace.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -483,7 +483,7 @@ static inline void __insert_record(struc
const void *extra_data)
{
struct t_rec *rec;
- unsigned char *dst;
+ uint32_t *dst;
unsigned int extra_word = extra / sizeof(u32);
unsigned int local_rec_size = calc_rec_size(cycles, extra);
uint32_t next;
@@ -508,13 +508,13 @@ static inline void __insert_record(struc
rec->event = event;
rec->extra_u32 = extra_word;
- dst = (unsigned char *)rec->u.nocycles.extra_u32;
+ dst = rec->u.nocycles.extra_u32;
if ( (rec->cycles_included = cycles) != 0 )
{
u64 tsc = (u64)get_cycles();
rec->u.cycles.cycles_lo = (uint32_t)tsc;
rec->u.cycles.cycles_hi = (uint32_t)(tsc >> 32);
- dst = (unsigned char *)rec->u.cycles.extra_u32;
+ dst = rec->u.cycles.extra_u32;
}
if ( extra_data && extra )
++++++ 23334-amd-fam12+14-vpmu.patch ++++++
References: FATE#309902, FATE#309903
# HG changeset patch
# User Jacob Shin <jacob.shin@xxxxxxx>
# Date 1305188046 -3600
# Node ID 23e33ea79cac0303c729d4e82905054cded16348
# Parent fabdd682420c0c7b5e81f07f2f54211ebc11babe
hvm: vpmu: Enable HVM VPMU for AMD Family 12h and 14h processors
HVM VPMU support can be enabled for AMD Family 12h and 14h processors
by taking the same code path as 10h.
Signed-off-by: Jacob Shin <jacob.shin@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/hvm/svm/vpmu.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/svm/vpmu.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/svm/vpmu.c
@@ -317,6 +317,8 @@ static void amd_vpmu_initialise(struct v
k7_counters_mirrored = 1;
break;
case 0x10:
+ case 0x12:
+ case 0x14:
default:
num_counters = F10H_NUM_COUNTERS;
counters = AMD_F10H_COUNTERS;
Index: xen-4.1.2-testing/xen/arch/x86/hvm/vpmu.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/vpmu.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/vpmu.c
@@ -101,6 +101,8 @@ void vpmu_initialise(struct vcpu *v)
switch ( family )
{
case 0x10:
+ case 0x12:
+ case 0x14:
case 0x15:
vpmu->arch_vpmu_ops = &amd_vpmu_ops;
break;
++++++ 23383-libxc-rm-static-vars.patch ++++++
# HG changeset patch
# User Ian Campbell <ian.campbell@xxxxxxxxxx>
# Date 1306228450 -3600
# Node ID 23b423a3955785c9a8679c3a877c3472066a2e1f
# Parent ba8da39c67298b19c2c277e5794981b7f23bedf2
libxc: save/restore: remove static context variables
20544:ad9d75d74bd5 and 20545:cc7d66ba0dad seemingly intended to change these
global static variables into stack variables but didn't remove the static
qualifier.
Also zero the entire struct once with memset rather than clearing fields
piecemeal in two different places.
Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Acked-by: Ian Jackson <ian.jackson.citrix.com>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
Acked-by: Vincent Hanquez <vincent.hanquez@xxxxxxxxxxxxx>
Index: xen-4.1.2-testing/tools/libxc/xc_domain_restore.c
===================================================================
--- xen-4.1.2-testing.orig/tools/libxc/xc_domain_restore.c
+++ xen-4.1.2-testing/tools/libxc/xc_domain_restore.c
@@ -1134,23 +1134,19 @@ int xc_domain_restore(xc_interface *xch,
int orig_io_fd_flags;
- static struct restore_ctx _ctx = {
- .live_p2m = NULL,
- .p2m = NULL,
- };
- static struct restore_ctx *ctx = &_ctx;
+ struct restore_ctx _ctx;
+ struct restore_ctx *ctx = &_ctx;
struct domain_info_context *dinfo = &ctx->dinfo;
pagebuf_init(&pagebuf);
memset(&tailbuf, 0, sizeof(tailbuf));
tailbuf.ishvm = hvm;
- /* For info only */
- ctx->nr_pfns = 0;
-
if ( superpages )
return 1;
+ memset(ctx, 0, sizeof(*ctx));
+
ctxt = xc_hypercall_buffer_alloc(xch, ctxt, sizeof(*ctxt));
if ( ctxt == NULL )
Index: xen-4.1.2-testing/tools/libxc/xc_domain_save.c
===================================================================
--- xen-4.1.2-testing.orig/tools/libxc/xc_domain_save.c
+++ xen-4.1.2-testing/tools/libxc/xc_domain_save.c
@@ -958,11 +958,8 @@ int xc_domain_save(xc_interface *xch, in
unsigned long mfn;
struct outbuf ob;
- static struct save_ctx _ctx = {
- .live_p2m = NULL,
- .live_m2p = NULL,
- };
- static struct save_ctx *ctx = &_ctx;
+ struct save_ctx _ctx;
+ struct save_ctx *ctx = &_ctx;
struct domain_info_context *dinfo = &ctx->dinfo;
int completed = 0;
@@ -976,6 +973,8 @@ int xc_domain_save(xc_interface *xch, in
outbuf_init(xch, &ob, OUTBUF_SIZE);
+ memset(ctx, 0, sizeof(*ctx));
+
/* If no explicit control parameters given, use defaults */
max_iters = max_iters ? : DEF_MAX_ITERS;
max_factor = max_factor ? : DEF_MAX_FACTOR;
++++++
23404-xentrace_reduce_trace_buffer_size_to_something_mfn_offset_can_reach.patch
++++++
changeset: 23404:dd0eb070ee44
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu May 26 12:34:44 2011 +0100
files: xen/common/trace.c
description:
xentrace: reduce trace buffer size to something mfn_offset can reach
The start of the array which holds the list of mfns for each cpus
tracebuffer is stored in an unsigned short. This limits the total
amount of pages for each cpu as the number of active cpus increases.
Update the math in calculate_tbuf_size() to apply also this rule to
the max number of trace pages. Without this change the index can
overflow.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
---
xen/common/trace.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -112,11 +112,14 @@ static int calculate_tbuf_size(unsigned
typeof(dummy_size.prod) max_size;
struct t_info dummy_pages;
typeof(dummy_pages.tbuf_size) max_pages;
+ typeof(dummy_pages.mfn_offset[0]) max_mfn_offset;
+ unsigned int max_cpus = num_online_cpus();
unsigned int t_info_words;
/* force maximum value for an unsigned type */
max_size = -1;
max_pages = -1;
+ max_mfn_offset = -1;
/* max size holds up to n pages */
max_size /= PAGE_SIZE;
@@ -124,6 +127,18 @@ static int calculate_tbuf_size(unsigned
if ( max_size < max_pages )
max_pages = max_size;
+ /*
+ * max mfn_offset holds up to n pages per cpu
+ * The array of mfns for the highest cpu can start at the maximum value
+ * mfn_offset can hold. So reduce the number of cpus and also the
mfn_offset.
+ */
+ max_mfn_offset -= t_info_first_offset - 1;
+ max_cpus--;
+ if ( max_cpus )
+ max_mfn_offset /= max_cpus;
+ if ( max_mfn_offset < max_pages )
+ max_pages = max_mfn_offset;
+
if ( pages > max_pages )
{
printk(XENLOG_INFO "xentrace: requested number of %u pages "
++++++ 23405-xentrace_fix_type_of_offset_to_avoid_ouf-of-bounds_access.patch
++++++
changeset: 23405:3057b531d905
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu May 26 12:35:30 2011 +0100
files: xen/common/trace.c
description:
xentrace: fix type of offset to avoid ouf-of-bounds access
Update the type of the local offset variable to match the type where
this variable is stored. Also update the type of t_info_first_offset
because it has also a limited range.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
---
xen/common/trace.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -106,7 +106,7 @@ static uint32_t calc_tinfo_first_offset(
* The t_info layout is fixed and cant be changed without breaking xentrace.
* Initialize t_info_pages based on number of trace pages.
*/
-static int calculate_tbuf_size(unsigned int pages, uint32_t
t_info_first_offset)
+static int calculate_tbuf_size(unsigned int pages, uint16_t
t_info_first_offset)
{
struct t_buf dummy_size;
typeof(dummy_size.prod) max_size;
@@ -170,8 +170,8 @@ static int alloc_trace_bufs(unsigned int
int i, cpu, order;
/* Start after a fixed-size array of NR_CPUS */
uint32_t *t_info_mfn_list;
- uint32_t t_info_first_offset;
- int offset;
+ uint16_t t_info_first_offset;
+ uint16_t offset;
if ( t_info )
return -EBUSY;
@@ -179,7 +179,7 @@ static int alloc_trace_bufs(unsigned int
if ( pages == 0 )
return -EINVAL;
- /* Calculate offset in u32 of first mfn */
+ /* Calculate offset in units of u32 of first mfn */
t_info_first_offset = calc_tinfo_first_offset();
pages = calculate_tbuf_size(pages, t_info_first_offset);
++++++
23406-xentrace_update___insert_record_to_copy_the_trace_record_to_individual_mfns.patch
++++++
changeset: 23406:956438803307
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu May 26 12:36:03 2011 +0100
files: xen/common/trace.c
description:
xentrace: update __insert_record() to copy the trace record to individual mfns
Update __insert_record() to copy the trace record to individual mfns.
This is a prereq before changing the per-cpu allocation from
contiguous to non-contiguous allocation.
v2:
update offset calculation to use shift and mask
update type of mfn_offset to match type of data source
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
---
xen/common/trace.c | 71 +++++++++++++++++++++++++++++++++++++++++------------
1 file changed, 55 insertions(+), 16 deletions(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -52,7 +52,6 @@ static struct t_info *t_info;
static unsigned int t_info_pages;
static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
-static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
static u32 data_size __read_mostly;
@@ -208,7 +207,6 @@ static int alloc_trace_bufs(unsigned int
per_cpu(t_bufs, cpu) = buf = rawbuf;
buf->cons = buf->prod = 0;
- per_cpu(t_data, cpu) = (unsigned char *)(buf + 1);
}
offset = t_info_first_offset;
@@ -472,10 +470,16 @@ static inline u32 calc_bytes_avail(const
return data_size - calc_unconsumed_bytes(buf);
}
-static inline struct t_rec *next_record(const struct t_buf *buf,
- uint32_t *next)
+static unsigned char *next_record(const struct t_buf *buf, uint32_t *next,
+ unsigned char **next_page,
+ uint32_t *offset_in_page)
{
u32 x = buf->prod, cons = buf->cons;
+ uint16_t per_cpu_mfn_offset;
+ uint32_t per_cpu_mfn_nr;
+ uint32_t *mfn_list;
+ uint32_t mfn;
+ unsigned char *this_page;
barrier(); /* must read buf->prod and buf->cons only once */
*next = x;
@@ -487,7 +491,27 @@ static inline struct t_rec *next_record(
ASSERT(x < data_size);
- return (struct t_rec *)&this_cpu(t_data)[x];
+ /* add leading header to get total offset of next record */
+ x += sizeof(struct t_buf);
+ *offset_in_page = x & ~PAGE_MASK;
+
+ /* offset into array of mfns */
+ per_cpu_mfn_nr = x >> PAGE_SHIFT;
+ per_cpu_mfn_offset = t_info->mfn_offset[smp_processor_id()];
+ mfn_list = (uint32_t *)t_info;
+ mfn = mfn_list[per_cpu_mfn_offset + per_cpu_mfn_nr];
+ this_page = mfn_to_virt(mfn);
+ if (per_cpu_mfn_nr + 1 >= opt_tbuf_size)
+ {
+ /* reached end of buffer? */
+ *next_page = NULL;
+ }
+ else
+ {
+ mfn = mfn_list[per_cpu_mfn_offset + per_cpu_mfn_nr + 1];
+ *next_page = mfn_to_virt(mfn);
+ }
+ return this_page;
}
static inline void __insert_record(struct t_buf *buf,
@@ -497,28 +521,37 @@ static inline void __insert_record(struc
unsigned int rec_size,
const void *extra_data)
{
- struct t_rec *rec;
+ struct t_rec split_rec, *rec;
uint32_t *dst;
+ unsigned char *this_page, *next_page;
unsigned int extra_word = extra / sizeof(u32);
unsigned int local_rec_size = calc_rec_size(cycles, extra);
uint32_t next;
+ uint32_t offset;
+ uint32_t remaining;
BUG_ON(local_rec_size != rec_size);
BUG_ON(extra & 3);
- rec = next_record(buf, &next);
- if ( !rec )
+ this_page = next_record(buf, &next, &next_page, &offset);
+ if ( !this_page )
return;
- /* Double-check once more that we have enough space.
- * Don't bugcheck here, in case the userland tool is doing
- * something stupid. */
- if ( (unsigned char *)rec + rec_size > this_cpu(t_data) + data_size )
+
+ remaining = PAGE_SIZE - offset;
+
+ if ( unlikely(rec_size > remaining) )
{
- if ( printk_ratelimit() )
+ if ( next_page == NULL )
+ {
+ /* access beyond end of buffer */
printk(XENLOG_WARNING
- "%s: size=%08x prod=%08x cons=%08x rec=%u\n",
- __func__, data_size, next, buf->cons, rec_size);
- return;
+ "%s: size=%08x prod=%08x cons=%08x rec=%u remaining=%u\n",
+ __func__, data_size, next, buf->cons, rec_size, remaining);
+ return;
+ }
+ rec = &split_rec;
+ } else {
+ rec = (struct t_rec*)(this_page + offset);
}
rec->event = event;
@@ -535,6 +568,12 @@ static inline void __insert_record(struc
if ( extra_data && extra )
memcpy(dst, extra_data, extra);
+ if ( unlikely(rec_size > remaining) )
+ {
+ memcpy(this_page + offset, rec, remaining);
+ memcpy(next_page, (char *)rec + remaining, rec_size - remaining);
+ }
+
wmb();
next += rec_size;
++++++ 23407-xentrace_allocate_non-contiguous_per-cpu_trace_buffers.patch ++++++
changeset: 23407:b19898ac3e32
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu May 26 12:36:27 2011 +0100
files: xen/common/trace.c
description:
xentrace: allocate non-contiguous per-cpu trace buffers
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
---
xen/common/trace.c | 92 ++++++++++++++++++++++++++++-------------------------
1 file changed, 50 insertions(+), 42 deletions(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -166,7 +166,7 @@ static int calculate_tbuf_size(unsigned
*/
static int alloc_trace_bufs(unsigned int pages)
{
- int i, cpu, order;
+ int i, cpu;
/* Start after a fixed-size array of NR_CPUS */
uint32_t *t_info_mfn_list;
uint16_t t_info_first_offset;
@@ -182,34 +182,11 @@ static int alloc_trace_bufs(unsigned int
t_info_first_offset = calc_tinfo_first_offset();
pages = calculate_tbuf_size(pages, t_info_first_offset);
- order = get_order_from_pages(pages);
t_info = alloc_xenheap_pages(get_order_from_pages(t_info_pages), 0);
if ( t_info == NULL )
- goto out_dealloc;
+ goto out_dealloc_t_info;
- /*
- * First, allocate buffers for all of the cpus. If any
- * fails, deallocate what you have so far and exit.
- */
- for_each_online_cpu(cpu)
- {
- void *rawbuf;
- struct t_buf *buf;
-
- if ( (rawbuf = alloc_xenheap_pages(
- order, MEMF_bits(32 + PAGE_SHIFT))) == NULL )
- {
- printk(XENLOG_INFO "xentrace: memory allocation failed "
- "on cpu %d\n", cpu);
- goto out_dealloc;
- }
-
- per_cpu(t_bufs, cpu) = buf = rawbuf;
- buf->cons = buf->prod = 0;
- }
-
- offset = t_info_first_offset;
t_info_mfn_list = (uint32_t *)t_info;
for(i = 0; i < t_info_pages; i++)
@@ -219,27 +196,53 @@ static int alloc_trace_bufs(unsigned int
t_info->tbuf_size = pages;
/*
- * Now share the pages so xentrace can map them, and write them in
- * the global t_info structure.
+ * Allocate buffers for all of the cpus.
+ * If any fails, deallocate what you have so far and exit.
*/
for_each_online_cpu(cpu)
{
- void *rawbuf = per_cpu(t_bufs, cpu);
- struct page_info *p = virt_to_page(rawbuf);
- uint32_t mfn = virt_to_mfn(rawbuf);
+ offset = t_info_first_offset + (cpu * pages);
+ t_info->mfn_offset[cpu] = offset;
for ( i = 0; i < pages; i++ )
{
- share_xen_page_with_privileged_guests(p + i, XENSHARE_writable);
-
- t_info_mfn_list[offset + i]=mfn + i;
+ void *p = alloc_xenheap_pages(0, MEMF_bits(32 + PAGE_SHIFT));
+ if ( !p )
+ {
+ printk(XENLOG_INFO "xentrace: memory allocation failed "
+ "on cpu %d after %d pages\n", cpu, i);
+ t_info_mfn_list[offset + i] = 0;
+ goto out_dealloc;
+ }
+ t_info_mfn_list[offset + i] = virt_to_mfn(p);
}
- t_info->mfn_offset[cpu]=offset;
- printk(XENLOG_INFO "xentrace: p%d mfn %"PRIx32" offset %d\n",
- cpu, mfn, offset);
- offset+=i;
+ }
+
+ /*
+ * Initialize buffers for all of the cpus.
+ */
+ for_each_online_cpu(cpu)
+ {
+ struct t_buf *buf;
+ struct page_info *pg;
spin_lock_init(&per_cpu(t_lock, cpu));
+
+ offset = t_info->mfn_offset[cpu];
+
+ /* Initialize the buffer metadata */
+ per_cpu(t_bufs, cpu) = buf = mfn_to_virt(t_info_mfn_list[offset]);
+ buf->cons = buf->prod = 0;
+
+ printk(XENLOG_INFO "xentrace: p%d mfn %x offset %u\n",
+ cpu, t_info_mfn_list[offset], offset);
+
+ /* Now share the trace pages */
+ for ( i = 0; i < pages; i++ )
+ {
+ pg = mfn_to_page(t_info_mfn_list[offset + i]);
+ share_xen_page_with_privileged_guests(pg, XENSHARE_writable);
+ }
}
data_size = (pages * PAGE_SIZE - sizeof(struct t_buf));
@@ -255,14 +258,19 @@ static int alloc_trace_bufs(unsigned int
out_dealloc:
for_each_online_cpu(cpu)
{
- void *rawbuf = per_cpu(t_bufs, cpu);
- per_cpu(t_bufs, cpu) = NULL;
- if ( rawbuf )
+ offset = t_info->mfn_offset[cpu];
+ if ( !offset )
+ continue;
+ for ( i = 0; i < pages; i++ )
{
- ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated));
- free_xenheap_pages(rawbuf, order);
+ uint32_t mfn = t_info_mfn_list[offset + i];
+ if ( !mfn )
+ break;
+ ASSERT(!(mfn_to_page(mfn)->count_info & PGC_allocated));
+ free_xenheap_pages(mfn_to_virt(mfn), 0);
}
}
+out_dealloc_t_info:
free_xenheap_pages(t_info, get_order_from_pages(t_info_pages));
t_info = NULL;
printk(XENLOG_WARNING "xentrace: allocation failed! Tracing disabled.\n");
++++++ 23437-amd-fam15-TSC-scaling.patch ++++++
References: FATE#309901
# HG changeset patch
# User Wei Huang <wei.huang2@xxxxxxx>
# Date 1306569488 -3600
# Node ID d7c755c25bb9d6ed77d64cb6736b6c4f339db1bf
# Parent f6ce871e568949f5817470f6c7bab6ed1f8f6c13
HVM/SVM: enable tsc scaling ratio for SVM
Future AMD CPUs support TSC scaling. It allows guests to have a
different TSC frequency from host system using this formula: guest_tsc
= host_tsc * tsc_ratio + vmcb_offset. The tsc_ratio is a 64bit MSR
contains a fixed-point number in 8.32 format (8 bits for integer part
and 32bits for fractional part). For instance 0x00000003_80000000
means tsc_ratio=3.5.
This patch enables TSC scaling ratio for SVM. With it, guest VMs don't
need take #VMEXIT to calculate a translated TSC value when it is
running under TSC emulation mode. This can substancially reduce the
rdtsc overhead.
Signed-off-by: Wei Huang <wei.huang2@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/hvm/svm/svm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/svm/svm.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/svm/svm.c
@@ -588,6 +588,22 @@ static void svm_set_segment_register(str
static void svm_set_tsc_offset(struct vcpu *v, u64 offset)
{
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+ struct domain *d = v->domain;
+
+ /* Re-adjust the offset value when TSC_RATIO is available */
+ if ( cpu_has_tsc_ratio && d->arch.vtsc )
+ {
+ uint64_t host_tsc, guest_tsc;
+
+ rdtscll(host_tsc);
+ guest_tsc = hvm_get_guest_tsc(v);
+
+ /* calculate hi,lo parts in 64bits to prevent overflow */
+ offset = (((host_tsc >> 32) * d->arch.tsc_khz / cpu_khz) << 32) +
+ (host_tsc & 0xffffffffULL) * d->arch.tsc_khz / cpu_khz;
+ offset = guest_tsc - offset;
+ }
+
vmcb_set_tsc_offset(vmcb, offset);
}
@@ -638,6 +654,19 @@ static void svm_init_hypercall_page(stru
*(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */
}
+static inline void svm_tsc_ratio_save(struct vcpu *v)
+{
+ /* Other vcpus might not have vtsc enabled. So disable TSC_RATIO here. */
+ if ( cpu_has_tsc_ratio && v->domain->arch.vtsc )
+ wrmsrl(MSR_AMD64_TSC_RATIO, DEFAULT_TSC_RATIO);
+}
+
+static inline void svm_tsc_ratio_load(struct vcpu *v)
+{
+ if ( cpu_has_tsc_ratio && v->domain->arch.vtsc )
+ wrmsrl(MSR_AMD64_TSC_RATIO, vcpu_tsc_ratio(v));
+}
+
static void svm_ctxt_switch_from(struct vcpu *v)
{
int cpu = smp_processor_id();
@@ -646,6 +675,7 @@ static void svm_ctxt_switch_from(struct
svm_save_dr(v);
vpmu_save(v);
+ svm_tsc_ratio_save(v);
svm_sync_vmcb(v);
svm_vmload(per_cpu(root_vmcb, cpu));
@@ -689,6 +719,7 @@ static void svm_ctxt_switch_to(struct vc
svm_vmload(vmcb);
vmcb->cleanbits.bytes = 0;
vpmu_load(v);
+ svm_tsc_ratio_load(v);
if ( cpu_has_rdtscp )
wrmsrl(MSR_TSC_AUX, hvm_msr_tsc_aux(v));
Index: xen-4.1.2-testing/xen/arch/x86/hvm/svm/vmcb.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/svm/vmcb.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/svm/vmcb.c
@@ -165,7 +165,9 @@ static int construct_vmcb(struct vcpu *v
/* TSC. */
vmcb->_tsc_offset = 0;
- if ( v->domain->arch.vtsc )
+
+ /* Don't need to intercept RDTSC if CPU supports TSC rate scaling */
+ if ( v->domain->arch.vtsc && !cpu_has_tsc_ratio )
{
vmcb->_general1_intercepts |= GENERAL1_INTERCEPT_RDTSC;
vmcb->_general2_intercepts |= GENERAL2_INTERCEPT_RDTSCP;
Index: xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/svm.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/hvm/svm/svm.h
+++ xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/svm.h
@@ -91,5 +91,13 @@ extern u32 svm_feature_flags;
#define cpu_has_svm_cleanbits cpu_has_svm_feature(SVM_FEATURE_VMCBCLEAN)
#define cpu_has_svm_decode cpu_has_svm_feature(SVM_FEATURE_DECODEASSISTS)
#define cpu_has_pause_filter cpu_has_svm_feature(SVM_FEATURE_PAUSEFILTER)
+#define cpu_has_tsc_ratio cpu_has_svm_feature(SVM_FEATURE_TSCRATEMSR)
+
+/* TSC rate */
+#define DEFAULT_TSC_RATIO 0x0000000100000000ULL
+#define TSC_RATIO_RSVD_BITS 0xffffff0000000000ULL
+#define TSC_RATIO(g_khz, h_khz) ( (((u64)(g_khz)<<32)/(u64)(h_khz)) & \
+ ~TSC_RATIO_RSVD_BITS )
+#define vcpu_tsc_ratio(v) TSC_RATIO((v)->domain->arch.tsc_khz, cpu_khz)
#endif /* __ASM_X86_HVM_SVM_H__ */
Index: xen-4.1.2-testing/xen/include/asm-x86/msr-index.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/msr-index.h
+++ xen-4.1.2-testing/xen/include/asm-x86/msr-index.h
@@ -261,6 +261,9 @@
#define MSR_AMD_PATCHLEVEL 0x0000008b
#define MSR_AMD_PATCHLOADER 0xc0010020
+/* AMD TSC RATE MSR */
+#define MSR_AMD64_TSC_RATIO 0xc0000104
+
/* AMD OS Visible Workaround MSRs */
#define MSR_AMD_OSVW_ID_LENGTH 0xc0010140
#define MSR_AMD_OSVW_STATUS 0xc0010141
++++++ 23462-libxc-cpu-feature.patch ++++++
References: FATE#311951
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1307023264 -3600
# Node ID 4804af7048cafecfc014c30cfea374eb0a0360e8
# Parent 5839e797a1307fceffcd0b9ad35ed31644378b47
libxc: Simplify and clean up xc_cpufeature.h
* Remove Linux-private defns with no direct relation to CPUID
* Remove word offsets into Linux-defined cpu_caps array
* Hard tabs -> soft tabs
Signed-off-by: Keir Fraser <keir@xxxxxxx>
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1307118421 -3600
# Node ID bcd2476c2e2d00dc6371e52fbff66fe3178b7944
# Parent 55c5eff9bf84d4c5f3463c01f038edc1c46f30bc
libxc: Don't refer to meaningless 'word offsets' in xc_cpufeature.h
Signed-off-by: Keir Fraser <keir@xxxxxxx>
Index: xen-4.1.2-testing/tools/libxc/xc_cpufeature.h
===================================================================
--- xen-4.1.2-testing.orig/tools/libxc/xc_cpufeature.h
+++ xen-4.1.2-testing/tools/libxc/xc_cpufeature.h
@@ -17,134 +17,115 @@
#ifndef __LIBXC_CPUFEATURE_H
#define __LIBXC_CPUFEATURE_H
-/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
-#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */
-#define X86_FEATURE_VME (0*32+ 1) /* Virtual Mode Extensions */
-#define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */
-#define X86_FEATURE_PSE (0*32+ 3) /* Page Size Extensions */
-#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */
-#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers,
RDMSR, WRMSR */
-#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address
Extensions */
-#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Architecture
*/
-#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */
-#define X86_FEATURE_APIC (0*32+ 9) /* Onboard APIC */
-#define X86_FEATURE_SEP (0*32+11) /* SYSENTER/SYSEXIT */
-#define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */
-#define X86_FEATURE_PGE (0*32+13) /* Page Global Enable */
-#define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture
*/
-#define X86_FEATURE_CMOV (0*32+15) /* CMOV instruction (FCMOVCC and
FCOMI too if FPU present) */
-#define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */
-#define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */
-#define X86_FEATURE_PN (0*32+18) /* Processor serial number */
-#define X86_FEATURE_CLFLSH (0*32+19) /* Supports the CLFLUSH instruction */
-#define X86_FEATURE_DS (0*32+21) /* Debug Store */
-#define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */
-#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */
-#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions
(fast save and restore */
- /* of FPU context), and CR4.OSFXSR
available */
-#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions
*/
-#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */
-#define X86_FEATURE_SELFSNOOP (0*32+27) /* CPU self snoop */
-#define X86_FEATURE_HT (0*32+28) /* Hyper-Threading */
-#define X86_FEATURE_ACC (0*32+29) /* Automatic clock control */
-#define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */
-#define X86_FEATURE_PBE (0*32+31) /* Pending Break Enable */
+/* Intel-defined CPU features, CPUID level 0x00000001 (edx) */
+#define X86_FEATURE_FPU 0 /* Onboard FPU */
+#define X86_FEATURE_VME 1 /* Virtual Mode Extensions */
+#define X86_FEATURE_DE 2 /* Debugging Extensions */
+#define X86_FEATURE_PSE 3 /* Page Size Extensions */
+#define X86_FEATURE_TSC 4 /* Time Stamp Counter */
+#define X86_FEATURE_MSR 5 /* Model-Specific Registers, RDMSR, WRMSR */
+#define X86_FEATURE_PAE 6 /* Physical Address Extensions */
+#define X86_FEATURE_MCE 7 /* Machine Check Architecture */
+#define X86_FEATURE_CX8 8 /* CMPXCHG8 instruction */
+#define X86_FEATURE_APIC 9 /* Onboard APIC */
+#define X86_FEATURE_SEP 11 /* SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR 12 /* Memory Type Range Registers */
+#define X86_FEATURE_PGE 13 /* Page Global Enable */
+#define X86_FEATURE_MCA 14 /* Machine Check Architecture */
+#define X86_FEATURE_CMOV 15 /* CMOV instruction */
+#define X86_FEATURE_PAT 16 /* Page Attribute Table */
+#define X86_FEATURE_PSE36 17 /* 36-bit PSEs */
+#define X86_FEATURE_PN 18 /* Processor serial number */
+#define X86_FEATURE_CLFLSH 19 /* Supports the CLFLUSH instruction */
+#define X86_FEATURE_DS 21 /* Debug Store */
+#define X86_FEATURE_ACPI 22 /* ACPI via MSR */
+#define X86_FEATURE_MMX 23 /* Multimedia Extensions */
+#define X86_FEATURE_FXSR 24 /* FXSAVE and FXRSTOR instructions */
+#define X86_FEATURE_XMM 25 /* Streaming SIMD Extensions */
+#define X86_FEATURE_XMM2 26 /* Streaming SIMD Extensions-2 */
+#define X86_FEATURE_SELFSNOOP 27 /* CPU self snoop */
+#define X86_FEATURE_HT 28 /* Hyper-Threading */
+#define X86_FEATURE_ACC 29 /* Automatic clock control */
+#define X86_FEATURE_IA64 30 /* IA-64 processor */
+#define X86_FEATURE_PBE 31 /* Pending Break Enable */
-/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
+/* AMD-defined CPU features, CPUID level 0x80000001 */
/* Don't duplicate feature flags which are redundant with Intel! */
-#define X86_FEATURE_SYSCALL (1*32+11) /* SYSCALL/SYSRET */
-#define X86_FEATURE_MP (1*32+19) /* MP Capable. */
-#define X86_FEATURE_NX (1*32+20) /* Execute Disable */
-#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
-#define X86_FEATURE_FFXSR (1*32+25) /* FFXSR instruction optimizations */
-#define X86_FEATURE_PAGE1GB (1*32+26) /* 1Gb large page support */
-#define X86_FEATURE_RDTSCP (1*32+27) /* RDTSCP */
-#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */
-#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */
-#define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! */
-
-/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
-#define X86_FEATURE_RECOVERY (2*32+ 0) /* CPU in recovery mode */
-#define X86_FEATURE_LONGRUN (2*32+ 1) /* Longrun power control */
-#define X86_FEATURE_LRTI (2*32+ 3) /* LongRun table interface */
-
-/* Other features, Linux-defined mapping, word 3 */
-/* This range is used for feature bits which conflict or are synthesized */
-#define X86_FEATURE_CXMMX (3*32+ 0) /* Cyrix MMX extensions */
-#define X86_FEATURE_K6_MTRR (3*32+ 1) /* AMD K6 nonstandard MTRRs */
-#define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */
-#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */
-/* cpu types for specific tunings: */
-#define X86_FEATURE_K8 (3*32+ 4) /* Opteron, Athlon64 */
-#define X86_FEATURE_K7 (3*32+ 5) /* Athlon */
-#define X86_FEATURE_P3 (3*32+ 6) /* P3 */
-#define X86_FEATURE_P4 (3*32+ 7) /* P4 */
-#define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */
-
-/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
-#define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */
-#define X86_FEATURE_PCLMULQDQ (4*32+ 1) /* Carry-less multiplication */
-#define X86_FEATURE_DTES64 (4*32+ 2) /* 64-bit Debug Store */
-#define X86_FEATURE_MWAIT (4*32+ 3) /* Monitor/Mwait support */
-#define X86_FEATURE_DSCPL (4*32+ 4) /* CPL Qualified Debug Store */
-#define X86_FEATURE_VMXE (4*32+ 5) /* Virtual Machine Extensions */
-#define X86_FEATURE_SMXE (4*32+ 6) /* Safer Mode Extensions */
-#define X86_FEATURE_EST (4*32+ 7) /* Enhanced SpeedStep */
-#define X86_FEATURE_TM2 (4*32+ 8) /* Thermal Monitor 2 */
-#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental Streaming SIMD
Extensions-3 */
-#define X86_FEATURE_CID (4*32+10) /* Context ID */
-#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */
-#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */
-#define X86_FEATURE_PDCM (4*32+15) /* Perf/Debug Capability MSR */
-#define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */
-#define X86_FEATURE_SSE4_1 (4*32+19) /* Streaming SIMD Extensions 4.1 */
-#define X86_FEATURE_SSE4_2 (4*32+20) /* Streaming SIMD Extensions 4.2 */
-#define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */
-#define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */
-#define X86_FEATURE_TSC_DEADLINE (4*32+24) /* "tdt" TSC Deadline Timer */
-#define X86_FEATURE_AES (4*32+25) /* AES acceleration
instructions */
-#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
-#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions
*/
-#define X86_FEATURE_F16C (4*32+29) /* Half-precision convert instruction
*/
-#define X86_FEATURE_RDRAND (4*32+30) /* Digital Random Number Generator */
-#define X86_FEATURE_HYPERVISOR (4*32+31) /* Running under some hypervisor */
-
-/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
-#define X86_FEATURE_XSTORE (5*32+ 2) /* on-CPU RNG present (xstore insn) */
-#define X86_FEATURE_XSTORE_EN (5*32+ 3) /* on-CPU RNG enabled */
-#define X86_FEATURE_XCRYPT (5*32+ 6) /* on-CPU crypto (xcrypt insn) */
-#define X86_FEATURE_XCRYPT_EN (5*32+ 7) /* on-CPU crypto enabled */
-#define X86_FEATURE_ACE2 (5*32+ 8) /* Advanced Cryptography Engine v2 */
-#define X86_FEATURE_ACE2_EN (5*32+ 9) /* ACE v2 enabled */
-#define X86_FEATURE_PHE (5*32+ 10) /* PadLock Hash Engine */
-#define X86_FEATURE_PHE_EN (5*32+ 11) /* PHE enabled */
-#define X86_FEATURE_PMM (5*32+ 12) /* PadLock Montgomery
Multiplier */
-#define X86_FEATURE_PMM_EN (5*32+ 13) /* PMM enabled */
-
-/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
-#define X86_FEATURE_LAHF_LM (6*32+ 0) /* LAHF/SAHF in long mode */
-#define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */
-#define X86_FEATURE_SVM (6*32+ 2) /* Secure virtual machine */
-#define X86_FEATURE_EXTAPIC (6*32+ 3) /* Extended APIC space */
-#define X86_FEATURE_CR8_LEGACY (6*32+ 4) /* CR8 in 32-bit mode */
-#define X86_FEATURE_ABM (6*32+ 5) /* Advanced bit manipulation */
-#define X86_FEATURE_SSE4A (6*32+ 6) /* SSE-4A */
-#define X86_FEATURE_MISALIGNSSE (6*32+ 7) /* Misaligned SSE mode */
-#define X86_FEATURE_3DNOWPREFETCH (6*32+ 8) /* 3DNow prefetch instructions */
-#define X86_FEATURE_OSVW (6*32+ 9) /* OS Visible Workaround */
-#define X86_FEATURE_IBS (6*32+10) /* Instruction Based Sampling */
-#define X86_FEATURE_XOP (6*32+11) /* extended AVX instructions */
-#define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */
-#define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */
-#define X86_FEATURE_LWP (6*32+15) /* Light Weight Profiling */
-#define X86_FEATURE_FMA4 (6*32+16) /* 4 operands MAC instructions */
-#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */
-#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */
-#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
-
-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
-#define X86_FEATURE_FSGSBASE (7*32+ 0) /* {RD,WR}{FS,GS}BASE instructions */
-#define X86_FEATURE_SMEP (7*32+ 7) /* Supervisor Mode Execution
Protection */
-#define X86_FEATURE_ERMS (7*32+ 9) /* Enhanced REP MOVSB/STOSB */
+#define X86_FEATURE_SYSCALL 11 /* SYSCALL/SYSRET */
+#define X86_FEATURE_MP 19 /* MP Capable. */
+#define X86_FEATURE_NX 20 /* Execute Disable */
+#define X86_FEATURE_MMXEXT 22 /* AMD MMX extensions */
+#define X86_FEATURE_FFXSR 25 /* FFXSR instruction optimizations */
+#define X86_FEATURE_PAGE1GB 26 /* 1Gb large page support */
+#define X86_FEATURE_RDTSCP 27 /* RDTSCP */
+#define X86_FEATURE_LM 29 /* Long Mode (x86-64) */
+#define X86_FEATURE_3DNOWEXT 30 /* AMD 3DNow! extensions */
+#define X86_FEATURE_3DNOW 31 /* 3DNow! */
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (ecx) */
+#define X86_FEATURE_XMM3 0 /* Streaming SIMD Extensions-3 */
+#define X86_FEATURE_PCLMULQDQ 1 /* Carry-less multiplication */
+#define X86_FEATURE_DTES64 2 /* 64-bit Debug Store */
+#define X86_FEATURE_MWAIT 3 /* Monitor/Mwait support */
+#define X86_FEATURE_DSCPL 4 /* CPL Qualified Debug Store */
+#define X86_FEATURE_VMXE 5 /* Virtual Machine Extensions */
+#define X86_FEATURE_SMXE 6 /* Safer Mode Extensions */
+#define X86_FEATURE_EST 7 /* Enhanced SpeedStep */
+#define X86_FEATURE_TM2 8 /* Thermal Monitor 2 */
+#define X86_FEATURE_SSSE3 9 /* Supplemental Streaming SIMD Exts-3 */
+#define X86_FEATURE_CID 10 /* Context ID */
+#define X86_FEATURE_CX16 13 /* CMPXCHG16B */
+#define X86_FEATURE_XTPR 14 /* Send Task Priority Messages */
+#define X86_FEATURE_PDCM 15 /* Perf/Debug Capability MSR */
+#define X86_FEATURE_DCA 18 /* Direct Cache Access */
+#define X86_FEATURE_SSE4_1 19 /* Streaming SIMD Extensions 4.1 */
+#define X86_FEATURE_SSE4_2 20 /* Streaming SIMD Extensions 4.2 */
+#define X86_FEATURE_X2APIC 21 /* x2APIC */
+#define X86_FEATURE_POPCNT 23 /* POPCNT instruction */
+#define X86_FEATURE_TSC_DEADLINE 24 /* "tdt" TSC Deadline Timer */
+#define X86_FEATURE_AES 25 /* AES acceleration instructions */
+#define X86_FEATURE_XSAVE 26 /* XSAVE/XRSTOR/XSETBV/XGETBV */
+#define X86_FEATURE_AVX 28 /* Advanced Vector Extensions */
+#define X86_FEATURE_F16C 29 /* Half-precision convert instruction */
+#define X86_FEATURE_RDRAND 30 /* Digital Random Number Generator */
+#define X86_FEATURE_HYPERVISOR 31 /* Running under some hypervisor */
+
+/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001 */
+#define X86_FEATURE_XSTORE 2 /* on-CPU RNG present (xstore insn) */
+#define X86_FEATURE_XSTORE_EN 3 /* on-CPU RNG enabled */
+#define X86_FEATURE_XCRYPT 6 /* on-CPU crypto (xcrypt insn) */
+#define X86_FEATURE_XCRYPT_EN 7 /* on-CPU crypto enabled */
+#define X86_FEATURE_ACE2 8 /* Advanced Cryptography Engine v2 */
+#define X86_FEATURE_ACE2_EN 9 /* ACE v2 enabled */
+#define X86_FEATURE_PHE 10 /* PadLock Hash Engine */
+#define X86_FEATURE_PHE_EN 11 /* PHE enabled */
+#define X86_FEATURE_PMM 12 /* PadLock Montgomery Multiplier */
+#define X86_FEATURE_PMM_EN 13 /* PMM enabled */
+
+/* More extended AMD flags: CPUID level 0x80000001, ecx */
+#define X86_FEATURE_LAHF_LM 0 /* LAHF/SAHF in long mode */
+#define X86_FEATURE_CMP_LEGACY 1 /* If yes HyperThreading not valid */
+#define X86_FEATURE_SVM 2 /* Secure virtual machine */
+#define X86_FEATURE_EXTAPIC 3 /* Extended APIC space */
+#define X86_FEATURE_CR8_LEGACY 4 /* CR8 in 32-bit mode */
+#define X86_FEATURE_ABM 5 /* Advanced bit manipulation */
+#define X86_FEATURE_SSE4A 6 /* SSE-4A */
+#define X86_FEATURE_MISALIGNSSE 7 /* Misaligned SSE mode */
+#define X86_FEATURE_3DNOWPREFETCH 8 /* 3DNow prefetch instructions */
+#define X86_FEATURE_OSVW 9 /* OS Visible Workaround */
+#define X86_FEATURE_IBS 10 /* Instruction Based Sampling */
+#define X86_FEATURE_XOP 11 /* extended AVX instructions */
+#define X86_FEATURE_SKINIT 12 /* SKINIT/STGI instructions */
+#define X86_FEATURE_WDT 13 /* Watchdog timer */
+#define X86_FEATURE_LWP 15 /* Light Weight Profiling */
+#define X86_FEATURE_FMA4 16 /* 4 operands MAC instructions */
+#define X86_FEATURE_NODEID_MSR 19 /* NodeId MSR */
+#define X86_FEATURE_TBM 21 /* trailing bit manipulations */
+#define X86_FEATURE_TOPOEXT 22 /* topology extensions CPUID leafs */
+
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx) */
+#define X86_FEATURE_FSGSBASE 0 /* {RD,WR}{FS,GS}BASE instructions */
+#define X86_FEATURE_SMEP 7 /* Supervisor Mode Execution Protection */
+#define X86_FEATURE_ERMS 9 /* Enhanced REP MOVSB/STOSB */
#endif /* __LIBXC_CPUFEATURE_H */
Index: xen-4.1.2-testing/tools/libxc/xc_cpuid_x86.c
===================================================================
--- xen-4.1.2-testing.orig/tools/libxc/xc_cpuid_x86.c
+++ xen-4.1.2-testing/tools/libxc/xc_cpuid_x86.c
@@ -25,9 +25,9 @@
#include "xc_cpufeature.h"
#include <xen/hvm/params.h>
-#define bitmaskof(idx) (1u << ((idx) & 31))
-#define clear_bit(idx, dst) ((dst) &= ~(1u << ((idx) & 31)))
-#define set_bit(idx, dst) ((dst) |= (1u << ((idx) & 31)))
+#define bitmaskof(idx) (1u << (idx))
+#define clear_bit(idx, dst) ((dst) &= ~(1u << (idx)))
+#define set_bit(idx, dst) ((dst) |= (1u << (idx)))
#define DEF_MAX_BASE 0x0000000du
#define DEF_MAX_EXT 0x80000008u
++++++ 23506-x86_Disable_set_gpfn_from_mfn_until_m2p_table_is_allocated..patch
++++++
changeset: 23506:d1309a79bde8
user: Keir Fraser <keir@xxxxxxx>
date: Fri Jun 10 08:18:33 2011 +0100
files: xen/arch/x86/x86_64/mm.c xen/include/asm-x86/mm.h
description:
x86: Disable set_gpfn_from_mfn until m2p table is allocated.
This is a prerequisite for calling set_gpfn_from_mfn() unconditionally
from free_heap_pages().
Signed-off-by: Keir Fraser <keir@xxxxxxx>
---
xen/arch/x86/x86_64/mm.c | 4 ++++
xen/include/asm-x86/mm.h | 15 +++++++++++++--
2 files changed, 17 insertions(+), 2 deletions(-)
Index: xen-4.1.2-testing/xen/arch/x86/x86_64/mm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/x86_64/mm.c
+++ xen-4.1.2-testing/xen/arch/x86/x86_64/mm.c
@@ -47,6 +47,8 @@ unsigned int __read_mostly pfn_pdx_hole_
unsigned int __read_mostly m2p_compat_vstart = __HYPERVISOR_COMPAT_VIRT_START;
+bool_t __read_mostly machine_to_phys_mapping_valid = 0;
+
/* Top-level master (and idle-domain) page directory. */
l4_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
idle_pg_table[L4_PAGETABLE_ENTRIES];
@@ -800,6 +802,8 @@ void __init paging_init(void)
#undef CNT
#undef MFN
+ machine_to_phys_mapping_valid = 1;
+
/* Set up linear page table mapping. */
l4e_write(&idle_pg_table[l4_table_offset(LINEAR_PT_VIRT_START)],
l4e_from_paddr(__pa(idle_pg_table), __PAGE_HYPERVISOR));
Index: xen-4.1.2-testing/xen/include/asm-x86/mm.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/mm.h
+++ xen-4.1.2-testing/xen/include/asm-x86/mm.h
@@ -469,7 +469,7 @@ TYPE_SAFE(unsigned long,mfn);
#ifdef CONFIG_COMPAT
#define compat_machine_to_phys_mapping ((unsigned int
*)RDWR_COMPAT_MPT_VIRT_START)
-#define set_gpfn_from_mfn(mfn, pfn) ({ \
+#define _set_gpfn_from_mfn(mfn, pfn) ({ \
struct domain *d = page_get_owner(__mfn_to_page(mfn)); \
unsigned long entry = (d && (d == dom_cow)) ? \
SHARED_M2P_ENTRY : (pfn); \
@@ -478,7 +478,7 @@ TYPE_SAFE(unsigned long,mfn);
machine_to_phys_mapping[(mfn)] = (entry)); \
})
#else
-#define set_gpfn_from_mfn(mfn, pfn) ({ \
+#define _set_gpfn_from_mfn(mfn, pfn) ({ \
struct domain *d = page_get_owner(__mfn_to_page(mfn)); \
if(d && (d == dom_cow)) \
machine_to_phys_mapping[(mfn)] = SHARED_M2P_ENTRY; \
@@ -486,6 +486,17 @@ TYPE_SAFE(unsigned long,mfn);
machine_to_phys_mapping[(mfn)] = (pfn); \
})
#endif
+
+/*
+ * Disable some users of set_gpfn_from_mfn() (e.g., free_heap_pages()) until
+ * the machine_to_phys_mapping is actually set up.
+ */
+extern bool_t machine_to_phys_mapping_valid;
+#define set_gpfn_from_mfn(mfn, pfn) do { \
+ if ( machine_to_phys_mapping_valid ) \
+ _set_gpfn_from_mfn(mfn, pfn); \
+} while (0)
+
#define get_gpfn_from_mfn(mfn) (machine_to_phys_mapping[(mfn)])
#define mfn_to_gmfn(_d, mfn) \
++++++
23507-xenpaging_update_machine_to_phys_mapping_during_page_deallocation.patch
++++++
changeset: 23507:0a29c8c3ddf7
user: Keir Fraser <keir@xxxxxxx>
date: Fri Jun 10 08:19:07 2011 +0100
files: xen/common/page_alloc.c
description:
xenpaging: update machine_to_phys_mapping[] during page deallocation
The machine_to_phys_mapping[] array needs updating during page
deallocation. If that page is allocated again, a call to
get_gpfn_from_mfn() will still return an old gfn from another guest.
This will cause trouble because this gfn number has no or different
meaning in the context of the current guest.
This happens when the entire guest ram is paged-out before
xen_vga_populate_vram() runs. Then XENMEM_populate_physmap is called
with gfn 0xff000. A new page is allocated with alloc_domheap_pages.
This new page does not have a gfn yet. However, in
guest_physmap_add_entry() the passed mfn maps still to an old gfn
(perhaps from another old guest). This old gfn is in paged-out state
in this guests context and has no mfn anymore. As a result, the
ASSERT() triggers because p2m_is_ram() is true for p2m_ram_paging*
types. If the machine_to_phys_mapping[] array is updated properly,
both loops in guest_physmap_add_entry() turn into no-ops for the new
page and the mfn/gfn mapping will be done at the end of the function.
If XENMEM_add_to_physmap is used with XENMAPSPACE_gmfn,
get_gpfn_from_mfn() will return an appearently valid gfn. As a
result, guest_physmap_remove_page() is called. The ASSERT in
p2m_remove_page triggers because the passed mfn does not match the old
mfn for the passed gfn.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
xen/common/page_alloc.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
Index: xen-4.1.2-testing/xen/common/page_alloc.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/page_alloc.c
+++ xen-4.1.2-testing/xen/common/page_alloc.c
@@ -528,7 +528,7 @@ static int reserve_offlined_page(struct
static void free_heap_pages(
struct page_info *pg, unsigned int order)
{
- unsigned long mask;
+ unsigned long mask, mfn = page_to_mfn(pg);
unsigned int i, node = phys_to_nid(page_to_maddr(pg)), tainted = 0;
unsigned int zone = page_to_zone(pg);
@@ -539,6 +539,10 @@ static void free_heap_pages(
for ( i = 0; i < (1 << order); i++ )
{
+ /* This page is not a guest frame any more. */
+ page_set_owner(&pg[i], NULL); /* set_gpfn_from_mfn snoops pg owner */
+ set_gpfn_from_mfn(mfn + i, INVALID_M2P_ENTRY);
+
/*
* Cannot assume that count_info == 0, as there are some corner cases
* where it isn't the case and yet it isn't a bug:
++++++ 23508-vmx-proc-based-ctls-probe.patch ++++++
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1307691167 -3600
# Node ID 2ef6bbee50371e1135236035ed1a9a7b8748e09f
# Parent 0a29c8c3ddf7395ea8e68c5f4cd8633023490022
x86/vmx: Small fixes to MSR_IA32_VMX_PROCBASED_CTLS feature probing.
Should check for VIRTUAL_INTR_PENDING as we unconditionally make use
of it. Also check for CR8 exiting unconditionally on x86/64, as this
is of use to nestedvmx, and every 64-bit cpu should support it.
Signed-off-by: Eddie Dong <eddie.dong@xxxxxxxxx>
Signed-off-by: Keir Fraser <keir@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/hvm/vmx/vmcs.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/vmx/vmcs.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/vmx/vmcs.c
@@ -148,6 +148,11 @@ static int vmx_init_vmcs_config(void)
MSR_IA32_VMX_PINBASED_CTLS, &mismatch);
min = (CPU_BASED_HLT_EXITING |
+ CPU_BASED_VIRTUAL_INTR_PENDING |
+#ifdef __x86_64__
+ CPU_BASED_CR8_LOAD_EXITING |
+ CPU_BASED_CR8_STORE_EXITING |
+#endif
CPU_BASED_INVLPG_EXITING |
CPU_BASED_CR3_LOAD_EXITING |
CPU_BASED_CR3_STORE_EXITING |
@@ -166,13 +171,9 @@ static int vmx_init_vmcs_config(void)
MSR_IA32_VMX_PROCBASED_CTLS, &mismatch);
_vmx_cpu_based_exec_control &= ~CPU_BASED_RDTSC_EXITING;
#ifdef __x86_64__
- if ( !(_vmx_cpu_based_exec_control & CPU_BASED_TPR_SHADOW) )
- {
- min |= CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING;
- _vmx_cpu_based_exec_control = adjust_vmx_controls(
- "CPU-Based Exec Control", min, opt,
- MSR_IA32_VMX_PROCBASED_CTLS, &mismatch);
- }
+ if ( _vmx_cpu_based_exec_control & CPU_BASED_TPR_SHADOW )
+ _vmx_cpu_based_exec_control &=
+ ~(CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING);
#endif
if ( _vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
++++++ 23509-x86_32_Fix_build_Define_machine_to_phys_mapping_valid.patch ++++++
changeset: 23509:782bc7b2661a
user: Keir Fraser <keir@xxxxxxx>
date: Fri Jun 10 13:51:39 2011 +0100
files: xen/arch/x86/x86_32/mm.c
description:
x86_32: Fix build: Define machine_to_phys_mapping_valid
Signed-off-by: Keir Fraser <keir@xxxxxxx>
---
xen/arch/x86/x86_32/mm.c | 4 ++++
1 file changed, 4 insertions(+)
Index: xen-4.1.2-testing/xen/arch/x86/x86_32/mm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/x86_32/mm.c
+++ xen-4.1.2-testing/xen/arch/x86/x86_32/mm.c
@@ -39,6 +39,8 @@ extern l1_pgentry_t l1_identmap[L1_PAGET
unsigned int __read_mostly PAGE_HYPERVISOR = __PAGE_HYPERVISOR;
unsigned int __read_mostly PAGE_HYPERVISOR_NOCACHE = __PAGE_HYPERVISOR_NOCACHE;
+bool_t __read_mostly machine_to_phys_mapping_valid = 0;
+
static unsigned long __read_mostly mpt_size;
void *alloc_xen_pagetable(void)
@@ -123,6 +125,8 @@ void __init paging_init(void)
#undef CNT
#undef MFN
+ machine_to_phys_mapping_valid = 1;
+
/* Create page tables for ioremap()/map_domain_page_global(). */
for ( i = 0; i < (IOREMAP_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
{
++++++ 23511-amd-fam15-no-flush-for-C3.patch ++++++
References: FATE#309893
# HG changeset patch
# User Mark Langsdorf <mark.langsdorf@xxxxxxx>
# Date 1308051989 -3600
# Node ID 450f1d198e1e299b69489d513f591f0301cc5166
# Parent 864a3dd1d9b4664f1ece44c9eaf390969253b7a8
x86/amd: Eliminate cache flushing when entering C3 on select AMD processors
AMD Fam15h processors have a shared cache. It does not need
to be be flushed when entering C3 and doing so causes reduces
performance. Modify acpi_processor_power_init_bm_check to
prevent these processors from flushing when entering C3.
Signed-off-by: Mark Langsdorf <mark.langsdorf@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/acpi/cpu_idle.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/acpi/cpu_idle.c
+++ xen-4.1.2-testing/xen/arch/x86/acpi/cpu_idle.c
@@ -710,7 +710,8 @@ static void acpi_processor_power_init_bm
flags->bm_check = 0;
if ( num_online_cpus() == 1 )
flags->bm_check = 1;
- else if ( c->x86_vendor == X86_VENDOR_INTEL )
+ else if ( (c->x86_vendor == X86_VENDOR_INTEL) ||
+ ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 0x15)) )
{
/*
* Today all MP CPUs that support C3 share cache.
++++++ 23562-xenpaging_remove_unused_spinlock_in_pager.patch ++++++
changeset: 23562:8a7f52c59d64
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:02 2011 +0200
files: tools/xenpaging/mem_event.h tools/xenpaging/spinlock.h
tools/xenpaging/xenpaging.c tools/xenpaging/xenpaging.h
description:
xenpaging: remove unused spinlock in pager
The spinlock code in the pager is a no-op because xenpaging is a single
threaded application. There is no locking when put_response() places a
response into the ringbuffer.
The only locking is inside the hypervisor, where mem_event_put_request() and
mem_event_get_response() lock the ringbuffer to protect multiple vcpus from
each other.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/mem_event.h | 5 ---
tools/xenpaging/spinlock.h | 69 --------------------------------------------
tools/xenpaging/xenpaging.c | 12 -------
tools/xenpaging/xenpaging.h | 1
4 files changed, 87 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/mem_event.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/mem_event.h
+++ xen-4.1.2-testing/tools/xenpaging/mem_event.h
@@ -25,7 +25,6 @@
#define __XEN_MEM_EVENT_H__
-#include "spinlock.h"
#include "xc.h"
#include <xc_private.h>
@@ -33,9 +32,6 @@
#include <xen/mem_event.h>
-#define mem_event_ring_lock_init(_m) spin_lock_init(&(_m)->ring_lock)
-#define mem_event_ring_lock(_m) spin_lock(&(_m)->ring_lock)
-#define mem_event_ring_unlock(_m) spin_unlock(&(_m)->ring_lock)
typedef struct mem_event {
@@ -45,7 +41,6 @@ typedef struct mem_event {
mem_event_back_ring_t back_ring;
mem_event_shared_page_t *shared_page;
void *ring_page;
- spinlock_t ring_lock;
} mem_event_t;
Index: xen-4.1.2-testing/tools/xenpaging/spinlock.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/spinlock.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/******************************************************************************
- * tools/xenpaging/spinlock.h
- *
- * Spinlock implementation.
- *
- * Copyright (c) 2009 Citrix Systems, Inc. (Patrick Colp)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-
-#ifndef __SPINLOCK_H__
-#define __SPINLOCK_H__
-
-
-#include "bitops.h"
-
-
-#define SPIN_LOCK_UNLOCKED 0
-
-
-typedef int spinlock_t;
-
-
-static inline void spin_lock(spinlock_t *lock)
-{
- while ( test_and_set_bit(1, lock) );
-}
-
-static inline void spin_lock_init(spinlock_t *lock)
-{
- *lock = SPIN_LOCK_UNLOCKED;
-}
-
-static inline void spin_unlock(spinlock_t *lock)
-{
- *lock = SPIN_LOCK_UNLOCKED;
-}
-
-static inline int spin_trylock(spinlock_t *lock)
-{
- return !test_and_set_bit(1, lock);
-}
-
-
-#endif // __SPINLOCK_H__
-
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -32,7 +32,6 @@
#include <xen/mem_event.h>
#include "bitops.h"
-#include "spinlock.h"
#include "file_ops.h"
#include "xc.h"
@@ -127,9 +126,6 @@ static xenpaging_t *xenpaging_init(domid
BACK_RING_INIT(&paging->mem_event.back_ring,
(mem_event_sring_t *)paging->mem_event.ring_page,
PAGE_SIZE);
-
- /* Initialise lock */
- mem_event_ring_lock_init(&paging->mem_event);
/* Initialise Xen */
rc = xc_mem_event_enable(xch, paging->mem_event.domain_id,
@@ -302,8 +298,6 @@ static int get_request(mem_event_t *mem_
mem_event_back_ring_t *back_ring;
RING_IDX req_cons;
- mem_event_ring_lock(mem_event);
-
back_ring = &mem_event->back_ring;
req_cons = back_ring->req_cons;
@@ -315,8 +309,6 @@ static int get_request(mem_event_t *mem_
back_ring->req_cons = req_cons;
back_ring->sring->req_event = req_cons + 1;
- mem_event_ring_unlock(mem_event);
-
return 0;
}
@@ -325,8 +317,6 @@ static int put_response(mem_event_t *mem
mem_event_back_ring_t *back_ring;
RING_IDX rsp_prod;
- mem_event_ring_lock(mem_event);
-
back_ring = &mem_event->back_ring;
rsp_prod = back_ring->rsp_prod_pvt;
@@ -338,8 +328,6 @@ static int put_response(mem_event_t *mem
back_ring->rsp_prod_pvt = rsp_prod;
RING_PUSH_RESPONSES(back_ring);
- mem_event_ring_unlock(mem_event);
-
return 0;
}
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.h
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.h
@@ -25,7 +25,6 @@
#define __XEN_PAGING2_H__
-#include "spinlock.h"
#include "xc.h"
#include <xc_private.h>
++++++ 23571-vtd-fault-verbosity.patch ++++++
# HG changeset patch
# User Allen Kay <allen.m.kay@xxxxxxxxx>
# Date 1308823884 -3600
# Node ID d3ac71f22e8621d9a7604f82f3976337e6c97a9a
# Parent 065ca14be963fe4da55d629ed0b3692a14253a86
[VTD] print out debug message in vt-d fault handler only when iommu=debug is set
Print out debug messages in vtd_page_fault() handler only when
iommu=debug is set xen boot parameter.
Signed-off-by: Allen Kay <allen.m.kay@xxxxxxxxx>
Index: xen-4.1.2-testing/xen/drivers/passthrough/amd/iommu_acpi.c
===================================================================
--- xen-4.1.2-testing.orig/xen/drivers/passthrough/amd/iommu_acpi.c
+++ xen-4.1.2-testing/xen/drivers/passthrough/amd/iommu_acpi.c
@@ -822,7 +822,7 @@ static int __init parse_ivrs_table(struc
BUG_ON(!table);
- if ( amd_iommu_debug )
+ if ( iommu_debug )
dump_acpi_table_header(table);
/* parse IVRS blocks */
Index: xen-4.1.2-testing/xen/drivers/passthrough/iommu.c
===================================================================
--- xen-4.1.2-testing.orig/xen/drivers/passthrough/iommu.c
+++ xen-4.1.2-testing/xen/drivers/passthrough/iommu.c
@@ -48,7 +48,7 @@ bool_t __read_mostly iommu_snoop = 1;
bool_t __read_mostly iommu_qinval = 1;
bool_t __read_mostly iommu_intremap = 1;
bool_t __read_mostly iommu_hap_pt_share;
-bool_t __read_mostly amd_iommu_debug;
+bool_t __read_mostly iommu_debug;
bool_t __read_mostly amd_iommu_perdev_intremap;
static void __init parse_iommu_param(char *s)
@@ -74,8 +74,8 @@ static void __init parse_iommu_param(cha
iommu_qinval = 0;
else if ( !strcmp(s, "no-intremap") )
iommu_intremap = 0;
- else if ( !strcmp(s, "amd-iommu-debug") )
- amd_iommu_debug = 1;
+ else if ( !strcmp(s, "debug") )
+ iommu_debug = 1;
else if ( !strcmp(s, "amd-iommu-perdev-intremap") )
amd_iommu_perdev_intremap = 1;
else if ( !strcmp(s, "dom0-passthrough") )
Index: xen-4.1.2-testing/xen/drivers/passthrough/vtd/iommu.c
===================================================================
--- xen-4.1.2-testing.orig/xen/drivers/passthrough/vtd/iommu.c
+++ xen-4.1.2-testing/xen/drivers/passthrough/vtd/iommu.c
@@ -821,7 +821,7 @@ static int iommu_page_fault_do_one(struc
if ( fault_type == DMA_REMAP )
{
- dprintk(XENLOG_WARNING VTDPREFIX,
+ INTEL_IOMMU_DEBUG(
"DMAR:[%s] Request device [%02x:%02x.%d] "
"fault addr %"PRIx64", iommu reg = %p\n"
"DMAR:[fault reason %02xh] %s\n",
@@ -830,12 +830,13 @@ static int iommu_page_fault_do_one(struc
PCI_FUNC(source_id & 0xFF), addr, iommu->reg,
fault_reason, reason);
#ifndef __i386__ /* map_domain_page() cannot be used in this context */
- print_vtd_entries(iommu, (source_id >> 8),
+ if (iommu_debug)
+ print_vtd_entries(iommu, (source_id >> 8),
(source_id & 0xff), (addr >> PAGE_SHIFT));
#endif
}
else
- dprintk(XENLOG_WARNING VTDPREFIX,
+ INTEL_IOMMU_DEBUG(
"INTR-REMAP: Request device [%02x:%02x.%d] "
"fault index %"PRIx64", iommu reg = %p\n"
"INTR-REMAP:[fault reason %02xh] %s\n",
@@ -849,26 +850,19 @@ static int iommu_page_fault_do_one(struc
static void iommu_fault_status(u32 fault_status)
{
if ( fault_status & DMA_FSTS_PFO )
- dprintk(XENLOG_ERR VTDPREFIX,
- "iommu_fault_status: Fault Overflow\n");
+ INTEL_IOMMU_DEBUG("iommu_fault_status: Fault Overflow\n");
if ( fault_status & DMA_FSTS_PPF )
- dprintk(XENLOG_ERR VTDPREFIX,
- "iommu_fault_status: Primary Pending Fault\n");
+ INTEL_IOMMU_DEBUG("iommu_fault_status: Primary Pending Fault\n");
if ( fault_status & DMA_FSTS_AFO )
- dprintk(XENLOG_ERR VTDPREFIX,
- "iommu_fault_status: Advanced Fault Overflow\n");
+ INTEL_IOMMU_DEBUG("iommu_fault_status: Advanced Fault Overflow\n");
if ( fault_status & DMA_FSTS_APF )
- dprintk(XENLOG_ERR VTDPREFIX,
- "iommu_fault_status: Advanced Pending Fault\n");
+ INTEL_IOMMU_DEBUG("iommu_fault_status: Advanced Pending Fault\n");
if ( fault_status & DMA_FSTS_IQE )
- dprintk(XENLOG_ERR VTDPREFIX,
- "iommu_fault_status: Invalidation Queue Error\n");
+ INTEL_IOMMU_DEBUG("iommu_fault_status: Invalidation Queue Error\n");
if ( fault_status & DMA_FSTS_ICE )
- dprintk(XENLOG_ERR VTDPREFIX,
- "iommu_fault_status: Invalidation Completion Error\n");
+ INTEL_IOMMU_DEBUG("iommu_fault_status: Invalidation Completion
Error\n");
if ( fault_status & DMA_FSTS_ITE )
- dprintk(XENLOG_ERR VTDPREFIX,
- "iommu_fault_status: Invalidation Time-out Error\n");
+ INTEL_IOMMU_DEBUG("iommu_fault_status: Invalidation Time-out Error\n");
}
#define PRIMARY_FAULT_REG_LEN (16)
Index: xen-4.1.2-testing/xen/drivers/passthrough/vtd/iommu.h
===================================================================
--- xen-4.1.2-testing.orig/xen/drivers/passthrough/vtd/iommu.h
+++ xen-4.1.2-testing/xen/drivers/passthrough/vtd/iommu.h
@@ -512,4 +512,11 @@ struct intel_iommu {
struct acpi_drhd_unit *drhd;
};
+#define INTEL_IOMMU_DEBUG(fmt, args...) \
+ do \
+ { \
+ if ( iommu_debug ) \
+ dprintk(XENLOG_WARNING VTDPREFIX, fmt, ## args); \
+ } while(0)
+
#endif
Index: xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -34,7 +34,7 @@
#define AMD_IOMMU_DEBUG(fmt, args...) \
do \
{ \
- if ( amd_iommu_debug ) \
+ if ( iommu_debug ) \
printk(XENLOG_INFO "AMD-Vi: " fmt, ## args); \
} while(0)
Index: xen-4.1.2-testing/xen/include/xen/iommu.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/xen/iommu.h
+++ xen-4.1.2-testing/xen/include/xen/iommu.h
@@ -31,7 +31,7 @@ extern bool_t force_iommu, iommu_verbose
extern bool_t iommu_workaround_bios_bug, iommu_passthrough;
extern bool_t iommu_snoop, iommu_qinval, iommu_intremap;
extern bool_t iommu_hap_pt_share;
-extern bool_t amd_iommu_debug;
+extern bool_t iommu_debug;
extern bool_t amd_iommu_perdev_intremap;
/* Does this domain have a P2M table we can use as its IOMMU pagetable? */
++++++ 23574-x86-dom0-compressed-ELF.patch ++++++
References: fate#311376, fate#311529, bnc#578927, bnc#628554
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1308825237 -3600
# Node ID d7644abc218d3232b9d957ce94fc4b4bcc1f456e
# Parent 584c2e5e03d96f912cdfe90f8e9f910d5d661706
x86: allow Dom0 image to be compressed ELF
Rather than being able to decompress only the payloads of bzImage
containers, extend the logic to also decompress simple compressed ELF
images. At once, allow uncompressed bzImage payloads.
This is a prerequisite for native EFI booting support (where, in the
absence of a capable secondary boot loader, the image will always be
in compressed form).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/bzimage.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/bzimage.c
+++ xen-4.1.2-testing/xen/arch/x86/bzimage.c
@@ -5,6 +5,7 @@
#include <xen/string.h>
#include <xen/types.h>
#include <xen/decompress.h>
+#include <xen/libelf.h>
#include <asm/bzimage.h>
#define HEAPORDER 3
@@ -199,25 +200,36 @@ static __init int bzimage_check(struct s
return 1;
}
-int __init bzimage_headroom(char *image_start, unsigned long image_length)
+static unsigned long __initdata orig_image_len;
+
+unsigned long __init bzimage_headroom(char *image_start,
+ unsigned long image_length)
{
struct setup_header *hdr = (struct setup_header *)image_start;
- char *img;
- int err, headroom;
+ int err;
+ unsigned long headroom;
err = bzimage_check(hdr, image_length);
- if (err < 1)
+ if ( err < 0 )
return 0;
- img = image_start + (hdr->setup_sects+1) * 512;
- img += hdr->payload_offset;
+ if ( err > 0 )
+ {
+ image_start += (hdr->setup_sects + 1) * 512 + hdr->payload_offset;
+ image_length = hdr->payload_length;
+ }
+
+ if ( elf_is_elfbinary(image_start) )
+ return 0;
- headroom = output_length(img, hdr->payload_length);
- if (gzip_check(img, hdr->payload_length)) {
+ orig_image_len = image_length;
+ headroom = output_length(image_start, image_length);
+ if (gzip_check(image_start, image_length))
+ {
headroom += headroom >> 12; /* Add 8 bytes for every 32K input block */
headroom += (32768 + 18); /* Add 32K + 18 bytes of extra headroom */
} else
- headroom += hdr->payload_length;
+ headroom += image_length;
headroom = (headroom + 4095) & ~4095;
return headroom;
@@ -229,18 +241,24 @@ int __init bzimage_parse(char *image_bas
int err = bzimage_check(hdr, *image_len);
unsigned long output_len;
- if (err < 1)
+ if ( err < 0 )
return err;
+ if ( err > 0 )
+ {
+ *image_start += (hdr->setup_sects + 1) * 512 + hdr->payload_offset;
+ *image_len = hdr->payload_length;
+ }
+
+ if ( elf_is_elfbinary(*image_start) )
+ return 0;
+
BUG_ON(!(image_base < *image_start));
- *image_start += (hdr->setup_sects+1) * 512;
- *image_start += hdr->payload_offset;
- *image_len = hdr->payload_length;
- output_len = output_length(*image_start, *image_len);
+ output_len = output_length(*image_start, orig_image_len);
- if ( (err = perform_gunzip(image_base, *image_start, *image_len)) > 0 )
- err = decompress(*image_start, *image_len, image_base);
+ if ( (err = perform_gunzip(image_base, *image_start, orig_image_len)) > 0 )
+ err = decompress(*image_start, orig_image_len, image_base);
if ( !err )
{
Index: xen-4.1.2-testing/xen/include/asm-x86/bzimage.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/bzimage.h
+++ xen-4.1.2-testing/xen/include/asm-x86/bzimage.h
@@ -4,10 +4,9 @@
#include <xen/config.h>
#include <xen/init.h>
-int __init bzimage_headroom(char *image_start, unsigned long image_length);
+unsigned long bzimage_headroom(char *image_start, unsigned long image_length);
-int __init bzimage_parse(char *image_base,
- char **image_start,
- unsigned long *image_len);
+int bzimage_parse(char *image_base, char **image_start,
+ unsigned long *image_len);
#endif /* __X86_BZIMAGE_H__ */
++++++ 23575-x86-DMI.patch ++++++
References: fate#311376, fate#311529, bnc#578927, bnc#628554
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1308825280 -3600
# Node ID 4d9598a6a7777c50e109d7e2eb6d1cb28bcb4509
# Parent d7644abc218d3232b9d957ce94fc4b4bcc1f456e
x86/DMI: use proper structures instead of byte offsets
Besides being (in my eyes) desirable cleanup, this at once represents
another prerequisite for native EFI booting support.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/dmi_scan.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/dmi_scan.c
+++ xen-4.1.2-testing/xen/arch/x86/dmi_scan.c
@@ -10,11 +10,31 @@
#include <asm/system.h>
#include <xen/dmi.h>
-#define bt_ioremap(b,l) ((u8 *)__acpi_map_table(b,l))
+#define bt_ioremap(b,l) ((void *)__acpi_map_table(b,l))
#define bt_iounmap(b,l) ((void)0)
#define memcpy_fromio memcpy
#define alloc_bootmem(l) xmalloc_bytes(l)
+struct dmi_eps {
+ char anchor[5]; /* "_DMI_" */
+ u8 checksum;
+ u16 size;
+ u32 address;
+ u16 num_structures;
+ u8 revision;
+} __attribute__((packed));
+
+struct smbios_eps {
+ char anchor[4]; /* "_SM_" */
+ u8 checksum;
+ u8 length;
+ u8 major, minor;
+ u16 max_size;
+ u8 revision;
+ u8 _rsrvd_[5];
+ struct dmi_eps dmi;
+} __attribute__((packed));
+
struct dmi_header
{
u8 type;
@@ -90,62 +110,70 @@ static int __init dmi_table(u32 base, in
}
-inline static int __init dmi_checksum(u8 *buf)
+static inline bool_t __init dmi_checksum(const void __iomem *buf,
+ unsigned int len)
{
- u8 sum=0;
- int a;
+ u8 sum = 0;
+ const u8 *p = buf;
+ unsigned int a;
- for(a=0; a<15; a++)
- sum+=buf[a];
- return (sum==0);
+ for (a = 0; a < len; a++)
+ sum += p[a];
+ return sum == 0;
}
int __init dmi_get_table(u32 *base, u32 *len)
{
- u8 buf[15];
+ struct dmi_eps eps;
char __iomem *p, *q;
p = maddr_to_virt(0xF0000);
for (q = p; q < p + 0x10000; q += 16) {
- memcpy_fromio(buf, q, 15);
- if (memcmp(buf, "_DMI_", 5)==0 && dmi_checksum(buf)) {
- *base=buf[11]<<24|buf[10]<<16|buf[9]<<8|buf[8];
- *len=buf[7]<<8|buf[6];
+ memcpy_fromio(&eps, q, 15);
+ if (memcmp(eps.anchor, "_DMI_", 5) == 0 &&
+ dmi_checksum(&eps, sizeof(eps))) {
+ *base = eps.address;
+ *len = eps.size;
return 0;
}
}
return -1;
}
+static int __init _dmi_iterate(const struct dmi_eps *dmi,
+ const struct smbios_eps __iomem *smbios,
+ void (*decode)(struct dmi_header *))
+{
+ u16 num = dmi->num_structures;
+ u16 len = dmi->size;
+ u32 base = dmi->address;
+
+ /*
+ * DMI version 0.0 means that the real version is taken from
+ * the SMBIOS version, which we may not know at this point.
+ */
+ if (dmi->revision)
+ printk(KERN_INFO "DMI %d.%d present.\n",
+ dmi->revision >> 4, dmi->revision & 0x0f);
+ else if (!smbios)
+ printk(KERN_INFO "DMI present.\n");
+ dmi_printk((KERN_INFO "%d structures occupying %d bytes.\n",
+ num, len));
+ dmi_printk((KERN_INFO "DMI table at 0x%08X.\n", base));
+ return dmi_table(base, len, num, decode);
+}
+
static int __init dmi_iterate(void (*decode)(struct dmi_header *))
{
- u8 buf[15];
+ struct dmi_eps eps;
char __iomem *p, *q;
p = maddr_to_virt(0xF0000);
for (q = p; q < p + 0x10000; q += 16) {
- memcpy_fromio(buf, q, 15);
- if (memcmp(buf, "_DMI_", 5)==0 && dmi_checksum(buf)) {
- u16 num=buf[13]<<8|buf[12];
- u16 len=buf[7]<<8|buf[6];
- u32 base=buf[11]<<24|buf[10]<<16|buf[9]<<8|buf[8];
-
- /*
- * DMI version 0.0 means that the real version is taken
from
- * the SMBIOS version, which we don't know at this
point.
- */
- if(buf[14]!=0)
- printk(KERN_INFO "DMI %d.%d present.\n",
- buf[14]>>4, buf[14]&0x0F);
- else
- printk(KERN_INFO "DMI present.\n");
- dmi_printk((KERN_INFO "%d structures occupying %d
bytes.\n",
- num, len));
- dmi_printk((KERN_INFO "DMI table at 0x%08X.\n",
- base));
- if(dmi_table(base,len, num, decode)==0)
- return 0;
- }
+ memcpy_fromio(&eps, q, sizeof(eps));
+ if (memcmp(eps.anchor, "_DMI_", 5) == 0 &&
+ dmi_checksum(&eps, sizeof(eps)))
+ return _dmi_iterate(&eps, NULL, decode);
}
return -1;
}
++++++ 23576-x86_show_page_walk_also_for_early_page_faults.patch ++++++
changeset: 23576:e2235fe267eb
user: Jan Beulich <jbeulich@xxxxxxxxxx>
date: Thu Jun 23 11:35:55 2011 +0100
files: xen/arch/x86/mm.c xen/arch/x86/traps.c xen/arch/x86/x86_32/mm.c
xen/arch/x86/x86_32/traps.c xen/arch/x86/x86_64/mm.c xen/arch/x86/x86_64/traps.c
description:
x86: show page walk also for early page faults
At once, move the common (between 32- and 64-bit) definition of
machine_to_phys_mapping_valid to a common location.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
---
xen/arch/x86/mm.c | 2 ++
xen/arch/x86/traps.c | 1 +
xen/arch/x86/x86_32/mm.c | 2 --
xen/arch/x86/x86_32/traps.c | 9 ++++++---
xen/arch/x86/x86_64/mm.c | 2 --
xen/arch/x86/x86_64/traps.c | 12 ++++++++----
6 files changed, 17 insertions(+), 11 deletions(-)
Index: xen-4.1.2-testing/xen/arch/x86/mm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm.c
+++ xen-4.1.2-testing/xen/arch/x86/mm.c
@@ -151,6 +151,8 @@ unsigned long __read_mostly pdx_group_va
(FRAMETABLE_SIZE / sizeof(*frame_table) + PDX_GROUP_COUNT - 1)
/ PDX_GROUP_COUNT)] = { [0] = 1 };
+bool_t __read_mostly machine_to_phys_mapping_valid = 0;
+
#define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT)
bool_t __read_mostly opt_allow_superpage;
Index: xen-4.1.2-testing/xen/arch/x86/traps.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/traps.c
+++ xen-4.1.2-testing/xen/arch/x86/traps.c
@@ -1428,6 +1428,7 @@ asmlinkage void __init do_early_page_fau
unsigned long *stk = (unsigned long *)regs;
printk("Early fatal page fault at %04x:%p (cr2=%p, ec=%04x)\n",
regs->cs, _p(regs->eip), _p(cr2), regs->error_code);
+ show_page_walk(cr2);
printk("Stack dump: ");
while ( ((long)stk & ((PAGE_SIZE - 1) & ~(BYTES_PER_LONG - 1))) != 0 )
printk("%p ", _p(*stk++));
Index: xen-4.1.2-testing/xen/arch/x86/x86_32/mm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/x86_32/mm.c
+++ xen-4.1.2-testing/xen/arch/x86/x86_32/mm.c
@@ -39,8 +39,6 @@ extern l1_pgentry_t l1_identmap[L1_PAGET
unsigned int __read_mostly PAGE_HYPERVISOR = __PAGE_HYPERVISOR;
unsigned int __read_mostly PAGE_HYPERVISOR_NOCACHE = __PAGE_HYPERVISOR_NOCACHE;
-bool_t __read_mostly machine_to_phys_mapping_valid = 0;
-
static unsigned long __read_mostly mpt_size;
void *alloc_xen_pagetable(void)
Index: xen-4.1.2-testing/xen/arch/x86/x86_32/traps.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/x86_32/traps.c
+++ xen-4.1.2-testing/xen/arch/x86/x86_32/traps.c
@@ -164,7 +164,8 @@ void show_page_walk(unsigned long addr)
l3t += (cr3 & 0xFE0UL) >> 3;
l3e = l3t[l3_table_offset(addr)];
mfn = l3e_get_pfn(l3e);
- pfn = mfn_valid(mfn) ? get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
+ pfn = mfn_valid(mfn) && machine_to_phys_mapping_valid ?
+ get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
printk(" L3[0x%03lx] = %"PRIpte" %08lx\n",
l3_table_offset(addr), l3e_get_intpte(l3e), pfn);
unmap_domain_page(l3t);
@@ -175,7 +176,8 @@ void show_page_walk(unsigned long addr)
l2t = map_domain_page(mfn);
l2e = l2t[l2_table_offset(addr)];
mfn = l2e_get_pfn(l2e);
- pfn = mfn_valid(mfn) ? get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
+ pfn = mfn_valid(mfn) && machine_to_phys_mapping_valid ?
+ get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
printk(" L2[0x%03lx] = %"PRIpte" %08lx %s\n",
l2_table_offset(addr), l2e_get_intpte(l2e), pfn,
(l2e_get_flags(l2e) & _PAGE_PSE) ? "(PSE)" : "");
@@ -188,7 +190,8 @@ void show_page_walk(unsigned long addr)
l1t = map_domain_page(mfn);
l1e = l1t[l1_table_offset(addr)];
mfn = l1e_get_pfn(l1e);
- pfn = mfn_valid(mfn) ? get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
+ pfn = mfn_valid(mfn) && machine_to_phys_mapping_valid ?
+ get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
printk(" L1[0x%03lx] = %"PRIpte" %08lx\n",
l1_table_offset(addr), l1e_get_intpte(l1e), pfn);
unmap_domain_page(l1t);
Index: xen-4.1.2-testing/xen/arch/x86/x86_64/mm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/x86_64/mm.c
+++ xen-4.1.2-testing/xen/arch/x86/x86_64/mm.c
@@ -47,8 +47,6 @@ unsigned int __read_mostly pfn_pdx_hole_
unsigned int __read_mostly m2p_compat_vstart = __HYPERVISOR_COMPAT_VIRT_START;
-bool_t __read_mostly machine_to_phys_mapping_valid = 0;
-
/* Top-level master (and idle-domain) page directory. */
l4_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
idle_pg_table[L4_PAGETABLE_ENTRIES];
Index: xen-4.1.2-testing/xen/arch/x86/x86_64/traps.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/x86_64/traps.c
+++ xen-4.1.2-testing/xen/arch/x86/x86_64/traps.c
@@ -176,7 +176,8 @@ void show_page_walk(unsigned long addr)
l4t = mfn_to_virt(mfn);
l4e = l4t[l4_table_offset(addr)];
mfn = l4e_get_pfn(l4e);
- pfn = mfn_valid(mfn) ? get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
+ pfn = mfn_valid(mfn) && machine_to_phys_mapping_valid ?
+ get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
printk(" L4[0x%03lx] = %"PRIpte" %016lx\n",
l4_table_offset(addr), l4e_get_intpte(l4e), pfn);
if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) ||
@@ -186,7 +187,8 @@ void show_page_walk(unsigned long addr)
l3t = mfn_to_virt(mfn);
l3e = l3t[l3_table_offset(addr)];
mfn = l3e_get_pfn(l3e);
- pfn = mfn_valid(mfn) ? get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
+ pfn = mfn_valid(mfn) && machine_to_phys_mapping_valid ?
+ get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
printk(" L3[0x%03lx] = %"PRIpte" %016lx%s\n",
l3_table_offset(addr), l3e_get_intpte(l3e), pfn,
(l3e_get_flags(l3e) & _PAGE_PSE) ? " (PSE)" : "");
@@ -198,7 +200,8 @@ void show_page_walk(unsigned long addr)
l2t = mfn_to_virt(mfn);
l2e = l2t[l2_table_offset(addr)];
mfn = l2e_get_pfn(l2e);
- pfn = mfn_valid(mfn) ? get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
+ pfn = mfn_valid(mfn) && machine_to_phys_mapping_valid ?
+ get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
printk(" L2[0x%03lx] = %"PRIpte" %016lx %s\n",
l2_table_offset(addr), l2e_get_intpte(l2e), pfn,
(l2e_get_flags(l2e) & _PAGE_PSE) ? "(PSE)" : "");
@@ -210,7 +213,8 @@ void show_page_walk(unsigned long addr)
l1t = mfn_to_virt(mfn);
l1e = l1t[l1_table_offset(addr)];
mfn = l1e_get_pfn(l1e);
- pfn = mfn_valid(mfn) ? get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
+ pfn = mfn_valid(mfn) && machine_to_phys_mapping_valid ?
+ get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
printk(" L1[0x%03lx] = %"PRIpte" %016lx\n",
l1_table_offset(addr), l1e_get_intpte(l1e), pfn);
}
++++++ 23577-tools_merge_several_bitop_functions_into_xc_bitops.h.patch ++++++
++++ 1023 lines (skipped)
++++++ 23578-xenpaging_add_xs_handle_to_struct_xenpaging.patch ++++++
changeset: 23578:7299a9a44b35
user: Olaf Hering <olaf@xxxxxxxxx>
date: Wed Jun 22 14:47:09 2011 +0100
files: tools/xenpaging/xenpaging.c tools/xenpaging/xenpaging.h
description:
xenpaging: add xs_handle to struct xenpaging
A xs_handle is currently used in the xc_mem_paging_flush_ioemu_cache()
function and will be used by a subsequent patch.
Add it to struct xenpaging.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
---
tools/xenpaging/xenpaging.c | 14 ++++++++++++++
tools/xenpaging/xenpaging.h | 1 +
2 files changed, 15 insertions(+)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -28,6 +28,7 @@
#include <signal.h>
#include <unistd.h>
#include <xc_private.h>
+#include <xs.h>
#include <xen/mem_event.h>
@@ -92,6 +93,14 @@ static xenpaging_t *xenpaging_init(domid
paging = malloc(sizeof(xenpaging_t));
memset(paging, 0, sizeof(xenpaging_t));
+ /* Open connection to xenstore */
+ paging->xs_handle = xs_open(0);
+ if ( paging->xs_handle == NULL )
+ {
+ ERROR("Error initialising xenstore connection");
+ goto err;
+ }
+
p = getenv("XENPAGING_POLICY_MRU_SIZE");
if ( p && *p )
{
@@ -221,6 +230,8 @@ static xenpaging_t *xenpaging_init(domid
err:
if ( paging )
{
+ if ( paging->xs_handle )
+ xs_close(paging->xs_handle);
xc_interface_close(xch);
if ( paging->mem_event.shared_page )
{
@@ -277,6 +288,9 @@ static int xenpaging_teardown(xenpaging_
}
paging->mem_event.xce_handle = NULL;
+ /* Close connection to xenstore */
+ xs_close(paging->xs_handle);
+
/* Close connection to Xen */
rc = xc_interface_close(xch);
if ( rc != 0 )
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.h
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.h
@@ -36,6 +36,7 @@
typedef struct xenpaging {
xc_interface *xc_handle;
+ struct xs_handle *xs_handle;
xc_platform_info_t *platform_info;
xc_domaininfo_t *domain_info;
++++++ 23579-xenpaging_drop_xc.c_remove_ASSERT.patch ++++++
changeset: 23579:868c8c898f73
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:06 2011 +0200
files: tools/xenpaging/policy_default.c tools/xenpaging/xc.h
description:
xenpaging: drop xc.c, remove ASSERT
The ASSERT is not needed, victim is never NULL.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/policy_default.c | 1 -
tools/xenpaging/xc.h | 7 -------
2 files changed, 8 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/policy_default.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/policy_default.c
+++ xen-4.1.2-testing/tools/xenpaging/policy_default.c
@@ -78,7 +78,6 @@ int policy_choose_victim(xenpaging_t *pa
{
xc_interface *xch = paging->xc_handle;
unsigned long wrap = current_gfn;
- ASSERT(victim != NULL);
do
{
Index: xen-4.1.2-testing/tools/xenpaging/xc.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xc.h
+++ xen-4.1.2-testing/tools/xenpaging/xc.h
@@ -30,13 +30,6 @@
#include <xen/mem_event.h>
-#if 1
-#define ASSERT(_p) \
- if ( !(_p) ) { DPRINTF("Assertion '%s' failed, line %d, file %s", #_p , \
- __LINE__, __FILE__); *(int*)0=0; }
-#else
-#define ASSERT(_p) ((void)0)
-#endif
++++++ 23580-xenpaging_drop_xc.c_remove_xc_platform_info_t.patch ++++++
changeset: 23580:771b6984aa2a
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:07 2011 +0200
files: tools/xenpaging/xc.c tools/xenpaging/xc.h
tools/xenpaging/xenpaging.c tools/xenpaging/xenpaging.h
description:
xenpaging: drop xc.c, remove xc_platform_info_t
xc_platform_info_t is not used in xenpaging.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xc.c | 10 ----------
tools/xenpaging/xc.h | 8 --------
tools/xenpaging/xenpaging.c | 17 -----------------
tools/xenpaging/xenpaging.h | 1 -
4 files changed, 36 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xc.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xc.c
+++ xen-4.1.2-testing/tools/xenpaging/xc.c
@@ -26,7 +26,6 @@
#include <stdarg.h>
#include <sys/poll.h>
#include <xc_private.h>
-#include <xg_save_restore.h>
#include <xs.h>
#include "xc.h"
@@ -97,15 +96,6 @@ int xc_wait_for_event(xc_interface *xch,
return xc_wait_for_event_or_timeout(xch, xce, -1);
}
-int xc_get_platform_info(xc_interface *xc_handle, domid_t domain_id,
- xc_platform_info_t *platform_info)
-{
- return get_platform_info(xc_handle, domain_id,
- &platform_info->max_mfn,
- &platform_info->hvirt_start,
- &platform_info->pt_levels,
- &platform_info->guest_width);
-}
/*
Index: xen-4.1.2-testing/tools/xenpaging/xc.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xc.h
+++ xen-4.1.2-testing/tools/xenpaging/xc.h
@@ -34,12 +34,6 @@
-typedef struct xc_platform_info {
- unsigned long max_mfn;
- unsigned long hvirt_start;
- unsigned int pt_levels;
- unsigned int guest_width;
-} xc_platform_info_t;
@@ -47,8 +41,6 @@ int xc_mem_paging_flush_ioemu_cache(domi
int xc_wait_for_event(xc_interface *xch, xc_evtchn *xce);
int xc_wait_for_event_or_timeout(xc_interface *xch, xc_evtchn *xce, unsigned
long ms);
-int xc_get_platform_info(xc_interface *xc_handle, domid_t domain_id,
- xc_platform_info_t *platform_info);
#endif // __XC_H__
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -176,22 +176,6 @@ static xenpaging_t *xenpaging_init(domid
paging->mem_event.port = rc;
- /* Get platform info */
- paging->platform_info = malloc(sizeof(xc_platform_info_t));
- if ( paging->platform_info == NULL )
- {
- ERROR("Error allocating memory for platform info");
- goto err;
- }
-
- rc = xc_get_platform_info(xch, paging->mem_event.domain_id,
- paging->platform_info);
- if ( rc != 1 )
- {
- ERROR("Error getting platform info");
- goto err;
- }
-
/* Get domaininfo */
paging->domain_info = malloc(sizeof(xc_domaininfo_t));
if ( paging->domain_info == NULL )
@@ -246,7 +230,6 @@ static xenpaging_t *xenpaging_init(domid
}
free(paging->bitmap);
- free(paging->platform_info);
free(paging->domain_info);
free(paging);
}
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.h
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.h
@@ -38,7 +38,6 @@ typedef struct xenpaging {
xc_interface *xc_handle;
struct xs_handle *xs_handle;
- xc_platform_info_t *platform_info;
xc_domaininfo_t *domain_info;
unsigned long *bitmap;
++++++ 23581-xenpaging_drop_xc.c_remove_xc_wait_for_event.patch ++++++
changeset: 23581:9ce56626a5ab
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:08 2011 +0200
files: tools/xenpaging/xc.c tools/xenpaging/xc.h
description:
xenpaging: drop xc.c, remove xc_wait_for_event
xc_wait_for_event is not used in xenpaging.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xc.c | 4 ----
tools/xenpaging/xc.h | 1 -
2 files changed, 5 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xc.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xc.c
+++ xen-4.1.2-testing/tools/xenpaging/xc.c
@@ -91,10 +91,6 @@ int xc_wait_for_event_or_timeout(xc_inte
return -errno;
}
-int xc_wait_for_event(xc_interface *xch, xc_evtchn *xce)
-{
- return xc_wait_for_event_or_timeout(xch, xce, -1);
-}
Index: xen-4.1.2-testing/tools/xenpaging/xc.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xc.h
+++ xen-4.1.2-testing/tools/xenpaging/xc.h
@@ -38,7 +38,6 @@
int xc_mem_paging_flush_ioemu_cache(domid_t domain_id);
-int xc_wait_for_event(xc_interface *xch, xc_evtchn *xce);
int xc_wait_for_event_or_timeout(xc_interface *xch, xc_evtchn *xce, unsigned
long ms);
++++++ 23582-xenpaging_drop_xc.c_move_xc_mem_paging_flush_ioemu_cache.patch
++++++
changeset: 23582:480e548fe76b
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:10 2011 +0200
files: tools/xenpaging/xc.c tools/xenpaging/xc.h
tools/xenpaging/xenpaging.c
description:
xenpaging: drop xc.c, move xc_mem_paging_flush_ioemu_cache
Move xc_mem_paging_flush_ioemu_cache() into xenpaging and massage it a bit to
use the required members from xenpaging_t.
Also update type of rc to match xs_write() return value.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xc.c | 18 ------------------
tools/xenpaging/xc.h | 1 -
tools/xenpaging/xenpaging.c | 16 +++++++++++++++-
3 files changed, 15 insertions(+), 20 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xc.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xc.c
+++ xen-4.1.2-testing/tools/xenpaging/xc.c
@@ -31,24 +31,6 @@
-int xc_mem_paging_flush_ioemu_cache(domid_t domain_id)
-{
- struct xs_handle *xsh = NULL;
- char path[80];
- int rc;
-
- sprintf(path, "/local/domain/0/device-model/%u/command", domain_id);
-
- xsh = xs_daemon_open();
- if ( xsh == NULL )
- return -EIO;
-
- rc = xs_write(xsh, XBT_NULL, path, "flush-cache", strlen("flush-cache"));
-
- xs_daemon_close(xsh);
-
- return rc ? 0 : -1;
-}
int xc_wait_for_event_or_timeout(xc_interface *xch, xc_evtchn *xce, unsigned
long ms)
{
Index: xen-4.1.2-testing/tools/xenpaging/xc.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xc.h
+++ xen-4.1.2-testing/tools/xenpaging/xc.h
@@ -37,7 +37,6 @@
-int xc_mem_paging_flush_ioemu_cache(domid_t domain_id);
int xc_wait_for_event_or_timeout(xc_interface *xch, xc_evtchn *xce, unsigned
long ms);
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -48,6 +48,20 @@ static void close_handler(int sig)
unlink(filename);
}
+static int xenpaging_mem_paging_flush_ioemu_cache(xenpaging_t *paging)
+{
+ struct xs_handle *xsh = paging->xs_handle;
+ domid_t domain_id = paging->mem_event.domain_id;
+ char path[80];
+ bool rc;
+
+ sprintf(path, "/local/domain/0/device-model/%u/command", domain_id);
+
+ rc = xs_write(xsh, XBT_NULL, path, "flush-cache", strlen("flush-cache"));
+
+ return rc == true ? 0 : -1;
+}
+
static void *init_page(void)
{
void *buffer;
@@ -484,7 +498,7 @@ static int evict_victim(xenpaging_t *pag
else
{
if ( j++ % 1000 == 0 )
- if (
xc_mem_paging_flush_ioemu_cache(paging->mem_event.domain_id) )
+ if ( xenpaging_mem_paging_flush_ioemu_cache(paging) )
ERROR("Error flushing ioemu cache");
}
}
++++++ 23583-xenpaging_drop_xc.c_move_xc_wait_for_event_or_timeout.patch ++++++
changeset: 23583:235d8fdcb3a9
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:11 2011 +0200
files: tools/xenpaging/xc.c tools/xenpaging/xc.h
tools/xenpaging/xenpaging.c
description:
xenpaging: drop xc.c, move xc_wait_for_event_or_timeout
Move xc_wait_for_event_or_timeout() into xenpaging and massage it a bit for
further changes in subsequent patches.
Include poll.h instead of sys/poll.h.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xc.c | 40 ------------------------------------
tools/xenpaging/xc.h | 1
tools/xenpaging/xenpaging.c | 48 +++++++++++++++++++++++++++++++++++++++++---
3 files changed, 45 insertions(+), 44 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xc.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xc.c
+++ xen-4.1.2-testing/tools/xenpaging/xc.c
@@ -32,46 +32,6 @@
-int xc_wait_for_event_or_timeout(xc_interface *xch, xc_evtchn *xce, unsigned
long ms)
-{
- struct pollfd fd = { .fd = xc_evtchn_fd(xce), .events = POLLIN | POLLERR };
- int port;
- int rc;
-
- rc = poll(&fd, 1, ms);
- if ( rc == -1 )
- {
- if (errno == EINTR)
- return 0;
-
- ERROR("Poll exited with an error");
- goto err;
- }
-
- if ( rc == 1 )
- {
- port = xc_evtchn_pending(xce);
- if ( port == -1 )
- {
- ERROR("Failed to read port from event channel");
- goto err;
- }
-
- rc = xc_evtchn_unmask(xce, port);
- if ( rc != 0 )
- {
- ERROR("Failed to unmask event channel port");
- goto err;
- }
- }
- else
- port = -1;
-
- return port;
-
- err:
- return -errno;
-}
Index: xen-4.1.2-testing/tools/xenpaging/xc.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xc.h
+++ xen-4.1.2-testing/tools/xenpaging/xc.h
@@ -37,7 +37,6 @@
-int xc_wait_for_event_or_timeout(xc_interface *xch, xc_evtchn *xce, unsigned
long ms);
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -27,6 +27,7 @@
#include <time.h>
#include <signal.h>
#include <unistd.h>
+#include <poll.h>
#include <xc_private.h>
#include <xs.h>
@@ -62,6 +63,47 @@ static int xenpaging_mem_paging_flush_io
return rc == true ? 0 : -1;
}
+static int xenpaging_wait_for_event_or_timeout(xenpaging_t *paging)
+{
+ xc_interface *xch = paging->xc_handle;
+ xc_evtchn *xce = paging->mem_event.xce_handle;
+ struct pollfd fd[1];
+ int port;
+ int rc;
+
+ fd[0].fd = xc_evtchn_fd(xce);
+ fd[0].events = POLLIN | POLLERR;
+ rc = poll(fd, 1, 100);
+ if ( rc < 0 )
+ {
+ if (errno == EINTR)
+ return 0;
+
+ ERROR("Poll exited with an error");
+ return -errno;
+ }
+
+ if ( rc && fd[0].revents & POLLIN )
+ {
+ DPRINTF("Got event from evtchn\n");
+ port = xc_evtchn_pending(xce);
+ if ( port == -1 )
+ {
+ ERROR("Failed to read port from event channel");
+ rc = -1;
+ goto err;
+ }
+
+ rc = xc_evtchn_unmask(xce, port);
+ if ( rc < 0 )
+ {
+ ERROR("Failed to unmask event channel port");
+ }
+ }
+err:
+ return rc;
+}
+
static void *init_page(void)
{
void *buffer;
@@ -598,13 +640,13 @@ int main(int argc, char *argv[])
while ( !interrupted )
{
/* Wait for Xen to signal that a page needs paged in */
- rc = xc_wait_for_event_or_timeout(xch, paging->mem_event.xce_handle,
100);
- if ( rc < -1 )
+ rc = xenpaging_wait_for_event_or_timeout(paging);
+ if ( rc < 0 )
{
ERROR("Error getting event");
goto out;
}
- else if ( rc != -1 )
+ else if ( rc != 0 )
{
DPRINTF("Got event from Xen\n");
}
++++++ 23584-xenpaging_drop_xc.c_remove_xc_files.patch ++++++
changeset: 23584:e30cff57b146
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:12 2011 +0200
files: tools/xenpaging/Makefile tools/xenpaging/mem_event.h
tools/xenpaging/xc.c tools/xenpaging/xc.h tools/xenpaging/xenpaging.c
tools/xenpaging/xenpaging.h
description:
xenpaging: drop xc.c, remove xc files
Finally remove xc.c/xc.h and its references since both are empty now.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/Makefile | 2 -
tools/xenpaging/mem_event.h | 1
tools/xenpaging/xc.c | 47 --------------------------------------
tools/xenpaging/xc.h | 54 --------------------------------------------
tools/xenpaging/xenpaging.c | 1
tools/xenpaging/xenpaging.h | 1
6 files changed, 1 insertion(+), 105 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/Makefile
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/Makefile
+++ xen-4.1.2-testing/tools/xenpaging/Makefile
@@ -9,7 +9,7 @@ LDLIBS += $(LDLIBS_libxenctrl) $(LDLIBS
POLICY = default
SRC :=
-SRCS += file_ops.c xc.c xenpaging.c policy_$(POLICY).c
+SRCS += file_ops.c xenpaging.c policy_$(POLICY).c
CFLAGS += -Werror
CFLAGS += -Wno-unused
Index: xen-4.1.2-testing/tools/xenpaging/mem_event.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/mem_event.h
+++ xen-4.1.2-testing/tools/xenpaging/mem_event.h
@@ -25,7 +25,6 @@
#define __XEN_MEM_EVENT_H__
-#include "xc.h"
#include <xc_private.h>
#include <xen/event_channel.h>
Index: xen-4.1.2-testing/tools/xenpaging/xc.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xc.c
+++ /dev/null
@@ -1,47 +0,0 @@
-/******************************************************************************
- * tools/xenpaging/lib/xc.c
- *
- * libxc-type add-ons for paging support.
- *
- * Copyright (c) 2009 Citrix Systems, Inc. (Patrick Colp)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-
-#include <errno.h>
-#include <string.h>
-#include <stdarg.h>
-#include <sys/poll.h>
-#include <xc_private.h>
-#include <xs.h>
-#include "xc.h"
-
-
-
-
-
-
-
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
Index: xen-4.1.2-testing/tools/xenpaging/xc.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xc.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/******************************************************************************
- * tools/xenpaging/lib/xc.h
- *
- * libxc add-ons.
- *
- * Copyright (c) 2009 Citrix Systems, Inc. (Patrick Colp)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-
-#ifndef __XC_H__
-#define __XC_H__
-
-
-#include <stdarg.h>
-#include <xc_private.h>
-#include <xen/mem_event.h>
-
-
-
-
-
-
-
-
-
-
-
-
-#endif // __XC_H__
-
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -35,7 +35,6 @@
#include "xc_bitops.h"
#include "file_ops.h"
-#include "xc.h"
#include "policy.h"
#include "xenpaging.h"
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.h
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.h
@@ -25,7 +25,6 @@
#define __XEN_PAGING2_H__
-#include "xc.h"
#include <xc_private.h>
#include <xen/event_channel.h>
++++++
23585-xenpaging_correct_dropping_of_pages_to_avoid_full_ring_buffer.patch ++++++
changeset: 23585:b4d18ac00a46
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:14 2011 +0200
files: tools/xenpaging/xenpaging.c
description:
xenpaging: correct dropping of pages to avoid full ring buffer
Doing a one-way channel from Xen to xenpaging is not possible with the
current ring buffer implementation. xenpaging uses the mem_event ring
buffer, which expects request/response pairs to make progress. The
previous patch, which tried to establish a one-way communication from
Xen to xenpaging, stalled the guest once the buffer was filled up with
requests. Correct page-dropping by taking the slow path and let
p2m_mem_paging_resume() consume the response from xenpaging. This makes
room for yet another request/response pair and avoids hanging guests.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -690,19 +690,19 @@ int main(int argc, char *argv[])
ERROR("Error populating page");
goto out;
}
+ }
- /* Prepare the response */
- rsp.gfn = req.gfn;
- rsp.p2mt = req.p2mt;
- rsp.vcpu_id = req.vcpu_id;
- rsp.flags = req.flags;
+ /* Prepare the response */
+ rsp.gfn = req.gfn;
+ rsp.p2mt = req.p2mt;
+ rsp.vcpu_id = req.vcpu_id;
+ rsp.flags = req.flags;
- rc = xenpaging_resume_page(paging, &rsp, 1);
- if ( rc != 0 )
- {
- ERROR("Error resuming page");
- goto out;
- }
+ rc = xenpaging_resume_page(paging, &rsp, 1);
+ if ( rc != 0 )
+ {
+ ERROR("Error resuming page");
+ goto out;
}
/* Evict a new page to replace the one we just paged in */
++++++ 23586-xenpaging_do_not_bounce_p2mt_back_to_the_hypervisor.patch ++++++
changeset: 23586:bbdd7413a50a
user: Olaf Hering <olaf@xxxxxxxxx>
date: Wed Jun 22 14:47:13 2011 +0100
files: tools/xenpaging/xenpaging.c
description:
xenpaging: do not bounce p2mt back to the hypervisor
do not bounce p2mt back to the hypervisor because p2m_mem_paging_populate()
and p2m_mem_paging_resume() dont make use of p2mt.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 4 ----
1 file changed, 4 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -694,7 +694,6 @@ int main(int argc, char *argv[])
/* Prepare the response */
rsp.gfn = req.gfn;
- rsp.p2mt = req.p2mt;
rsp.vcpu_id = req.vcpu_id;
rsp.flags = req.flags;
@@ -711,10 +710,8 @@ int main(int argc, char *argv[])
else
{
DPRINTF("page already populated (domain = %d; vcpu = %d;"
- " p2mt = %x;"
" gfn = %"PRIx64"; paused = %d)\n",
paging->mem_event.domain_id, req.vcpu_id,
- req.p2mt,
req.gfn, req.flags & MEM_EVENT_FLAG_VCPU_PAUSED);
/* Tell Xen to resume the vcpu */
@@ -723,7 +720,6 @@ int main(int argc, char *argv[])
{
/* Prepare the response */
rsp.gfn = req.gfn;
- rsp.p2mt = req.p2mt;
rsp.vcpu_id = req.vcpu_id;
rsp.flags = req.flags;
++++++ 23587-xenpaging_remove_srand_call.patch ++++++
changeset: 23587:926febc8bd98
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:16 2011 +0200
files: tools/xenpaging/xenpaging.c
description:
xenpaging: remove srand call
The policy uses now a linear algorithm instead of a random one.
Remove the call to srand().
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 3 ---
1 file changed, 3 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -581,9 +581,6 @@ int main(int argc, char *argv[])
domain_id = atoi(argv[1]);
num_pages = atoi(argv[2]);
- /* Seed random-number generator */
- srand(time(NULL));
-
/* Initialise domain paging */
paging = xenpaging_init(domain_id);
if ( paging == NULL )
++++++
23588-xenpaging_remove_return_values_from_functions_that_can_not_fail.patch
++++++
changeset: 23588:e48535e70145
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:18 2011 +0200
files: tools/xenpaging/xenpaging.c
description:
xenpaging: remove return values from functions that can not fail
get_request() and put_response() can not fail, remove return value
and update calling functions.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 19 ++++---------------
1 file changed, 4 insertions(+), 15 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -342,7 +342,7 @@ static int xenpaging_teardown(xenpaging_
return -1;
}
-static int get_request(mem_event_t *mem_event, mem_event_request_t *req)
+static void get_request(mem_event_t *mem_event, mem_event_request_t *req)
{
mem_event_back_ring_t *back_ring;
RING_IDX req_cons;
@@ -357,11 +357,9 @@ static int get_request(mem_event_t *mem_
/* Update ring */
back_ring->req_cons = req_cons;
back_ring->sring->req_event = req_cons + 1;
-
- return 0;
}
-static int put_response(mem_event_t *mem_event, mem_event_response_t *rsp)
+static void put_response(mem_event_t *mem_event, mem_event_response_t *rsp)
{
mem_event_back_ring_t *back_ring;
RING_IDX rsp_prod;
@@ -376,8 +374,6 @@ static int put_response(mem_event_t *mem
/* Update ring */
back_ring->rsp_prod_pvt = rsp_prod;
RING_PUSH_RESPONSES(back_ring);
-
- return 0;
}
static int xenpaging_evict_page(xenpaging_t *paging,
@@ -437,9 +433,7 @@ static int xenpaging_resume_page(xenpagi
int ret;
/* Put the page info on the ring */
- ret = put_response(&paging->mem_event, rsp);
- if ( ret != 0 )
- goto out;
+ put_response(&paging->mem_event, rsp);
/* Notify policy of page being paged in */
if ( notify_policy )
@@ -649,12 +643,7 @@ int main(int argc, char *argv[])
while ( RING_HAS_UNCONSUMED_REQUESTS(&paging->mem_event.back_ring) )
{
- rc = get_request(&paging->mem_event, &req);
- if ( rc != 0 )
- {
- ERROR("Error getting request");
- goto out;
- }
+ get_request(&paging->mem_event, &req);
/* Check if the page has already been paged in */
if ( test_and_clear_bit(req.gfn, paging->bitmap) )
++++++ 23589-xenpaging_catch_xc_mem_paging_resume_errors.patch ++++++
changeset: 23589:49cb290ede16
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:19 2011 +0200
files: tools/xenpaging/xenpaging.c
description:
xenpaging: catch xc_mem_paging_resume errors
In the unlikely event that xc_mem_paging_resume() fails, do not overwrite the
error with the return value from xc_evtchn_notify()
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -442,8 +442,9 @@ static int xenpaging_resume_page(xenpagi
/* Tell Xen page is ready */
ret = xc_mem_paging_resume(paging->xc_handle, paging->mem_event.domain_id,
rsp->gfn);
- ret = xc_evtchn_notify(paging->mem_event.xce_handle,
- paging->mem_event.port);
+ if ( ret == 0 )
+ ret = xc_evtchn_notify(paging->mem_event.xce_handle,
+ paging->mem_event.port);
out:
return ret;
++++++ 23590-xenpaging_remove_local_domain_id_variable.patch ++++++
changeset: 23590:d957acb8bee6
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:20 2011 +0200
files: tools/xenpaging/xenpaging.c
description:
xenpaging: remove local domain_id variable
Remove the local domain_id variable, it is already fetched from
paging->mem_event in other places.
Update the sprintf format string to use unsigned argument.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -552,7 +552,6 @@ static int evict_victim(xenpaging_t *pag
int main(int argc, char *argv[])
{
struct sigaction act;
- domid_t domain_id;
int num_pages;
xenpaging_t *paging;
xenpaging_victim_t *victims;
@@ -573,11 +572,10 @@ int main(int argc, char *argv[])
return -1;
}
- domain_id = atoi(argv[1]);
num_pages = atoi(argv[2]);
/* Initialise domain paging */
- paging = xenpaging_init(domain_id);
+ paging = xenpaging_init(atoi(argv[1]));
if ( paging == NULL )
{
fprintf(stderr, "Error initialising paging");
@@ -585,10 +583,10 @@ int main(int argc, char *argv[])
}
xch = paging->xc_handle;
- DPRINTF("starting %s %u %d\n", argv[0], domain_id, num_pages);
+ DPRINTF("starting %s %u %d\n", argv[0], paging->mem_event.domain_id,
num_pages);
/* Open file */
- sprintf(filename, "page_cache_%d", domain_id);
+ sprintf(filename, "page_cache_%u", paging->mem_event.domain_id);
fd = open(filename, open_flags, open_mode);
if ( fd < 0 )
{
++++++ 23591-xenpaging_move_num_pages_into_xenpaging_struct.patch ++++++
changeset: 23591:4aaa90c1db42
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:22 2011 +0200
files: tools/xenpaging/xenpaging.c tools/xenpaging/xenpaging.h
description:
xenpaging: move num_pages into xenpaging struct
Move num_pages into struct xenpaging.
num_pages will be used by the policy in a subsequent patch.
Also remove a memset, the victims array is allocated with calloc.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 30 ++++++++++++++----------------
tools/xenpaging/xenpaging.h | 1 +
2 files changed, 15 insertions(+), 16 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -128,7 +128,7 @@ static void *init_page(void)
return NULL;
}
-static xenpaging_t *xenpaging_init(domid_t domain_id)
+static xenpaging_t *xenpaging_init(domid_t domain_id, int num_pages)
{
xenpaging_t *paging;
xc_interface *xch;
@@ -256,6 +256,13 @@ static xenpaging_t *xenpaging_init(domid
}
DPRINTF("max_pages = %"PRIx64"\n", paging->domain_info->max_pages);
+ if ( num_pages < 0 || num_pages > paging->domain_info->max_pages )
+ {
+ num_pages = paging->domain_info->max_pages;
+ DPRINTF("setting num_pages to %d\n", num_pages);
+ }
+ paging->num_pages = num_pages;
+
/* Initialise policy */
rc = policy_init(paging);
if ( rc != 0 )
@@ -552,7 +559,6 @@ static int evict_victim(xenpaging_t *pag
int main(int argc, char *argv[])
{
struct sigaction act;
- int num_pages;
xenpaging_t *paging;
xenpaging_victim_t *victims;
mem_event_request_t req;
@@ -572,10 +578,8 @@ int main(int argc, char *argv[])
return -1;
}
- num_pages = atoi(argv[2]);
-
/* Initialise domain paging */
- paging = xenpaging_init(atoi(argv[1]));
+ paging = xenpaging_init(atoi(argv[1]), atoi(argv[2]));
if ( paging == NULL )
{
fprintf(stderr, "Error initialising paging");
@@ -583,7 +587,7 @@ int main(int argc, char *argv[])
}
xch = paging->xc_handle;
- DPRINTF("starting %s %u %d\n", argv[0], paging->mem_event.domain_id,
num_pages);
+ DPRINTF("starting %s %u %d\n", argv[0], paging->mem_event.domain_id,
paging->num_pages);
/* Open file */
sprintf(filename, "page_cache_%u", paging->mem_event.domain_id);
@@ -594,12 +598,7 @@ int main(int argc, char *argv[])
return 2;
}
- if ( num_pages < 0 || num_pages > paging->domain_info->max_pages )
- {
- num_pages = paging->domain_info->max_pages;
- DPRINTF("setting num_pages to %d\n", num_pages);
- }
- victims = calloc(num_pages, sizeof(xenpaging_victim_t));
+ victims = calloc(paging->num_pages, sizeof(xenpaging_victim_t));
/* ensure that if we get a signal, we'll do cleanup, then exit */
act.sa_handler = close_handler;
@@ -611,8 +610,7 @@ int main(int argc, char *argv[])
sigaction(SIGALRM, &act, NULL);
/* Evict pages */
- memset(victims, 0, sizeof(xenpaging_victim_t) * num_pages);
- for ( i = 0; i < num_pages; i++ )
+ for ( i = 0; i < paging->num_pages; i++ )
{
rc = evict_victim(paging, &victims[i], fd, i);
if ( rc == -ENOSPC )
@@ -648,13 +646,13 @@ int main(int argc, char *argv[])
if ( test_and_clear_bit(req.gfn, paging->bitmap) )
{
/* Find where in the paging file to read from */
- for ( i = 0; i < num_pages; i++ )
+ for ( i = 0; i < paging->num_pages; i++ )
{
if ( victims[i].gfn == req.gfn )
break;
}
- if ( i >= num_pages )
+ if ( i >= paging->num_pages )
{
DPRINTF("Couldn't find page %"PRIx64"\n", req.gfn);
goto out;
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.h
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.h
@@ -42,6 +42,7 @@ typedef struct xenpaging {
unsigned long *bitmap;
mem_event_t mem_event;
+ int num_pages;
int policy_mru_size;
} xenpaging_t;
++++++ 23592-xenpaging_start_paging_in_the_middle_of_gfn_range.patch ++++++
changeset: 23592:1e44e75d889c
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:23 2011 +0200
files: tools/xenpaging/policy_default.c
description:
xenpaging: start paging in the middle of gfn range
Set the starting gfn to somewhere in the middle of the gfn range to
avoid paging during BIOS startup. This can speedup booting of a guest.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/policy_default.c | 4 ++++
1 file changed, 4 insertions(+)
Index: xen-4.1.2-testing/tools/xenpaging/policy_default.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/policy_default.c
+++ xen-4.1.2-testing/tools/xenpaging/policy_default.c
@@ -69,6 +69,10 @@ int policy_init(xenpaging_t *paging)
/* Don't page out page 0 */
set_bit(0, bitmap);
+ /* Start in the middle to avoid paging during BIOS startup */
+ current_gfn = max_pages / 2;
+ current_gfn -= paging->num_pages / 2;
+
rc = 0;
out:
return rc;
++++++ 23593-xenpaging_pass_integer_to_xenpaging_populate_page.patch ++++++
changeset: 23593:7d72475641fa
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:24 2011 +0200
files: tools/xenpaging/xenpaging.c
description:
xenpaging: pass integer to xenpaging_populate_page
Pass gfn as integer to xenpaging_populate_page(). xc_map_foreign_pages()
takes a pointer to a list of gfns, but its a const pointer. So writing
the value back to the caller is not needed.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -458,27 +458,24 @@ static int xenpaging_resume_page(xenpagi
}
static int xenpaging_populate_page(xenpaging_t *paging,
- uint64_t *gfn, int fd, int i)
+ xen_pfn_t gfn, int fd, int i)
{
xc_interface *xch = paging->xc_handle;
- unsigned long _gfn;
void *page;
int ret;
unsigned char oom = 0;
- _gfn = *gfn;
- DPRINTF("populate_page < gfn %lx pageslot %d\n", _gfn, i);
+ DPRINTF("populate_page < gfn %"PRI_xen_pfn" pageslot %d\n", gfn, i);
do
{
/* Tell Xen to allocate a page for the domain */
- ret = xc_mem_paging_prep(xch, paging->mem_event.domain_id,
- _gfn);
+ ret = xc_mem_paging_prep(xch, paging->mem_event.domain_id, gfn);
if ( ret != 0 )
{
if ( errno == ENOMEM )
{
if ( oom++ == 0 )
- DPRINTF("ENOMEM while preparing gfn %lx\n", _gfn);
+ DPRINTF("ENOMEM while preparing gfn %"PRI_xen_pfn"\n",
gfn);
sleep(1);
continue;
}
@@ -491,8 +488,7 @@ static int xenpaging_populate_page(xenpa
/* Map page */
ret = -EFAULT;
page = xc_map_foreign_pages(xch, paging->mem_event.domain_id,
- PROT_READ | PROT_WRITE, &_gfn, 1);
- *gfn = _gfn;
+ PROT_READ | PROT_WRITE, &gfn, 1);
if ( page == NULL )
{
ERROR("Error mapping page: page is null");
@@ -667,7 +663,7 @@ int main(int argc, char *argv[])
else
{
/* Populate the page */
- rc = xenpaging_populate_page(paging, &req.gfn, fd, i);
+ rc = xenpaging_populate_page(paging, req.gfn, fd, i);
if ( rc != 0 )
{
ERROR("Error populating page");
++++++ 23594-xenpaging_add_helper_function_for_unlinking_pagefile.patch ++++++
changeset: 23594:2fe46305a00d
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:25 2011 +0200
files: tools/xenpaging/xenpaging.c
description:
xenpaging: add helper function for unlinking pagefile
Unlink pagefile in the signal handler and also in the exit path.
This does not leave a stale pagefile if an error occoured.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -41,11 +41,20 @@
static char filename[80];
static int interrupted;
-static void close_handler(int sig)
+
+static void unlink_pagefile(void)
{
- interrupted = sig;
if ( filename[0] )
+ {
unlink(filename);
+ filename[0] = '\0';
+ }
+}
+
+static void close_handler(int sig)
+{
+ interrupted = sig;
+ unlink_pagefile();
}
static int xenpaging_mem_paging_flush_ioemu_cache(xenpaging_t *paging)
@@ -716,6 +725,7 @@ int main(int argc, char *argv[])
out:
close(fd);
+ unlink_pagefile();
free(victims);
/* Tear down domain paging */
++++++ 23595-xenpaging_add_watch_thread_to_catch_guest_shutdown.patch ++++++
changeset: 23595:389c8bf31688
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:27 2011 +0200
files: tools/xenpaging/xenpaging.c
description:
xenpaging: add watch thread to catch guest shutdown
If xenpaging is started manually then no event is sent to xenpaging when
the guest is shutdown or rebooted. Add a watch on the @releaseDomain
node to leave the loop and gracefully shutdown the pager.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 40 ++++++++++++++++++++++++++++++++++++++--
1 file changed, 38 insertions(+), 2 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -39,6 +39,7 @@
#include "policy.h"
#include "xenpaging.h"
+static char watch_token[16];
static char filename[80];
static int interrupted;
@@ -75,13 +76,19 @@ static int xenpaging_wait_for_event_or_t
{
xc_interface *xch = paging->xc_handle;
xc_evtchn *xce = paging->mem_event.xce_handle;
- struct pollfd fd[1];
+ char **vec;
+ unsigned int num;
+ struct pollfd fd[2];
int port;
int rc;
+ /* Wait for event channel and xenstore */
fd[0].fd = xc_evtchn_fd(xce);
fd[0].events = POLLIN | POLLERR;
- rc = poll(fd, 1, 100);
+ fd[1].fd = xs_fileno(paging->xs_handle);
+ fd[1].events = POLLIN | POLLERR;
+
+ rc = poll(fd, 2, 100);
if ( rc < 0 )
{
if (errno == EINTR)
@@ -91,6 +98,27 @@ static int xenpaging_wait_for_event_or_t
return -errno;
}
+ /* First check for guest shutdown */
+ if ( rc && fd[1].revents & POLLIN )
+ {
+ DPRINTF("Got event from xenstore\n");
+ vec = xs_read_watch(paging->xs_handle, &num);
+ if ( vec )
+ {
+ if ( strcmp(vec[XS_WATCH_TOKEN], watch_token) == 0 )
+ {
+ /* If our guest disappeared, set interrupt flag and fall
through */
+ if ( xs_is_domain_introduced(paging->xs_handle,
paging->mem_event.domain_id) == false )
+ {
+ xs_unwatch(paging->xs_handle, "@releaseDomain",
watch_token);
+ interrupted = SIGQUIT;
+ rc = 0;
+ }
+ }
+ free(vec);
+ }
+ }
+
if ( rc && fd[0].revents & POLLIN )
{
DPRINTF("Got event from evtchn\n");
@@ -165,6 +193,14 @@ static xenpaging_t *xenpaging_init(domid
goto err;
}
+ /* write domain ID to watch so we can ignore other domain shutdowns */
+ snprintf(watch_token, sizeof(watch_token), "%u", domain_id);
+ if ( xs_watch(paging->xs_handle, "@releaseDomain", watch_token) == false )
+ {
+ ERROR("Could not bind to shutdown watch\n");
+ goto err;
+ }
+
p = getenv("XENPAGING_POLICY_MRU_SIZE");
if ( p && *p )
{
++++++
23596-xenpaging_implement_stopping_of_pager_by_sending_SIGTERM-SIGINT.patch
++++++
changeset: 23596:c49e22648d0e
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:28 2011 +0200
files: tools/xenpaging/Makefile tools/xenpaging/pagein.c
tools/xenpaging/xenpaging.c tools/xenpaging/xenpaging.h
description:
xenpaging: implement stopping of pager by sending SIGTERM/SIGINT
Write all paged-out pages back into the guest if the pager is
interrupted by ctrl-c or if it receives SIGTERM.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/Makefile | 1
tools/xenpaging/pagein.c | 68 ++++++++++++++++++++++++++++++++++++++++++++
tools/xenpaging/xenpaging.c | 35 ++++++++++++++++++++--
tools/xenpaging/xenpaging.h | 3 +
4 files changed, 104 insertions(+), 3 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/Makefile
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/Makefile
+++ xen-4.1.2-testing/tools/xenpaging/Makefile
@@ -10,6 +10,7 @@ POLICY = default
SRC :=
SRCS += file_ops.c xenpaging.c policy_$(POLICY).c
+SRCS += pagein.c
CFLAGS += -Werror
CFLAGS += -Wno-unused
Index: xen-4.1.2-testing/tools/xenpaging/pagein.c
===================================================================
--- /dev/null
+++ xen-4.1.2-testing/tools/xenpaging/pagein.c
@@ -0,0 +1,68 @@
+/* Trigger a page-in in a separate thread-of-execution to avoid deadlock */
+#include <pthread.h>
+#include "xc_private.h"
+
+struct page_in_args {
+ domid_t dom;
+ xc_interface *xch;
+};
+
+static struct page_in_args page_in_args;
+static unsigned long page_in_gfn;
+static unsigned int page_in_possible;
+
+static pthread_t page_in_thread;
+static pthread_cond_t page_in_cond = PTHREAD_COND_INITIALIZER;
+static pthread_mutex_t page_in_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static void *page_in(void *arg)
+{
+ struct page_in_args *pia = arg;
+ void *page;
+ xen_pfn_t gfn;
+
+ while (1)
+ {
+ pthread_mutex_lock(&page_in_mutex);
+ while (!page_in_gfn)
+ pthread_cond_wait(&page_in_cond, &page_in_mutex);
+ gfn = page_in_gfn;
+ page_in_gfn = 0;
+ pthread_mutex_unlock(&page_in_mutex);
+
+ /* Ignore errors */
+ page = xc_map_foreign_pages(pia->xch, pia->dom, PROT_READ, &gfn, 1);
+ if (page)
+ munmap(page, PAGE_SIZE);
+ }
+ page_in_possible = 0;
+ pthread_exit(NULL);
+}
+
+void page_in_trigger(unsigned long gfn)
+{
+ if (!page_in_possible)
+ return;
+
+ pthread_mutex_lock(&page_in_mutex);
+ page_in_gfn = gfn;
+ pthread_mutex_unlock(&page_in_mutex);
+ pthread_cond_signal(&page_in_cond);
+}
+
+void create_page_in_thread(domid_t domain_id, xc_interface *xch)
+{
+ page_in_args.dom = domain_id;
+ page_in_args.xch = xch;
+ if (pthread_create(&page_in_thread, NULL, page_in, &page_in_args) == 0)
+ page_in_possible = 1;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -650,6 +650,9 @@ int main(int argc, char *argv[])
sigaction(SIGINT, &act, NULL);
sigaction(SIGALRM, &act, NULL);
+ /* listen for page-in events to stop pager */
+ create_page_in_thread(paging->mem_event.domain_id, xch);
+
/* Evict pages */
for ( i = 0; i < paging->num_pages; i++ )
{
@@ -665,7 +668,7 @@ int main(int argc, char *argv[])
DPRINTF("%d pages evicted. Done.\n", i);
/* Swap pages in and out */
- while ( !interrupted )
+ while ( 1 )
{
/* Wait for Xen to signal that a page needs paged in */
rc = xenpaging_wait_for_event_or_timeout(paging);
@@ -728,8 +731,12 @@ int main(int argc, char *argv[])
goto out;
}
- /* Evict a new page to replace the one we just paged in */
- evict_victim(paging, &victims[i], fd, i);
+ /* Evict a new page to replace the one we just paged in,
+ * or clear this pagefile slot on exit */
+ if ( interrupted )
+ victims[i].gfn = INVALID_MFN;
+ else
+ evict_victim(paging, &victims[i], fd, i);
}
else
{
@@ -756,6 +763,28 @@ int main(int argc, char *argv[])
}
}
}
+
+ /* Write all pages back into the guest */
+ if ( interrupted == SIGTERM || interrupted == SIGINT )
+ {
+ for ( i = 0; i < paging->domain_info->max_pages; i++ )
+ {
+ if ( test_bit(i, paging->bitmap) )
+ {
+ page_in_trigger(i);
+ break;
+ }
+ }
+ /* If no more pages to process, exit loop */
+ if ( i == paging->domain_info->max_pages )
+ break;
+ }
+ else
+ {
+ /* Exit on any other signal */
+ if ( interrupted )
+ break;
+ }
}
DPRINTF("xenpaging got signal %d\n", interrupted);
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.h
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.h
@@ -53,6 +53,9 @@ typedef struct xenpaging_victim {
} xenpaging_victim_t;
+extern void create_page_in_thread(domid_t domain_id, xc_interface *xch);
+extern void page_in_trigger(unsigned long gfn);
+
#endif // __XEN_PAGING_H__
++++++ 23597-xenpaging_remove_private_mem_event.h.patch ++++++
changeset: 23597:3dcb553f3ba9
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:29 2011 +0200
files: tools/xenpaging/mem_event.h tools/xenpaging/xenpaging.c
tools/xenpaging/xenpaging.h
description:
xenpaging: remove private mem_event.h
tools/xenpaging/mem_event.h is only included in xenpaging.h.
Add the contents into that file and remove mem_event.h.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/mem_event.h | 57 --------------------------------------------
tools/xenpaging/xenpaging.c | 3 --
tools/xenpaging/xenpaging.h | 11 ++++++--
3 files changed, 8 insertions(+), 63 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/mem_event.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/mem_event.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/******************************************************************************
- * tools/xenpaging/mem_event.h
- *
- * Memory event structures.
- *
- * Copyright (c) 2009 Citrix Systems, Inc. (Patrick Colp)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-
-#ifndef __XEN_MEM_EVENT_H__
-#define __XEN_MEM_EVENT_H__
-
-
-#include <xc_private.h>
-
-#include <xen/event_channel.h>
-#include <xen/mem_event.h>
-
-
-
-
-typedef struct mem_event {
- domid_t domain_id;
- xc_evtchn *xce_handle;
- int port;
- mem_event_back_ring_t back_ring;
- mem_event_shared_page_t *shared_page;
- void *ring_page;
-} mem_event_t;
-
-
-#endif // __XEN_MEM_EVENT_H__
-
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -31,11 +31,8 @@
#include <xc_private.h>
#include <xs.h>
-#include <xen/mem_event.h>
-
#include "xc_bitops.h"
#include "file_ops.h"
-
#include "policy.h"
#include "xenpaging.h"
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.h
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.h
@@ -26,12 +26,17 @@
#include <xc_private.h>
-
#include <xen/event_channel.h>
#include <xen/mem_event.h>
-#include "mem_event.h"
-
+typedef struct mem_event {
+ domid_t domain_id;
+ xc_evtchn *xce_handle;
+ int port;
+ mem_event_back_ring_t back_ring;
+ mem_event_shared_page_t *shared_page;
+ void *ring_page;
+} mem_event_t;
typedef struct xenpaging {
xc_interface *xc_handle;
++++++ 23599-tools_fix_build_after_recent_xenpaging_changes.patch ++++++
changeset: 23599:d3027374a8c0
user: Tim Deegan <Tim.Deegan@xxxxxxxxxx>
date: Mon Jun 27 14:48:57 2011 +0100
files: tools/xenpaging/Makefile
description:
tools: fix build after recent xenpaging changes
xenpaging now uses pthreads, so must link appropriately.
Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
---
tools/xenpaging/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: xen-4.1.2-testing/tools/xenpaging/Makefile
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/Makefile
+++ xen-4.1.2-testing/tools/xenpaging/Makefile
@@ -4,7 +4,7 @@ include $(XEN_ROOT)/tools/Rules.mk
CFLAGS += -I $(XEN_XC)
CFLAGS += -I ./
CFLAGS += $(CFLAGS_libxenctrl) $(CFLAGS_libxenstore)
-LDLIBS += $(LDLIBS_libxenctrl) $(LDLIBS_libxenstore)
+LDLIBS += $(LDLIBS_libxenctrl) $(LDLIBS_libxenstore) -pthread
POLICY = default
++++++ 23610-x86-topology-info.patch ++++++
References: fate#309894
# HG changeset patch
# User Wei Huang <wei.huang2@xxxxxxx>
# Date 1309248811 -3600
# Node ID 87c2013c2aa2d4874f892507e6cc7b52219a3acf
# Parent 819c315a919daec434f80ffb6e790ac492cbd2a7
x86: consolidate cpu_core_id and phys_proc_id into cpuinfo_x86 struct
This patch moves cpu_core_id and phys_proc_id into cpuinfo_x86
structure. This is similar to upstream Linux kernel's approach.
Signed-off-by: Wei Huang <wei.huang2@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/cpu/amd.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/cpu/amd.c
+++ xen-4.1.2-testing/xen/arch/x86/cpu/amd.c
@@ -579,11 +579,11 @@ static void __devinit init_amd(struct cp
while ((1 << bits) < c->x86_max_cores)
bits++;
}
- cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1);
- phys_proc_id[cpu] >>= bits;
+ c->cpu_core_id = c->phys_proc_id & ((1<<bits)-1);
+ c->phys_proc_id >>= bits;
if (opt_cpu_info)
printk("CPU %d(%d) -> Core %d\n",
- cpu, c->x86_max_cores, cpu_core_id[cpu]);
+ cpu, c->x86_max_cores, c->cpu_core_id);
}
#endif
Index: xen-4.1.2-testing/xen/arch/x86/cpu/common.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/cpu/common.c
+++ xen-4.1.2-testing/xen/arch/x86/cpu/common.c
@@ -326,7 +326,7 @@ void __cpuinit generic_identify(struct c
early_intel_workaround(c);
#ifdef CONFIG_X86_HT
- phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
+ c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
#endif
}
@@ -362,6 +362,8 @@ void __cpuinit identify_cpu(struct cpuin
c->x86_max_cores = 1;
c->x86_num_siblings = 1;
c->x86_clflush_size = 0;
+ c->phys_proc_id = BAD_APICID;
+ c->cpu_core_id = BAD_APICID;
memset(&c->x86_capability, 0, sizeof c->x86_capability);
if (!have_cpuid_p()) {
@@ -510,7 +512,6 @@ void __cpuinit detect_extended_topology(
unsigned int ht_mask_width, core_plus_mask_width;
unsigned int core_select_mask, core_level_siblings;
unsigned int initial_apicid;
- int cpu = smp_processor_id();
if ( c->cpuid_level < 0xb )
return;
@@ -545,9 +546,9 @@ void __cpuinit detect_extended_topology(
core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width;
- cpu_core_id[cpu] = phys_pkg_id(initial_apicid, ht_mask_width)
+ c->cpu_core_id = phys_pkg_id(initial_apicid, ht_mask_width)
& core_select_mask;
- phys_proc_id[cpu] = phys_pkg_id(initial_apicid, core_plus_mask_width);
+ c->phys_proc_id = phys_pkg_id(initial_apicid, core_plus_mask_width);
c->apicid = phys_pkg_id(initial_apicid, 0);
c->x86_max_cores = (core_level_siblings / c->x86_num_siblings);
@@ -555,10 +556,10 @@ void __cpuinit detect_extended_topology(
if ( opt_cpu_info )
{
printk("CPU: Physical Processor ID: %d\n",
- phys_proc_id[cpu]);
+ c->phys_proc_id);
if ( c->x86_max_cores > 1 )
printk("CPU: Processor Core ID: %d\n",
- cpu_core_id[cpu]);
+ c->cpu_core_id);
}
}
@@ -567,7 +568,6 @@ void __cpuinit detect_ht(struct cpuinfo_
{
u32 eax, ebx, ecx, edx;
int index_msb, core_bits;
- int cpu = smp_processor_id();
cpuid(1, &eax, &ebx, &ecx, &edx);
@@ -590,11 +590,11 @@ void __cpuinit detect_ht(struct cpuinfo_
}
index_msb = get_count_order(c->x86_num_siblings);
- phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
+ c->phys_proc_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
if (opt_cpu_info)
printk("CPU: Physical Processor ID: %d\n",
- phys_proc_id[cpu]);
+ c->phys_proc_id);
c->x86_num_siblings = c->x86_num_siblings / c->x86_max_cores;
@@ -602,12 +602,12 @@ void __cpuinit detect_ht(struct cpuinfo_
core_bits = get_count_order(c->x86_max_cores);
- cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) &
+ c->cpu_core_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) &
((1 << core_bits) - 1);
if (opt_cpu_info && c->x86_max_cores > 1)
printk("CPU: Processor Core ID: %d\n",
- cpu_core_id[cpu]);
+ c->cpu_core_id);
}
}
#endif
Index: xen-4.1.2-testing/xen/arch/x86/cpu/mcheck/mce.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/cpu/mcheck/mce.c
+++ xen-4.1.2-testing/xen/arch/x86/cpu/mcheck/mce.c
@@ -1041,9 +1041,9 @@ void x86_mc_get_cpu_info(unsigned cpu, u
if (nthreads != NULL)
*nthreads = 1;
} else {
- *chipid = phys_proc_id[cpu];
+ *chipid = c->phys_proc_id;
if (c->x86_max_cores > 1)
- *coreid = cpu_core_id[cpu];
+ *coreid = c->cpu_core_id;
else
*coreid = 0;
*threadid = c->apicid & ((1 << (c->x86_num_siblings - 1)) - 1);
Index: xen-4.1.2-testing/xen/arch/x86/smpboot.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/smpboot.c
+++ xen-4.1.2-testing/xen/arch/x86/smpboot.c
@@ -51,12 +51,6 @@
/* Set if we find a B stepping CPU */
static int smp_b_stepping;
-/* Package ID of each logical CPU */
-int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
-
-/* Core ID of each logical CPU */
-int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
-
/* representing HT siblings of each logical CPU */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_t, cpu_sibling_map);
/* representing HT and core siblings of each logical CPU */
@@ -257,8 +251,8 @@ static void set_cpu_sibling_map(int cpu)
{
for_each_cpu_mask ( i, cpu_sibling_setup_map )
{
- if ( (phys_proc_id[cpu] == phys_proc_id[i]) &&
- (cpu_core_id[cpu] == cpu_core_id[i]) )
+ if ( (c[cpu].phys_proc_id == c[i].phys_proc_id) &&
+ (c[cpu].cpu_core_id == c[i].cpu_core_id) )
{
cpu_set(i, per_cpu(cpu_sibling_map, cpu));
cpu_set(cpu, per_cpu(cpu_sibling_map, i));
@@ -281,7 +275,7 @@ static void set_cpu_sibling_map(int cpu)
for_each_cpu_mask ( i, cpu_sibling_setup_map )
{
- if ( phys_proc_id[cpu] == phys_proc_id[i] )
+ if ( c[cpu].phys_proc_id == c[i].phys_proc_id )
{
cpu_set(i, per_cpu(cpu_core_map, cpu));
cpu_set(cpu, per_cpu(cpu_core_map, i));
@@ -842,8 +836,8 @@ remove_siblinginfo(int cpu)
cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
cpus_clear(per_cpu(cpu_sibling_map, cpu));
cpus_clear(per_cpu(cpu_core_map, cpu));
- phys_proc_id[cpu] = BAD_APICID;
- cpu_core_id[cpu] = BAD_APICID;
+ c[cpu].phys_proc_id = BAD_APICID;
+ c[cpu].cpu_core_id = BAD_APICID;
cpu_clear(cpu, cpu_sibling_setup_map);
}
Index: xen-4.1.2-testing/xen/include/asm-x86/processor.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/processor.h
+++ xen-4.1.2-testing/xen/include/asm-x86/processor.h
@@ -175,6 +175,8 @@ struct cpuinfo_x86 {
__u32 x86_max_cores; /* cpuid returned max cores value */
__u32 booted_cores; /* number of cores as seen by OS */
__u32 x86_num_siblings; /* cpuid logical cpus per chip value */
+ int phys_proc_id; /* package ID of each logical CPU */
+ int cpu_core_id; /* core ID of each logical CPU*/
__u32 apicid;
unsigned short x86_clflush_size;
} __cacheline_aligned;
@@ -196,8 +198,6 @@ extern struct cpuinfo_x86 cpu_data[];
extern void set_cpuid_faulting(bool_t enable);
extern u64 host_pat;
-extern int phys_proc_id[NR_CPUS];
-extern int cpu_core_id[NR_CPUS];
extern bool_t opt_cpu_info;
/* Maximum width of physical addresses supported by the hardware */
@@ -217,8 +217,8 @@ extern void detect_ht(struct cpuinfo_x86
static always_inline void detect_ht(struct cpuinfo_x86 *c) {}
#endif
-#define cpu_to_core(_cpu) (cpu_core_id[_cpu])
-#define cpu_to_socket(_cpu) (phys_proc_id[_cpu])
+#define cpu_to_core(_cpu) (cpu_data[_cpu].cpu_core_id)
+#define cpu_to_socket(_cpu) (cpu_data[_cpu].phys_proc_id)
/*
* Generic CPUID function
++++++ 23611-amd-fam15-topology.patch ++++++
References: fate#309894
# HG changeset patch
# User Wei Huang <wei.huang2@xxxxxxx>
# Date 1309248833 -3600
# Node ID c2c12b2dafb5b1d3bfcc4c05a60ca4f9051c90e7
# Parent 87c2013c2aa2d4874f892507e6cc7b52219a3acf
x86: AMD core-pair topology detection code
This patch is to support core-pair topology introduced by AMD CPUs,
which introduces a new concept of [core, compute unit]. There is a new
feature bit for topology extension in CPUID:0x80000001. Also a new
CPUID 0x8000001E is introduced for CPU topology enumeration. This
patch collects the sibling information from the new CPUID and will be
stored in the sibling map in Xen hypervisor.
Signed-off-by: Wei Huang <wei.huang2@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/cpu/amd.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/cpu/amd.c
+++ xen-4.1.2-testing/xen/arch/x86/cpu/amd.c
@@ -344,6 +344,49 @@ static void check_syscfg_dram_mod_en(voi
wrmsrl(MSR_K8_SYSCFG, syscfg);
}
+static void __devinit amd_get_topology(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_X86_HT
+ int cpu;
+ unsigned bits;
+
+ if (c->x86_max_cores <= 1)
+ return;
+ /*
+ * On a AMD multi core setup the lower bits of the APIC id
+ * distingush the cores.
+ */
+ cpu = smp_processor_id();
+ bits = (cpuid_ecx(0x80000008) >> 12) & 0xf;
+
+ if (bits == 0) {
+ while ((1 << bits) < c->x86_max_cores)
+ bits++;
+ }
+
+ /* Low order bits define the core id */
+ c->cpu_core_id = c->phys_proc_id & ((1<<bits)-1);
+ /* Convert local APIC ID into the socket ID */
+ c->phys_proc_id >>= bits;
+ /* Collect compute unit ID if available */
+ if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
+ u32 eax, ebx, ecx, edx;
+
+ cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
+ c->compute_unit_id = ebx & 0xFF;
+ c->x86_num_siblings = ((ebx >> 8) & 0x3) + 1;
+ }
+
+ if (opt_cpu_info)
+ printk("CPU %d(%d) -> Processor %d, %s %d\n",
+ cpu, c->x86_max_cores, c->phys_proc_id,
+ cpu_has(c, X86_FEATURE_TOPOEXT) ? "Compute Unit" :
+ "Core",
+ cpu_has(c, X86_FEATURE_TOPOEXT) ? c->compute_unit_id :
+ c->cpu_core_id);
+#endif
+}
+
static void __devinit init_amd(struct cpuinfo_x86 *c)
{
u32 l, h;
@@ -566,26 +609,7 @@ static void __devinit init_amd(struct cp
}
}
-#ifdef CONFIG_X86_HT
- /*
- * On a AMD multi core setup the lower bits of the APIC id
- * distingush the cores.
- */
- if (c->x86_max_cores > 1) {
- int cpu = smp_processor_id();
- unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf;
-
- if (bits == 0) {
- while ((1 << bits) < c->x86_max_cores)
- bits++;
- }
- c->cpu_core_id = c->phys_proc_id & ((1<<bits)-1);
- c->phys_proc_id >>= bits;
- if (opt_cpu_info)
- printk("CPU %d(%d) -> Core %d\n",
- cpu, c->x86_max_cores, c->cpu_core_id);
- }
-#endif
+ amd_get_topology(c);
/* Pointless to use MWAIT on Family10 as it does not deep sleep. */
if (c->x86 >= 0x10 && !force_mwait)
Index: xen-4.1.2-testing/xen/arch/x86/cpu/common.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/cpu/common.c
+++ xen-4.1.2-testing/xen/arch/x86/cpu/common.c
@@ -364,6 +364,7 @@ void __cpuinit identify_cpu(struct cpuin
c->x86_clflush_size = 0;
c->phys_proc_id = BAD_APICID;
c->cpu_core_id = BAD_APICID;
+ c->compute_unit_id = BAD_APICID;
memset(&c->x86_capability, 0, sizeof c->x86_capability);
if (!have_cpuid_p()) {
Index: xen-4.1.2-testing/xen/arch/x86/smpboot.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/smpboot.c
+++ xen-4.1.2-testing/xen/arch/x86/smpboot.c
@@ -240,6 +240,14 @@ static int booting_cpu;
/* CPUs for which sibling maps can be computed. */
static cpumask_t cpu_sibling_setup_map;
+static void link_thread_siblings(int cpu1, int cpu2)
+{
+ cpu_set(cpu1, per_cpu(cpu_sibling_map, cpu2));
+ cpu_set(cpu2, per_cpu(cpu_sibling_map, cpu1));
+ cpu_set(cpu1, per_cpu(cpu_core_map, cpu2));
+ cpu_set(cpu2, per_cpu(cpu_core_map, cpu1));
+}
+
static void set_cpu_sibling_map(int cpu)
{
int i;
@@ -251,13 +259,13 @@ static void set_cpu_sibling_map(int cpu)
{
for_each_cpu_mask ( i, cpu_sibling_setup_map )
{
- if ( (c[cpu].phys_proc_id == c[i].phys_proc_id) &&
- (c[cpu].cpu_core_id == c[i].cpu_core_id) )
- {
- cpu_set(i, per_cpu(cpu_sibling_map, cpu));
- cpu_set(cpu, per_cpu(cpu_sibling_map, i));
- cpu_set(i, per_cpu(cpu_core_map, cpu));
- cpu_set(cpu, per_cpu(cpu_core_map, i));
+ if ( cpu_has(c, X86_FEATURE_TOPOEXT) ) {
+ if ( (c[cpu].phys_proc_id == c[i].phys_proc_id) &&
+ (c[cpu].compute_unit_id == c[i].compute_unit_id) )
+ link_thread_siblings(cpu, i);
+ } else if ( (c[cpu].phys_proc_id == c[i].phys_proc_id) &&
+ (c[cpu].cpu_core_id == c[i].cpu_core_id) ) {
+ link_thread_siblings(cpu, i);
}
}
}
@@ -838,6 +846,7 @@ remove_siblinginfo(int cpu)
cpus_clear(per_cpu(cpu_core_map, cpu));
c[cpu].phys_proc_id = BAD_APICID;
c[cpu].cpu_core_id = BAD_APICID;
+ c[cpu].compute_unit_id = BAD_APICID;
cpu_clear(cpu, cpu_sibling_setup_map);
}
Index: xen-4.1.2-testing/xen/include/asm-x86/processor.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/processor.h
+++ xen-4.1.2-testing/xen/include/asm-x86/processor.h
@@ -175,9 +175,10 @@ struct cpuinfo_x86 {
__u32 x86_max_cores; /* cpuid returned max cores value */
__u32 booted_cores; /* number of cores as seen by OS */
__u32 x86_num_siblings; /* cpuid logical cpus per chip value */
+ __u32 apicid;
int phys_proc_id; /* package ID of each logical CPU */
int cpu_core_id; /* core ID of each logical CPU*/
- __u32 apicid;
+ int compute_unit_id; /* AMD compute unit ID of each logical CPU */
unsigned short x86_clflush_size;
} __cacheline_aligned;
++++++ 23613-EFI-headers.patch ++++++
++++ 2741 lines (skipped)
++++++ 23614-x86_64-EFI-boot.patch ++++++
++++ 2579 lines (skipped)
++++++ 23615-x86_64-EFI-runtime.patch ++++++
++++ 850 lines (skipped)
++++++ 23616-x86_64-EFI-MPS.patch ++++++
References: fate#311376, fate#311529, bnc#578927, bnc#628554
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1309249288 -3600
# Node ID dffcd8b4c197b58d2acb914d0e07a100e340f7ae
# Parent d19e778442673050bba8ea8cf61585902ff81162
x86-64: EFI MPS support
It's not clear this is needed - Linux doesn't use the MPS table even
if available, and no system having one was seen so far.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1309268736 -3600
# Node ID d22b64ccf088db8bfce1d6c4830f08e3e834ec84
# Parent 6d404796a8e587eb648a66f2859991d385b65eb6
x86_32: Fix build after EFI MPS patch.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/efi/boot.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/efi/boot.c
+++ xen-4.1.2-testing/xen/arch/x86/efi/boot.c
@@ -897,12 +897,15 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
{
static EFI_GUID __initdata acpi2_guid = ACPI_20_TABLE_GUID;
static EFI_GUID __initdata acpi_guid = ACPI_TABLE_GUID;
+ static EFI_GUID __initdata mps_guid = MPS_TABLE_GUID;
static EFI_GUID __initdata smbios_guid = SMBIOS_TABLE_GUID;
if ( match_guid(&acpi2_guid, &efi_ct[i].VendorGuid) )
efi.acpi20 = (long)efi_ct[i].VendorTable;
if ( match_guid(&acpi_guid, &efi_ct[i].VendorGuid) )
efi.acpi = (long)efi_ct[i].VendorTable;
+ if ( match_guid(&mps_guid, &efi_ct[i].VendorGuid) )
+ efi.mps = (long)efi_ct[i].VendorTable;
if ( match_guid(&smbios_guid, &efi_ct[i].VendorGuid) )
efi.smbios = (long)efi_ct[i].VendorTable;
}
Index: xen-4.1.2-testing/xen/arch/x86/efi/runtime.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/efi/runtime.c
+++ xen-4.1.2-testing/xen/arch/x86/efi/runtime.c
@@ -29,6 +29,7 @@ void *__read_mostly efi_memmap;
struct efi __read_mostly efi = {
.acpi = EFI_INVALID_TABLE_ADDR,
.acpi20 = EFI_INVALID_TABLE_ADDR,
+ .mps = EFI_INVALID_TABLE_ADDR,
.smbios = EFI_INVALID_TABLE_ADDR,
};
Index: xen-4.1.2-testing/xen/arch/x86/mpparse.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mpparse.c
+++ xen-4.1.2-testing/xen/arch/x86/mpparse.c
@@ -19,6 +19,8 @@
#include <xen/init.h>
#include <xen/acpi.h>
#include <xen/delay.h>
+#include <xen/efi.h>
+#include <xen/pfn.h>
#include <xen/sched.h>
#include <asm/mc146818rtc.h>
@@ -655,6 +657,14 @@ static inline void __init construct_defa
}
}
+#define FIX_EFI_MPF FIX_KEXEC_BASE_0
+
+static __init void efi_unmap_mpf(void)
+{
+ if (efi_enabled)
+ __set_fixmap(FIX_EFI_MPF, 0, 0);
+}
+
static struct intel_mp_floating *mpf_found;
/*
@@ -669,6 +679,7 @@ void __init get_smp_config (void)
* processors, where MPS only supports physical.
*/
if (acpi_lapic && acpi_ioapic) {
+ efi_unmap_mpf();
printk(KERN_INFO "Using ACPI (MADT) for SMP configuration
information\n");
return;
}
@@ -699,6 +710,7 @@ void __init get_smp_config (void)
* override the defaults.
*/
if (!smp_read_mpc((void *)(unsigned long)mpf->mpf_physptr)) {
+ efi_unmap_mpf();
smp_found_config = 0;
printk(KERN_ERR "BIOS bug, MP table errors
detected!...\n");
printk(KERN_ERR "... disabling SMP support. (tell your
hw vendor)\n");
@@ -725,6 +737,8 @@ void __init get_smp_config (void)
} else
BUG();
+ efi_unmap_mpf();
+
printk(KERN_INFO "Processors: %d\n", num_processors);
/*
* Only use the first configuration found.
@@ -779,10 +793,37 @@ static int __init smp_scan_config (unsig
return 0;
}
+static void __init efi_check_config(void)
+{
+ struct intel_mp_floating *mpf;
+
+ if (efi.mps == EFI_INVALID_TABLE_ADDR)
+ return;
+
+ __set_fixmap(FIX_EFI_MPF, PFN_DOWN(efi.mps), __PAGE_HYPERVISOR);
+ mpf = (void *)fix_to_virt(FIX_EFI_MPF) + ((long)efi.mps &
(PAGE_SIZE-1));
+
+ if (memcmp(mpf->mpf_signature, "_MP_", 4) == 0 &&
+ mpf->mpf_length == 1 &&
+ mpf_checksum((void *)mpf, 16) &&
+ (mpf->mpf_specification == 1 || mpf->mpf_specification == 4)) {
+ smp_found_config = 1;
+ printk(KERN_INFO "SMP MP-table at %08lx\n", efi.mps);
+ mpf_found = mpf;
+ }
+ else
+ efi_unmap_mpf();
+}
+
void __init find_smp_config (void)
{
unsigned int address;
+ if (efi_enabled) {
+ efi_check_config();
+ return;
+ }
+
/*
* FIXME: Linux assumes you have 640K of base ram..
* this continues the error...
Index: xen-4.1.2-testing/xen/include/xen/efi.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/xen/efi.h
+++ xen-4.1.2-testing/xen/include/xen/efi.h
@@ -17,6 +17,7 @@ extern const bool_t efi_enabled;
/* Add fields here only if they need to be referenced from non-EFI code. */
struct efi {
+ unsigned long mps; /* MPS table */
unsigned long acpi; /* ACPI table (IA64 ext 0.71) */
unsigned long acpi20; /* ACPI table (ACPI 2.0) */
unsigned long smbios; /* SM BIOS table */
++++++ 23643-xentrace_Allow_tracing_to_be_enabled_at_boot.patch ++++++
changeset: 23643:335e96664589
user: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
date: Fri Jul 01 20:31:18 2011 +0100
files: xen/common/trace.c
description:
xentrace: Allow tracing to be enabled at boot
Add a "tevt_mask" parameter to the xen command-line, allowing
trace records to be gathered early in boot. They will be placed
into the trace buffers, and read when the user runs "xentrace".
Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
---
xen/common/trace.c | 20 ++++++++++++++++----
1 file changed, 16 insertions(+), 4 deletions(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -45,7 +45,9 @@ CHECK_t_buf;
/* opt_tbuf_size: trace buffer size (in pages) for each cpu */
static unsigned int opt_tbuf_size;
+static unsigned int opt_tevt_mask;
integer_param("tbuf_size", opt_tbuf_size);
+integer_param("tevt_mask", opt_tevt_mask);
/* Pointers to the meta-data objects for all system trace buffers */
static struct t_info *t_info;
@@ -338,11 +340,21 @@ void __init init_trace_bufs(void)
{
register_cpu_notifier(&cpu_nfb);
- if ( opt_tbuf_size && alloc_trace_bufs(opt_tbuf_size) )
+ if ( opt_tbuf_size )
{
- printk(XENLOG_INFO "xentrace: allocation size %d failed, disabling\n",
- opt_tbuf_size);
- opt_tbuf_size = 0;
+ if ( alloc_trace_bufs(opt_tbuf_size) )
+ {
+ printk("xentrace: allocation size %d failed, disabling\n",
+ opt_tbuf_size);
+ opt_tbuf_size = 0;
+ }
+ else if ( opt_tevt_mask )
+ {
+ printk("xentrace: Starting tracing, enabling mask %x\n",
+ opt_tevt_mask);
+ tb_event_mask = opt_tevt_mask;
+ tb_init_done=1;
+ }
}
}
++++++ 23676-x86_64-image-map-bounds.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1310631973 -3600
# Node ID 80c9db90bba96e443a22d268c06948fdef9c6a75
# Parent 88823213a4780ebced6d7adcb1ffd2dda6a339ca
x86-64: properly handle alias mappings beyond _end
Changeset 19632:b0966b6f5180 wasn't really complete: The Xen image
mapping doesn't end at _end, but a full 16Mb gets mapped during boot
(and never got unmapped so far), hence all of this space was subject
to alias mappings when it comes to cache attribute changes. Unmap all
full large pages between _end and the 16Mb boundary, and include all
other pages beyond _end when checking for aliases.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/mm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm.c
+++ xen-4.1.2-testing/xen/arch/x86/mm.c
@@ -778,7 +778,7 @@ static int update_xen_mappings(unsigned
int err = 0;
#ifdef __x86_64__
bool_t alias = mfn >= PFN_DOWN(xen_phys_start) &&
- mfn < PFN_UP(xen_phys_start + (unsigned long)_end - XEN_VIRT_START);
+ mfn < PFN_UP(xen_phys_start + xen_virt_end - XEN_VIRT_START);
unsigned long xen_va =
XEN_VIRT_START + ((mfn - PFN_DOWN(xen_phys_start)) << PAGE_SHIFT);
Index: xen-4.1.2-testing/xen/arch/x86/setup.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/setup.c
+++ xen-4.1.2-testing/xen/arch/x86/setup.c
@@ -99,6 +99,8 @@ unsigned long __read_mostly xen_phys_sta
/* Limits of Xen heap, used to initialise the allocator. */
unsigned long __initdata xenheap_initial_phys_start;
unsigned long __read_mostly xenheap_phys_end;
+#else
+unsigned long __read_mostly xen_virt_end;
#endif
DEFINE_PER_CPU(struct tss_struct, init_tss);
@@ -1098,6 +1100,9 @@ void __init __start_xen(unsigned long mb
map_pages_to_xen((unsigned long)__va(kexec_crash_area.start),
kexec_crash_area.start >> PAGE_SHIFT,
PFN_UP(kexec_crash_area.size), PAGE_HYPERVISOR);
+ xen_virt_end = ((unsigned long)_end + (1UL << L2_PAGETABLE_SHIFT) - 1) &
+ ~((1UL << L2_PAGETABLE_SHIFT) - 1);
+ destroy_xen_mappings(xen_virt_end, XEN_VIRT_START + BOOTSTRAP_MAP_BASE);
#endif
memguard_init();
Index: xen-4.1.2-testing/xen/include/asm-x86/x86_64/page.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/x86_64/page.h
+++ xen-4.1.2-testing/xen/include/asm-x86/x86_64/page.h
@@ -34,6 +34,8 @@
#include <xen/config.h>
#include <asm/types.h>
+extern unsigned long xen_virt_end;
+
extern unsigned long max_pdx;
extern unsigned long pfn_pdx_bottom_mask, ma_va_bottom_mask;
extern unsigned int pfn_pdx_hole_shift;
++++++ 23719-xentrace_update___trace_var_comment.patch ++++++
changeset: 23719:c2888876abd3
user: Olaf Hering <olaf@xxxxxxxxx>
date: Tue Jul 19 08:22:19 2011 +0100
files: xen/common/trace.c
description:
xentrace: update __trace_var comment
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
xen/common/trace.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -657,13 +657,13 @@ static DECLARE_SOFTIRQ_TASKLET(trace_not
trace_notify_dom0, 0);
/**
- * trace - Enters a trace tuple into the trace buffer for the current CPU.
+ * __trace_var - Enters a trace tuple into the trace buffer for the current
CPU.
* @event: the event type being logged
- * @d1...d5: the data items for the event being logged
+ * @cycles: include tsc timestamp into trace record
+ * @extra: size of additional trace data in bytes
+ * @extra_data: pointer to additional trace data
*
- * Logs a trace record into the appropriate buffer. Returns nonzero on
- * failure, otherwise 0. Failure occurs only if the trace buffers are not yet
- * initialised.
+ * Logs a trace record into the appropriate buffer.
*/
void __trace_var(u32 event, bool_t cycles, unsigned int extra,
const void *extra_data)
++++++ 23723-x86-CMOS-lock.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1311081053 -3600
# Node ID 18653a163b1e8e10b4353272bcb9e8302bfd2e19
# Parent 7bc5825e471db5a3a989f47d21334ef63a6b5610
x86: consistently serialize CMOS/RTC accesses on rtc_lock
Since RTC/CMOS accesses aren't atomic, there are possible races
between code paths setting the index register and subsequently
reading/writing the data register. This is supposed to be dealt with
by acquiring rtc_lock, but two places up to now lacked respective
synchronization: Accesses to the EFI time functions and
smpboot_{setup,restore}_warm_reset_vector().
This in turn requires no longer directly passing through guest writes
to the index register, but instead using a machanism similar to that
for PCI config space method 1 accesses.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/efi/runtime.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/efi/runtime.c
+++ xen-4.1.2-testing/xen/arch/x86/efi/runtime.c
@@ -3,6 +3,7 @@
#include <xen/errno.h>
#include <xen/guest_access.h>
#include <xen/time.h>
+#include <asm/mc146818rtc.h>
DEFINE_XEN_GUEST_HANDLE(CHAR16);
@@ -80,9 +81,11 @@ unsigned long efi_get_time(void)
{
EFI_TIME time;
EFI_STATUS status;
- unsigned long cr3 = efi_rs_enter();
+ unsigned long cr3 = efi_rs_enter(), flags;
+ spin_lock_irqsave(&rtc_lock, flags);
status = efi_rs->GetTime(&time, NULL);
+ spin_unlock_irqrestore(&rtc_lock, flags);
efi_rs_leave(cr3);
if ( EFI_ERROR(status) )
@@ -223,7 +226,7 @@ static inline EFI_GUID *cast_guid(struct
int efi_runtime_call(struct xenpf_efi_runtime_call *op)
{
- unsigned long cr3;
+ unsigned long cr3, flags;
EFI_STATUS status = EFI_NOT_STARTED;
int rc = 0;
@@ -237,7 +240,9 @@ int efi_runtime_call(struct xenpf_efi_ru
return -EINVAL;
cr3 = efi_rs_enter();
+ spin_lock_irqsave(&rtc_lock, flags);
status = efi_rs->GetTime(cast_time(&op->u.get_time.time), &caps);
+ spin_unlock_irqrestore(&rtc_lock, flags);
efi_rs_leave(cr3);
if ( !EFI_ERROR(status) )
@@ -255,7 +260,9 @@ int efi_runtime_call(struct xenpf_efi_ru
return -EINVAL;
cr3 = efi_rs_enter();
+ spin_lock_irqsave(&rtc_lock, flags);
status = efi_rs->SetTime(cast_time(&op->u.set_time));
+ spin_unlock_irqrestore(&rtc_lock, flags);
efi_rs_leave(cr3);
break;
@@ -267,8 +274,10 @@ int efi_runtime_call(struct xenpf_efi_ru
return -EINVAL;
cr3 = efi_rs_enter();
+ spin_lock_irqsave(&rtc_lock, flags);
status = efi_rs->GetWakeupTime(&enabled, &pending,
cast_time(&op->u.get_wakeup_time));
+ spin_unlock_irqrestore(&rtc_lock, flags);
efi_rs_leave(cr3);
if ( !EFI_ERROR(status) )
@@ -287,12 +296,14 @@ int efi_runtime_call(struct xenpf_efi_ru
return -EINVAL;
cr3 = efi_rs_enter();
+ spin_lock_irqsave(&rtc_lock, flags);
status = efi_rs->SetWakeupTime(!!(op->misc &
XEN_EFI_SET_WAKEUP_TIME_ENABLE),
(op->misc &
XEN_EFI_SET_WAKEUP_TIME_ENABLE_ONLY) ?
NULL :
cast_time(&op->u.set_wakeup_time));
+ spin_unlock_irqrestore(&rtc_lock, flags);
efi_rs_leave(cr3);
op->misc = 0;
Index: xen-4.1.2-testing/xen/arch/x86/hpet.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hpet.c
+++ xen-4.1.2-testing/xen/arch/x86/hpet.c
@@ -525,18 +525,10 @@ static void hpet_detach_channel(int cpu,
#include <asm/mc146818rtc.h>
-void (*pv_rtc_handler)(unsigned int port, uint8_t value);
+void (*__read_mostly pv_rtc_handler)(uint8_t index, uint8_t value);
-static void handle_rtc_once(unsigned int port, uint8_t value)
+static void handle_rtc_once(uint8_t index, uint8_t value)
{
- static int index;
-
- if ( port == 0x70 )
- {
- index = value;
- return;
- }
-
if ( index != RTC_REG_B )
return;
Index: xen-4.1.2-testing/xen/arch/x86/traps.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/traps.c
+++ xen-4.1.2-testing/xen/arch/x86/traps.c
@@ -67,6 +67,8 @@
#include <asm/hypercall.h>
#include <asm/mce.h>
#include <asm/apic.h>
+#include <asm/mc146818rtc.h>
+#include <asm/hpet.h>
#include <public/arch-x86/cpuid.h>
/*
@@ -1630,6 +1632,10 @@ static int admin_io_okay(
if ( (port == 0xcf8) && (bytes == 4) )
return 0;
+ /* We also never permit direct access to the RTC/CMOS registers. */
+ if ( ((port & ~1) == RTC_PORT(0)) )
+ return 0;
+
return ioports_access_permitted(v->domain, port, port + bytes - 1);
}
@@ -1659,6 +1665,21 @@ static uint32_t guest_io_read(
{
sub_data = pv_pit_handler(port, 0, 0);
}
+ else if ( (port == RTC_PORT(0)) )
+ {
+ sub_data = v->domain->arch.cmos_idx;
+ }
+ else if ( (port == RTC_PORT(1)) &&
+ ioports_access_permitted(v->domain, RTC_PORT(0),
+ RTC_PORT(1)) )
+ {
+ unsigned long flags;
+
+ spin_lock_irqsave(&rtc_lock, flags);
+ outb(v->domain->arch.cmos_idx & 0x7f, RTC_PORT(0));
+ sub_data = inb(RTC_PORT(1));
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ }
else if ( (port == 0xcf8) && (bytes == 4) )
{
size = 4;
@@ -1684,8 +1705,6 @@ static uint32_t guest_io_read(
return data;
}
-extern void (*pv_rtc_handler)(unsigned int port, uint8_t value);
-
static void guest_io_write(
unsigned int port, unsigned int bytes, uint32_t data,
struct vcpu *v, struct cpu_user_regs *regs)
@@ -1694,8 +1713,6 @@ static void guest_io_write(
{
switch ( bytes ) {
case 1:
- if ( ((port == 0x70) || (port == 0x71)) && pv_rtc_handler )
- pv_rtc_handler(port, (uint8_t)data);
outb((uint8_t)data, port);
if ( pv_post_outb_hook )
pv_post_outb_hook(port, (uint8_t)data);
@@ -1718,6 +1735,23 @@ static void guest_io_write(
{
pv_pit_handler(port, (uint8_t)data, 1);
}
+ else if ( (port == RTC_PORT(0)) )
+ {
+ v->domain->arch.cmos_idx = data;
+ }
+ else if ( (port == RTC_PORT(1)) &&
+ ioports_access_permitted(v->domain, RTC_PORT(0),
+ RTC_PORT(1)) )
+ {
+ unsigned long flags;
+
+ if ( pv_rtc_handler )
+ pv_rtc_handler(v->domain->arch.cmos_idx & 0x7f, data);
+ spin_lock_irqsave(&rtc_lock, flags);
+ outb(v->domain->arch.cmos_idx & 0x7f, RTC_PORT(0));
+ outb(data, RTC_PORT(1));
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ }
else if ( (port == 0xcf8) && (bytes == 4) )
{
size = 4;
@@ -2083,10 +2117,6 @@ static int emulate_privileged_op(struct
goto fail;
if ( admin_io_okay(port, op_bytes, v, regs) )
{
- if ( (op_bytes == 1) &&
- ((port == 0x71) || (port == 0x70)) &&
- pv_rtc_handler )
- pv_rtc_handler(port, regs->eax);
io_emul(regs);
if ( (op_bytes == 1) && pv_post_outb_hook )
pv_post_outb_hook(port, regs->eax);
Index: xen-4.1.2-testing/xen/include/asm-x86/domain.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/domain.h
+++ xen-4.1.2-testing/xen/include/asm-x86/domain.h
@@ -251,6 +251,7 @@ struct arch_domain
/* I/O-port admin-specified access capabilities. */
struct rangeset *ioport_caps;
uint32_t pci_cf8;
+ uint8_t cmos_idx;
struct list_head pdev_list;
struct hvm_domain hvm_domain;
Index: xen-4.1.2-testing/xen/include/asm-x86/hpet.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/hpet.h
+++ xen-4.1.2-testing/xen/include/asm-x86/hpet.h
@@ -52,6 +52,7 @@
#define HPET_TN_FSB_CAP 0x8000
#define HPET_TN_ROUTE_SHIFT 9
+extern void (*pv_rtc_handler)(uint8_t reg, uint8_t value);
#define hpet_read32(x) \
(*(volatile u32 *)(fix_to_virt(FIX_HPET_BASE) + (x)))
Index: xen-4.1.2-testing/xen/include/asm-x86/mach-default/smpboot_hooks.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/mach-default/smpboot_hooks.h
+++ xen-4.1.2-testing/xen/include/asm-x86/mach-default/smpboot_hooks.h
@@ -3,7 +3,11 @@
static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&rtc_lock, flags);
CMOS_WRITE(0xa, 0xf);
+ spin_unlock_irqrestore(&rtc_lock, flags);
flush_tlb_local();
Dprintk("1.\n");
*((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
@@ -14,6 +18,8 @@ static inline void smpboot_setup_warm_re
static inline void smpboot_restore_warm_reset_vector(void)
{
+ unsigned long flags;
+
/*
* Install writable page 0 entry to set BIOS data area.
*/
@@ -23,7 +29,9 @@ static inline void smpboot_restore_warm_
* Paranoid: Set warm reset code and vector here back
* to default values.
*/
+ spin_lock_irqsave(&rtc_lock, flags);
CMOS_WRITE(0, 0xf);
+ spin_unlock_irqrestore(&rtc_lock, flags);
*((volatile int *) maddr_to_virt(0x467)) = 0;
}
++++++ 23724-x86-smpboot-x2apic.patch ++++++
# HG changeset patch
# User Tim Deegan <Tim.Deegan@xxxxxxxxxx>
# Date 1311081181 -3600
# Node ID b3434f24b0827c5ef34e4b4a72893288e2ffbe40
# Parent 18653a163b1e8e10b4353272bcb9e8302bfd2e19
x86: Remove timeouts from INIT-SIPI-SIPI sequence when using x2apic.
Some of the timeouts are pointless since they're waiting for the ICR
to ack the IPI delivery and that doesn't happen on x2apic.
The others should be benign (and are suggested in the SDM) but
removing them makes AP bringup much more reliable on some test boxes.
Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/smpboot.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/smpboot.c
+++ xen-4.1.2-testing/xen/arch/x86/smpboot.c
@@ -447,29 +447,30 @@ static int wakeup_secondary_cpu(int phys
apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
phys_apicid);
- Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- if ( !x2apic_enabled )
+ if ( !x2apic_enabled )
+ {
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while ( send_status && (timeout++ < 1000) );
+ } while ( send_status && (timeout++ < 1000) );
- mdelay(10);
+ mdelay(10);
- Dprintk("Deasserting INIT.\n");
+ Dprintk("Deasserting INIT.\n");
- apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
+ apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
- Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- if ( !x2apic_enabled )
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while ( send_status && (timeout++ < 1000) );
+ } while ( send_status && (timeout++ < 1000) );
+ }
/*
* Should we send STARTUP IPIs ?
@@ -498,22 +499,24 @@ static int wakeup_secondary_cpu(int phys
*/
apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12), phys_apicid);
- /* Give the other CPU some time to accept the IPI. */
- udelay(300);
-
- Dprintk("Startup point 1.\n");
-
- Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- if ( !x2apic_enabled )
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while ( send_status && (timeout++ < 1000) );
-
- /* Give the other CPU some time to accept the IPI. */
- udelay(200);
+ if ( !x2apic_enabled )
+ {
+ /* Give the other CPU some time to accept the IPI. */
+ udelay(300);
+
+ Dprintk("Startup point 1.\n");
+
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while ( send_status && (timeout++ < 1000) );
+
+ /* Give the other CPU some time to accept the IPI. */
+ udelay(200);
+ }
/* Due to the Pentium erratum 3AP. */
if ( maxlvt > 3 )
++++++ 23726-x86-intel-flexmigration-v2.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1311081291 -3600
# Node ID fd97ca086df6808bffc6ecf3f79cebca64c60bc3
# Parent 4dc6a9ba90d60fdf0cc0898fc9a8fe84ae9030fc
x86: update Intel CPUID masking code to latest spec
..., which adds masking of the xsave feature leaf.
Also fix the printing (to actually make it do what it was supposed to
do from the beginning) of what specific masking couldn't be done in
case the user requested something the hardware doesn't support.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1311255291 -3600
# Node ID 48f72b389b04cfa8d44924577a69ed59e48fbe77
# Parent dd5eecf739d152fb16bd44897875ea878d4c9d59
x86: add change missing in c/s 23726:fd97ca086df6
The early "do we need to do anything" check needs adjustment, too.
Thanks to Haitao Shan for pointing this out.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -27,10 +27,15 @@ boolean_param("noserialnumber", disable_
static bool_t __cpuinitdata use_xsave = 1;
boolean_param("xsave", use_xsave);
+
unsigned int __devinitdata opt_cpuid_mask_ecx = ~0u;
integer_param("cpuid_mask_ecx", opt_cpuid_mask_ecx);
unsigned int __devinitdata opt_cpuid_mask_edx = ~0u;
integer_param("cpuid_mask_edx", opt_cpuid_mask_edx);
+
+unsigned int __devinitdata opt_cpuid_mask_xsave_eax = ~0u;
+integer_param("cpuid_mask_xsave_eax", opt_cpuid_mask_xsave_eax);
+
unsigned int __devinitdata opt_cpuid_mask_ext_ecx = ~0u;
integer_param("cpuid_mask_ext_ecx", opt_cpuid_mask_ext_ecx);
unsigned int __devinitdata opt_cpuid_mask_ext_edx = ~0u;
--- a/xen/arch/x86/cpu/cpu.h
+++ b/xen/arch/x86/cpu/cpu.h
@@ -22,6 +22,7 @@ struct cpu_dev {
extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM];
extern unsigned int opt_cpuid_mask_ecx, opt_cpuid_mask_edx;
+extern unsigned int opt_cpuid_mask_xsave_eax;
extern unsigned int opt_cpuid_mask_ext_ecx, opt_cpuid_mask_ext_edx;
extern int get_model_name(struct cpuinfo_x86 *c);
--- a/xen/arch/x86/cpu/intel.c
+++ b/xen/arch/x86/cpu/intel.c
@@ -59,10 +59,12 @@ void set_cpuid_faulting(bool_t enable)
*/
static void __devinit set_cpuidmask(const struct cpuinfo_x86 *c)
{
+ u32 eax, edx;
const char *extra = "";
if (!~(opt_cpuid_mask_ecx & opt_cpuid_mask_edx &
- opt_cpuid_mask_ext_ecx & opt_cpuid_mask_ext_edx))
+ opt_cpuid_mask_ext_ecx & opt_cpuid_mask_ext_edx &
+ opt_cpuid_mask_xsave_eax))
return;
/* Only family 6 supports this feature */
@@ -75,9 +77,12 @@ static void __devinit set_cpuidmask(cons
wrmsr(MSR_INTEL_CPUID_FEATURE_MASK,
opt_cpuid_mask_ecx,
opt_cpuid_mask_edx);
- if (!~(opt_cpuid_mask_ext_ecx & opt_cpuid_mask_ext_edx))
+ if (~(opt_cpuid_mask_ext_ecx & opt_cpuid_mask_ext_edx))
+ extra = "extended ";
+ else if (~opt_cpuid_mask_xsave_eax)
+ extra = "xsave ";
+ else
return;
- extra = "extended ";
break;
/*
* CPU supports this feature if the processor signature meets the following:
@@ -97,11 +102,25 @@ static void __devinit set_cpuidmask(cons
wrmsr(MSR_INTEL_CPUID80000001_FEATURE_MASK,
opt_cpuid_mask_ext_ecx,
opt_cpuid_mask_ext_edx);
+ if (!~opt_cpuid_mask_xsave_eax)
+ return;
+ extra = "xsave ";
+ break;
+ case 0x2a:
+ wrmsr(MSR_INTEL_CPUID1_FEATURE_MASK_V2,
+ opt_cpuid_mask_ecx,
+ opt_cpuid_mask_edx);
+ rdmsr(MSR_INTEL_CPUIDD_01_FEATURE_MASK, eax, edx);
+ wrmsr(MSR_INTEL_CPUIDD_01_FEATURE_MASK,
+ opt_cpuid_mask_xsave_eax, edx);
+ wrmsr(MSR_INTEL_CPUID80000001_FEATURE_MASK_V2,
+ opt_cpuid_mask_ext_ecx,
+ opt_cpuid_mask_ext_edx);
return;
}
- printk(XENLOG_ERR "Cannot set CPU feature mask on CPU#%d\n",
- smp_processor_id());
+ printk(XENLOG_ERR "Cannot set CPU %sfeature mask on CPU#%d\n",
+ extra, smp_processor_id());
}
void __devinit early_intel_workaround(struct cpuinfo_x86 *c)
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -488,6 +488,10 @@
#define MSR_INTEL_CPUID1_FEATURE_MASK 0x00000130
#define MSR_INTEL_CPUID80000001_FEATURE_MASK 0x00000131
+#define MSR_INTEL_CPUID1_FEATURE_MASK_V2 0x00000132
+#define MSR_INTEL_CPUID80000001_FEATURE_MASK_V2 0x00000133
+#define MSR_INTEL_CPUIDD_01_FEATURE_MASK 0x00000134
+
/* Intel cpuid faulting MSRs */
#define MSR_INTEL_PLATFORM_INFO 0x000000ce
#define MSR_INTEL_MISC_FEATURES_ENABLES 0x00000140
++++++ 23735-guest-dom0-cap.patch ++++++
References: bnc#702407
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1311407355 -3600
# Node ID 537918f518eec3d8e2e2dad403fce40303321523
# Parent 42edf1481c5704c8ce1eb171a713b5411df0551a
add privileged (dom0) kernel feature indication
With our switching away from supporting 32-bit Dom0 operation, users
complained that attempts (perhaps due to lack of knowledge of that
change) to boot the no longer privileged kernel in Dom0 resulted in
apparently silent failure. To make the mismatch explicit and visible,
add dom0 feature flag that the kernel can set to indicate operation as
dom0 is supported.
Due to the way elf_xen_parse_features() worked up to now (getting
fixed here), adding features indications to the old, string based ELF
note would make the respective kernel unusable on older hypervisors.
For that reason, a new ELF Note is being introduced that allows
specifying supported features as a bit array instead (with features
unknown to the hypervisor simply ignored, as now also done by
elf_xen_parse_features(), whereas here unknown kernel-required
features still keep the kernel [and hence VM] from booting).
Introduce and use elf_note_numeric_array() to be forward
compatible (or else an old hypervisor wouldn't be able to parse kernel
specified features occupying more than 64 bits - thanks, Ian!).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1311598088 -3600
# Node ID 50ddc200a60cad3929a79a992f09145fd39af49d
# Parent d8725d9fb8657874011d2f2772f5e970b24dfe9b
fix regression from c/s 23735:537918f518ee
This was checking presence of the wrong (old) ELF note. I don't really
understand how this failed consistently only for one of the xen-boot
tests...
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/tools/libxc/xc_dom_elfloader.c
===================================================================
--- xen-4.1.2-testing.orig/tools/libxc/xc_dom_elfloader.c
+++ xen-4.1.2-testing/tools/libxc/xc_dom_elfloader.c
@@ -286,6 +286,13 @@ static int xc_dom_parse_elf_kernel(struc
if ( (rc = elf_xen_parse(elf, &dom->parms)) != 0 )
return rc;
+ if ( elf_xen_feature_get(XENFEAT_dom0, dom->parms.f_required) )
+ {
+ xc_dom_panic(dom->xch, XC_INVALID_KERNEL, "%s: Kernel does not"
+ " support unprivileged (DomU) operation", __FUNCTION__);
+ return -EINVAL;
+ }
+
/* find kernel segment */
dom->kernel_seg.vstart = dom->parms.virt_kstart;
dom->kernel_seg.vend = dom->parms.virt_kend;
Index: xen-4.1.2-testing/xen/arch/ia64/xen/domain.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/ia64/xen/domain.c
+++ xen-4.1.2-testing/xen/arch/ia64/xen/domain.c
@@ -2164,6 +2164,13 @@ int __init construct_dom0(struct domain
return -1;
}
+ if (parms.elf_notes[XEN_ELFNOTE_SUPPORTED_FEATURES].type !=
XEN_ENT_NONE &&
+ !test_bit(XENFEAT_dom0, parms.f_supported))
+ {
+ printk("Kernel does not support Dom0 operation\n");
+ return -1;
+ }
+
p_start = parms.virt_base;
pkern_start = parms.virt_kstart;
pkern_end = parms.virt_kend;
Index: xen-4.1.2-testing/xen/arch/x86/domain_build.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/domain_build.c
+++ xen-4.1.2-testing/xen/arch/x86/domain_build.c
@@ -417,6 +417,13 @@ int __init construct_dom0(
return -EINVAL;
}
+ if ( parms.elf_notes[XEN_ELFNOTE_SUPPORTED_FEATURES].type != XEN_ENT_NONE
&&
+ !test_bit(XENFEAT_dom0, parms.f_supported) )
+ {
+ printk("Kernel does not support Dom0 operation\n");
+ return -EINVAL;
+ }
+
#if defined(__x86_64__)
if ( compat32 )
{
Index: xen-4.1.2-testing/xen/common/kernel.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/kernel.c
+++ xen-4.1.2-testing/xen/common/kernel.c
@@ -287,6 +287,8 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL
(1U << XENFEAT_auto_translated_physmap);
if ( supervisor_mode_kernel )
fi.submap |= 1U << XENFEAT_supervisor_mode_kernel;
+ if ( current->domain == dom0 )
+ fi.submap |= 1U << XENFEAT_dom0;
#ifdef CONFIG_X86
if ( !is_hvm_vcpu(current) )
fi.submap |= (1U << XENFEAT_mmu_pt_update_preserve_ad) |
Index: xen-4.1.2-testing/xen/common/libelf/libelf-dominfo.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/libelf/libelf-dominfo.c
+++ xen-4.1.2-testing/xen/common/libelf/libelf-dominfo.c
@@ -26,7 +26,8 @@ static const char *const elf_xen_feature
[XENFEAT_writable_descriptor_tables] = "writable_descriptor_tables",
[XENFEAT_auto_translated_physmap] = "auto_translated_physmap",
[XENFEAT_supervisor_mode_kernel] = "supervisor_mode_kernel",
- [XENFEAT_pae_pgdir_above_4gb] = "pae_pgdir_above_4gb"
+ [XENFEAT_pae_pgdir_above_4gb] = "pae_pgdir_above_4gb",
+ [XENFEAT_dom0] = "dom0"
};
static const int elf_xen_features =
sizeof(elf_xen_feature_names) / sizeof(elf_xen_feature_names[0]);
@@ -82,7 +83,7 @@ int elf_xen_parse_features(const char *f
}
}
}
- if ( i == elf_xen_features )
+ if ( i == elf_xen_features && required && feature[0] == '!' )
return -1;
}
@@ -113,6 +114,7 @@ int elf_xen_parse_note(struct elf_binary
[XEN_ELFNOTE_LOADER] = { "LOADER", 1},
[XEN_ELFNOTE_PAE_MODE] = { "PAE_MODE", 1},
[XEN_ELFNOTE_FEATURES] = { "FEATURES", 1},
+ [XEN_ELFNOTE_SUPPORTED_FEATURES] = { "SUPPORTED_FEATURES", 0},
[XEN_ELFNOTE_BSD_SYMTAB] = { "BSD_SYMTAB", 1},
[XEN_ELFNOTE_SUSPEND_CANCEL] = { "SUSPEND_CANCEL", 0 },
[XEN_ELFNOTE_MOD_START_PFN] = { "MOD_START_PFN", 0 },
@@ -121,6 +123,7 @@ int elf_xen_parse_note(struct elf_binary
const char *str = NULL;
uint64_t val = 0;
+ unsigned int i;
int type = elf_uval(elf, note, type);
if ( (type >= sizeof(note_desc) / sizeof(note_desc[0])) ||
@@ -199,6 +202,12 @@ int elf_xen_parse_note(struct elf_binary
return -1;
break;
+ case XEN_ELFNOTE_SUPPORTED_FEATURES:
+ for ( i = 0; i < XENFEAT_NR_SUBMAPS; ++i )
+ parms->f_supported[i] |= elf_note_numeric_array(
+ elf, note, sizeof(*parms->f_supported), i);
+ break;
+
}
return 0;
}
Index: xen-4.1.2-testing/xen/common/libelf/libelf-tools.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/libelf/libelf-tools.c
+++ xen-4.1.2-testing/xen/common/libelf/libelf-tools.c
@@ -227,6 +227,27 @@ uint64_t elf_note_numeric(struct elf_bin
return 0;
}
}
+
+uint64_t elf_note_numeric_array(struct elf_binary *elf, const elf_note *note,
+ unsigned int unitsz, unsigned int idx)
+{
+ const void *desc = elf_note_desc(elf, note);
+ int descsz = elf_uval(elf, note, descsz);
+
+ if ( descsz % unitsz || idx >= descsz / unitsz )
+ return 0;
+ switch (unitsz)
+ {
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ return elf_access_unsigned(elf, desc, idx * unitsz, unitsz);
+ default:
+ return 0;
+ }
+}
+
const elf_note *elf_note_next(struct elf_binary *elf, const elf_note * note)
{
int namesz = (elf_uval(elf, note, namesz) + 3) & ~3;
Index: xen-4.1.2-testing/xen/include/public/elfnote.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/public/elfnote.h
+++ xen-4.1.2-testing/xen/include/public/elfnote.h
@@ -179,9 +179,22 @@
#define XEN_ELFNOTE_MOD_START_PFN 16
/*
+ * The features supported by this kernel (numeric).
+ *
+ * Other than XEN_ELFNOTE_FEATURES on pre-4.2 Xen, this note allows a
+ * kernel to specify support for features that older hypervisors don't
+ * know about. The set of features 4.2 and newer hypervisors will
+ * consider supported by the kernel is the combination of the sets
+ * specified through this and the string note.
+ *
+ * LEGACY: FEATURES
+ */
+#define XEN_ELFNOTE_SUPPORTED_FEATURES 17
+
+/*
* The number of the highest elfnote defined.
*/
-#define XEN_ELFNOTE_MAX XEN_ELFNOTE_MOD_START_PFN
+#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUPPORTED_FEATURES
/*
* System information exported through crash notes.
Index: xen-4.1.2-testing/xen/include/public/features.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/public/features.h
+++ xen-4.1.2-testing/xen/include/public/features.h
@@ -75,7 +75,10 @@
#define XENFEAT_hvm_safe_pvclock 9
/* x86: pirq can be used by HVM guests */
-#define XENFEAT_hvm_pirqs 10
+#define XENFEAT_hvm_pirqs 10
+
+/* operation as Dom0 is supported */
+#define XENFEAT_dom0 11
#define XENFEAT_NR_SUBMAPS 1
Index: xen-4.1.2-testing/xen/include/xen/libelf.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/xen/libelf.h
+++ xen-4.1.2-testing/xen/include/xen/libelf.h
@@ -179,6 +179,8 @@ const elf_sym *elf_sym_by_index(struct e
const char *elf_note_name(struct elf_binary *elf, const elf_note * note);
const void *elf_note_desc(struct elf_binary *elf, const elf_note * note);
uint64_t elf_note_numeric(struct elf_binary *elf, const elf_note * note);
+uint64_t elf_note_numeric_array(struct elf_binary *, const elf_note *,
+ unsigned int unitsz, unsigned int idx);
const elf_note *elf_note_next(struct elf_binary *elf, const elf_note * note);
int elf_is_elfbinary(const void *image);
++++++ 23747-mmcfg-base-address.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1311608539 -3600
# Node ID b07b6fa766562c990b1d1e59af032feda15c2edb
# Parent aa54b8175954bd6ffeb3bcf72e782e133896b388
x86-64/MMCFG: correct base address computation for regions not starting at bus 0
As per the specification, the base address reported by ACPI is the one
that would be used if the region started at bus 0. Hence the
start_bus_number offset needs to be added not only to the virtual
address, but also the physical one when establishing the mapping, and
it then needs to be subtracted when obtaining the virtual address for
doing accesses.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/x86_64/mmconfig_64.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/x86_64/mmconfig_64.c
+++ xen-4.1.2-testing/xen/arch/x86/x86_64/mmconfig_64.c
@@ -25,7 +25,7 @@ struct mmcfg_virt {
static struct mmcfg_virt *pci_mmcfg_virt;
static int __initdata mmcfg_pci_segment_shift;
-static char __iomem *get_virt(unsigned int seg, unsigned bus)
+static char __iomem *get_virt(unsigned int seg, unsigned int *bus)
{
struct acpi_mcfg_allocation *cfg;
int cfg_num;
@@ -33,9 +33,11 @@ static char __iomem *get_virt(unsigned i
for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) {
cfg = pci_mmcfg_virt[cfg_num].cfg;
if (cfg->pci_segment == seg &&
- (cfg->start_bus_number <= bus) &&
- (cfg->end_bus_number >= bus))
+ (cfg->start_bus_number <= *bus) &&
+ (cfg->end_bus_number >= *bus)) {
+ *bus -= cfg->start_bus_number;
return pci_mmcfg_virt[cfg_num].virt;
+ }
}
/* Fall back to type 0 */
@@ -46,7 +48,7 @@ static char __iomem *pci_dev_base(unsign
{
char __iomem *addr;
- addr = get_virt(seg, bus);
+ addr = get_virt(seg, &bus);
if (!addr)
return NULL;
return addr + ((bus << 20) | (devfn << 12));
@@ -121,8 +123,11 @@ static void __iomem * __init mcfg_iorema
if (virt + size < virt || virt + size > PCI_MCFG_VIRT_END)
return NULL;
- map_pages_to_xen(virt, cfg->address >> PAGE_SHIFT,
- size >> PAGE_SHIFT, PAGE_HYPERVISOR_NOCACHE);
+ if (map_pages_to_xen(virt,
+ (cfg->address >> PAGE_SHIFT) +
+ (cfg->start_bus_number << (20 - PAGE_SHIFT)),
+ size >> PAGE_SHIFT, PAGE_HYPERVISOR_NOCACHE))
+ return NULL;
return (void __iomem *) virt;
}
++++++ 23749-mmcfg-reservation.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1311608606 -3600
# Node ID e8d1c8f074babcb0e4511393106e80a918a38204
# Parent e1717d180897e6e7a04d83a41d86b35ac16912b9
x86-64/MMCFG: pass down firmware (ACPI) reservation status of used memory space
Reserving the MMCFG address range(s) in E820 is specified to only be
optional for the firmware to do. The requirement is to have them
reserved in ACPI resources. Those, however, aren't directly visible to
Xen as they require the ACPI interpreter to be active. Thus, if a
range isn't reserved in E820, we should not completely disable use of
MMCFG on the respective bus range, but rather keep it disabled until
Dom0 can pass down information on the ACPI reservation status (though
a new physdevop hypercall).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1322813126 -3600
# Node ID 60d4e257d04ba0bd663bbef5e93a97b6d8b66e54
# Parent 3f815406feb25a9348d8be9bc49fdc8c93ccb7c2
x86-64/mmcfg: remove __initdata annotation overlooked in 23749:e8d1c8f074ba
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -16,6 +16,10 @@
#include <xsm/xsm.h>
#include <asm/p2m.h>
+#ifdef CONFIG_X86_64
+#include "x86_64/mmconfig.h"
+#endif
+
#ifndef COMPAT
typedef long ret_t;
#endif
@@ -512,6 +516,24 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
break;
}
+#ifdef __x86_64__
+ case PHYSDEVOP_pci_mmcfg_reserved: {
+ struct physdev_pci_mmcfg_reserved info;
+
+ ret = -EPERM;
+ if ( !IS_PRIV(current->domain) )
+ break;
+
+ ret = -EFAULT;
+ if ( copy_from_guest(&info, arg, 1) )
+ break;
+
+ ret = pci_mmcfg_reserved(info.address, info.segment,
+ info.start_bus, info.end_bus, info.flags);
+ break;
+ }
+#endif
+
case PHYSDEVOP_restore_msi: {
struct physdev_restore_msi restore_msi;
struct pci_dev *pdev;
--- a/xen/arch/x86/x86_64/mmconfig.h
+++ b/xen/arch/x86/x86_64/mmconfig.h
@@ -84,6 +84,11 @@ extern int pci_mmcfg_config_num;
extern struct acpi_mcfg_allocation *pci_mmcfg_config;
/* function prototypes */
+struct acpi_table_header;
int acpi_parse_mcfg(struct acpi_table_header *header);
+int pci_mmcfg_reserved(uint64_t address, unsigned int segment,
+ unsigned int start_bus, unsigned int end_bus,
+ unsigned int flags);
int pci_mmcfg_arch_init(void);
-void pci_mmcfg_arch_free(void);
+int pci_mmcfg_arch_enable(unsigned int);
+void pci_mmcfg_arch_disable(unsigned int);
--- a/xen/arch/x86/x86_64/mmconfig-shared.c
+++ b/xen/arch/x86/x86_64/mmconfig-shared.c
@@ -22,10 +22,10 @@
#include <asm/e820.h>
#include <asm/msr.h>
#include <asm/msr-index.h>
+#include <public/physdev.h>
#include "mmconfig.h"
-static int __initdata known_bridge;
unsigned int pci_probe = PCI_PROBE_CONF1 | PCI_PROBE_MMCONF;
static void __init parse_mmcfg(char *s)
@@ -316,26 +316,21 @@ static int __init pci_mmcfg_check_hostbr
return name != NULL;
}
-typedef int (*check_reserved_t)(u64 start, u64 end, unsigned type);
-
static int __init is_mmconf_reserved(
- check_reserved_t is_reserved,
u64 addr, u64 size, int i,
- typeof(pci_mmcfg_config[0]) *cfg, int with_e820)
+ typeof(pci_mmcfg_config[0]) *cfg)
{
u64 old_size = size;
int valid = 0;
- while (!is_reserved(addr, addr + size - 1, E820_RESERVED)) {
+ while (!e820_all_mapped(addr, addr + size - 1, E820_RESERVED)) {
size >>= 1;
if (size < (16UL<<20))
break;
}
if (size >= (16UL<<20) || size == old_size) {
- printk(KERN_NOTICE
- "PCI: MCFG area at %lx reserved in %s\n",
- addr, with_e820?"E820":"ACPI motherboard resources");
+ printk(KERN_NOTICE "PCI: MCFG area at %lx reserved in E820\n", addr);
valid = 1;
if (old_size != size) {
@@ -352,15 +347,16 @@ static int __init is_mmconf_reserved(
return valid;
}
-static void __init pci_mmcfg_reject_broken(void)
+static bool_t __init pci_mmcfg_reject_broken(void)
{
typeof(pci_mmcfg_config[0]) *cfg;
int i;
+ bool_t valid = 1;
if ((pci_mmcfg_config_num == 0) ||
(pci_mmcfg_config == NULL) ||
(pci_mmcfg_config[0].address == 0))
- return;
+ return 0;
cfg = &pci_mmcfg_config[0];
@@ -374,27 +370,25 @@ static void __init pci_mmcfg_reject_brok
size = cfg->end_bus_number + 1 - cfg->start_bus_number;
size <<= 20;
printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lx "
- "segment %hu buses %u - %u\n",
+ "segment %04x buses %02x - %02x\n",
i, (unsigned long)cfg->address, cfg->pci_segment,
(unsigned int)cfg->start_bus_number,
(unsigned int)cfg->end_bus_number);
- if (!is_mmconf_reserved(e820_all_mapped, addr, size, i, cfg, 1))
- goto reject;
+ if (!is_mmconf_reserved(addr, size, i, cfg) ||
+ pci_mmcfg_arch_enable(i)) {
+ pci_mmcfg_arch_disable(i);
+ valid = 0;
+ }
}
- return;
-
-reject:
- printk(KERN_INFO "PCI: Not using MMCONFIG.\n");
- pci_mmcfg_arch_free();
- xfree(pci_mmcfg_config);
- pci_mmcfg_config = NULL;
- pci_mmcfg_config_num = 0;
+ return valid;
}
void __init acpi_mmcfg_init(void)
{
+ bool_t valid = 1;
+
/* MMCONFIG disabled */
if ((pci_probe & PCI_PROBE_MMCONF) == 0)
return;
@@ -403,16 +397,17 @@ void __init acpi_mmcfg_init(void)
if (!(pci_probe & PCI_PROBE_MASK & ~PCI_PROBE_MMCONF))
return;
- /* for late to exit */
- if (known_bridge)
- return;
-
- if (pci_mmcfg_check_hostbridge())
- known_bridge = 1;
+ if (pci_mmcfg_check_hostbridge()) {
+ unsigned int i;
- if (!known_bridge) {
+ pci_mmcfg_arch_init();
+ for (i = 0; i < pci_mmcfg_config_num; ++i)
+ if (pci_mmcfg_arch_enable(i))
+ valid = 0;
+ } else {
acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg);
- pci_mmcfg_reject_broken();
+ pci_mmcfg_arch_init();
+ valid = pci_mmcfg_reject_broken();
}
if ((pci_mmcfg_config_num == 0) ||
@@ -420,9 +415,41 @@ void __init acpi_mmcfg_init(void)
(pci_mmcfg_config[0].address == 0))
return;
- if (pci_mmcfg_arch_init()) {
+ if (valid)
pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
+}
+
+int pci_mmcfg_reserved(uint64_t address, unsigned int segment,
+ unsigned int start_bus, unsigned int end_bus,
+ unsigned int flags)
+{
+ unsigned int i;
+
+ if (flags & ~XEN_PCI_MMCFG_RESERVED)
+ return -EINVAL;
+
+ for (i = 0; i < pci_mmcfg_config_num; ++i) {
+ const typeof(pci_mmcfg_config[0]) *cfg = &pci_mmcfg_config[i];
+
+ if (cfg->pci_segment == segment &&
+ cfg->start_bus_number == start_bus &&
+ cfg->end_bus_number == end_bus) {
+ if (cfg->address != address) {
+ printk(KERN_WARNING
+ "Base address presented for segment %04x bus %02x-%02x"
+ " (%08" PRIx64 ") does not match previously obtained"
+ " one (%08" PRIx64 ")\n",
+ segment, start_bus, end_bus, address, cfg->address);
+ return -EIO;
+ }
+ if (flags & XEN_PCI_MMCFG_RESERVED)
+ return pci_mmcfg_arch_enable(i);
+ pci_mmcfg_arch_disable(i);
+ return 0;
+ }
}
+
+ return -ENODEV;
}
/**
--- a/xen/arch/x86/x86_64/mmconfig_64.c
+++ b/xen/arch/x86/x86_64/mmconfig_64.c
@@ -23,7 +23,7 @@ struct mmcfg_virt {
char __iomem *virt;
};
static struct mmcfg_virt *pci_mmcfg_virt;
-static int __initdata mmcfg_pci_segment_shift;
+static unsigned int mmcfg_pci_segment_shift;
static char __iomem *get_virt(unsigned int seg, unsigned int *bus)
{
@@ -112,7 +112,8 @@ int pci_mmcfg_write(unsigned int seg, un
return 0;
}
-static void __iomem * __init mcfg_ioremap(struct acpi_mcfg_allocation *cfg)
+static void __iomem *mcfg_ioremap(const struct acpi_mcfg_allocation *cfg,
+ unsigned int prot)
{
unsigned long virt, size;
@@ -126,19 +127,55 @@ static void __iomem * __init mcfg_iorema
if (map_pages_to_xen(virt,
(cfg->address >> PAGE_SHIFT) +
(cfg->start_bus_number << (20 - PAGE_SHIFT)),
- size >> PAGE_SHIFT, PAGE_HYPERVISOR_NOCACHE))
+ size >> PAGE_SHIFT, prot))
return NULL;
return (void __iomem *) virt;
}
+int pci_mmcfg_arch_enable(unsigned int idx)
+{
+ const typeof(pci_mmcfg_config[0]) *cfg = pci_mmcfg_virt[idx].cfg;
+
+ if (pci_mmcfg_virt[idx].virt)
+ return 0;
+ pci_mmcfg_virt[idx].virt = mcfg_ioremap(cfg, PAGE_HYPERVISOR_NOCACHE);
+ if (!pci_mmcfg_virt[idx].virt) {
+ printk(KERN_ERR "PCI: Cannot map MCFG aperture for segment %04x\n",
+ cfg->pci_segment);
+ return -ENOMEM;
+ }
+ printk(KERN_INFO "PCI: Using MCFG for segment %04x bus %02x-%02x\n",
+ cfg->pci_segment, cfg->start_bus_number, cfg->end_bus_number);
+ return 0;
+}
+
+void pci_mmcfg_arch_disable(unsigned int idx)
+{
+ const typeof(pci_mmcfg_config[0]) *cfg = pci_mmcfg_virt[idx].cfg;
+
+ pci_mmcfg_virt[idx].virt = NULL;
+ /*
+ * Don't use destroy_xen_mappings() here, or make sure that at least
+ * the necessary L4 entries get populated (so that they get properly
+ * propagated to guest domains' page tables).
+ */
+ mcfg_ioremap(cfg, 0);
+ printk(KERN_WARNING "PCI: Not using MCFG for segment %04x bus %02x-%02x\n",
+ cfg->pci_segment, cfg->start_bus_number, cfg->end_bus_number);
+}
+
int __init pci_mmcfg_arch_init(void)
{
int i;
+ if (pci_mmcfg_virt)
+ return 0;
+
pci_mmcfg_virt = xmalloc_array(struct mmcfg_virt, pci_mmcfg_config_num);
if (pci_mmcfg_virt == NULL) {
printk(KERN_ERR "PCI: Can not allocate memory for mmconfig
structures\n");
+ pci_mmcfg_config_num = 0;
return 0;
}
memset(pci_mmcfg_virt, 0, sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num);
@@ -149,34 +186,5 @@ int __init pci_mmcfg_arch_init(void)
++mmcfg_pci_segment_shift;
}
mmcfg_pci_segment_shift += 20;
- for (i = 0; i < pci_mmcfg_config_num; ++i) {
- pci_mmcfg_virt[i].virt = mcfg_ioremap(&pci_mmcfg_config[i]);
- if (!pci_mmcfg_virt[i].virt) {
- printk(KERN_ERR "PCI: Cannot map mmconfig aperture for "
- "segment %d\n",
- pci_mmcfg_config[i].pci_segment);
- pci_mmcfg_arch_free();
- return 0;
- }
- }
return 1;
}
-
-void __init pci_mmcfg_arch_free(void)
-{
- int i;
-
- if (pci_mmcfg_virt == NULL)
- return;
-
- for (i = 0; i < pci_mmcfg_config_num; ++i) {
- if (pci_mmcfg_virt[i].virt) {
- iounmap(pci_mmcfg_virt[i].virt);
- pci_mmcfg_virt[i].virt = NULL;
- pci_mmcfg_virt[i].cfg = NULL;
- }
- }
-
- xfree(pci_mmcfg_virt);
- pci_mmcfg_virt = NULL;
-}
--- a/xen/arch/x86/x86_64/physdev.c
+++ b/xen/arch/x86/x86_64/physdev.c
@@ -54,6 +54,10 @@
#define physdev_get_free_pirq compat_physdev_get_free_pirq
#define physdev_get_free_pirq_t physdev_get_free_pirq_compat_t
+#define xen_physdev_pci_mmcfg_reserved physdev_pci_mmcfg_reserved
+CHECK_physdev_pci_mmcfg_reserved;
+#undef xen_physdev_pci_mmcfg_reserved
+
#define COMPAT
#undef guest_handle_okay
#define guest_handle_okay compat_handle_okay
--- a/xen/include/public/physdev.h
+++ b/xen/include/public/physdev.h
@@ -255,6 +255,19 @@ struct physdev_get_free_pirq {
typedef struct physdev_get_free_pirq physdev_get_free_pirq_t;
DEFINE_XEN_GUEST_HANDLE(physdev_get_free_pirq_t);
+#define XEN_PCI_MMCFG_RESERVED 0x1
+
+#define PHYSDEVOP_pci_mmcfg_reserved 24
+struct physdev_pci_mmcfg_reserved {
+ uint64_t address;
+ uint16_t segment;
+ uint8_t start_bus;
+ uint8_t end_bus;
+ uint32_t flags;
+};
+typedef struct physdev_pci_mmcfg_reserved physdev_pci_mmcfg_reserved_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_pci_mmcfg_reserved_t);
+
/*
* Notify that some PIRQ-bound event channels have been unmasked.
* ** This command is obsolete since interface version 0x00030202 and is **
--- a/xen/include/xlat.lst
+++ b/xen/include/xlat.lst
@@ -60,6 +60,7 @@
! memory_map memory.h
! memory_reservation memory.h
! pod_target memory.h
+? physdev_pci_mmcfg_reserved physdev.h
! sched_poll sched.h
? sched_remote_shutdown sched.h
? sched_shutdown sched.h
++++++ 23752-x86-shared-IRQ-vector-maps.patch ++++++
References: bnc#713503
# HG changeset patch
# User George Dunlap <george.dunlap@xxxxxxxxxxxxx>
# Date 1311701818 -3600
# Node ID ef9ed3d2aa870a37ed5e611be9c524d526a2d604
# Parent 590aadf7c46ae979da3552332f592f9492ce6d8b
xen: Infrastructure to allow irqs to share vector maps
Laying the groundwork for per-device vector maps. This generic
code allows any irq to point to a vector map; all irqs sharing the
same vector map will avoid sharing vectors.
Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
# HG changeset patch
# User George Dunlap <george.dunlap@xxxxxxxxxxxxx>
# Date 1314026133 -3600
# Node ID 3a05da2dc7c0a5fc0fcfc40c535d1fcb71203625
# Parent d1cd78a73a79e0e648937322cdb8d92a7f86327a
x86: Fix up irq vector map logic
We need to make sure that cfg->used_vector is only cleared once;
otherwise there may be a race condition that allows the same vector to
be assigned twice, defeating the whole purpose of the map.
This makes two changes:
* __clear_irq_vector() only clears the vector if the irq is not being
moved
* smp_iqr_move_cleanup_interrupt() only clears used_vector if this
is the last place it's being used (move_cleanup_count==0 after
decrement).
Also make use of asserts more consistent, to catch this kind of logic
bug in the future.
Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -548,6 +548,13 @@ fastcall void smp_irq_move_cleanup_inter
}
__get_cpu_var(vector_irq)[vector] = -1;
cfg->move_cleanup_count--;
+
+ if ( cfg->move_cleanup_count == 0
+ && cfg->used_vectors )
+ {
+ ASSERT(test_bit(vector, cfg->used_vectors));
+ clear_bit(vector, cfg->used_vectors);
+ }
unlock:
spin_unlock(&desc->lock);
}
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -94,6 +94,11 @@ static int __init __bind_irq_vector(int
per_cpu(vector_irq, cpu)[vector] = irq;
cfg->vector = vector;
cfg->cpu_mask = online_mask;
+ if ( cfg->used_vectors )
+ {
+ ASSERT(!test_bit(vector, cfg->used_vectors));
+ set_bit(vector, cfg->used_vectors);
+ }
irq_status[irq] = IRQ_USED;
if (IO_APIC_IRQ(irq))
irq_vector[irq] = vector;
@@ -159,6 +164,7 @@ static void dynamic_irq_cleanup(unsigned
desc->depth = 1;
desc->msi_desc = NULL;
desc->handler = &no_irq_type;
+ desc->chip_data->used_vectors=NULL;
cpus_setall(desc->affinity);
spin_unlock_irqrestore(&desc->lock, flags);
@@ -191,6 +197,7 @@ static void __clear_irq_vector(int irq)
if (likely(!cfg->move_in_progress))
return;
+
cpus_and(tmp_mask, cfg->old_cpu_mask, cpu_online_map);
for_each_cpu_mask(cpu, tmp_mask) {
for (vector = FIRST_DYNAMIC_VECTOR; vector <= LAST_DYNAMIC_VECTOR;
@@ -202,6 +209,12 @@ static void __clear_irq_vector(int irq)
}
}
+ if ( cfg->used_vectors )
+ {
+ ASSERT(test_bit(vector, cfg->used_vectors));
+ clear_bit(vector, cfg->used_vectors);
+ }
+
cfg->move_in_progress = 0;
}
@@ -261,6 +274,7 @@ static void init_one_irq_cfg(struct irq_
cfg->vector = IRQ_VECTOR_UNASSIGNED;
cpus_clear(cfg->cpu_mask);
cpus_clear(cfg->old_cpu_mask);
+ cfg->used_vectors = NULL;
}
int init_irq_data(void)
@@ -387,6 +401,10 @@ next:
if (test_bit(vector, used_vectors))
goto next;
+ if (cfg->used_vectors
+ && test_bit(vector, cfg->used_vectors) )
+ goto next;
+
for_each_cpu_mask(new_cpu, tmp_mask)
if (per_cpu(vector_irq, new_cpu)[vector] != -1)
goto next;
@@ -402,6 +420,11 @@ next:
per_cpu(vector_irq, new_cpu)[vector] = irq;
cfg->vector = vector;
cpus_copy(cfg->cpu_mask, tmp_mask);
+ if ( cfg->used_vectors )
+ {
+ ASSERT(!test_bit(vector, cfg->used_vectors));
+ set_bit(vector, cfg->used_vectors);
+ }
irq_status[irq] = IRQ_USED;
if (IO_APIC_IRQ(irq))
--- a/xen/include/asm-x86/irq.h
+++ b/xen/include/asm-x86/irq.h
@@ -23,11 +23,16 @@
#define irq_to_desc(irq) (&irq_desc[irq])
#define irq_cfg(irq) (&irq_cfg[irq])
+typedef struct {
+ DECLARE_BITMAP(_bits,NR_VECTORS);
+} vmask_t;
+
struct irq_cfg {
int vector;
cpumask_t cpu_mask;
cpumask_t old_cpu_mask;
unsigned move_cleanup_count;
+ vmask_t *used_vectors;
u8 move_in_progress : 1;
};
++++++ 23754-AMD-perdev-vector-map.patch ++++++
References: bnc#713503
# HG changeset patch
# User George Dunlap <george.dunlap@xxxxxxxxxxxxx>
# Date 1311701836 -3600
# Node ID 2e0cf9428554da666616982cd0074024ff85b221
# Parent ef9ed3d2aa870a37ed5e611be9c524d526a2d604
xen: Option to allow per-device vector maps for MSI IRQs
Add a vector-map to pci_dev, and add an option to point MSI-related
IRQs to the vector-map of the device.
This prevents irqs from the same device from being assigned
the same vector on different pcpus. This is required for systems
using an AMD IOMMU, since the intremap tables on AMD only look at
vector, and not destination ID.
Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
# HG changeset patch
# User George Dunlap <george.dunlap@xxxxxxxxxxxxx>
# Date 1311701852 -3600
# Node ID fa4e2ca9ecffbc432b451f495ad0a403644a6be8
# Parent 2e0cf9428554da666616982cd0074024ff85b221
xen: AMD IOMMU: Automatically enable per-device vector maps
Automatically enable per-device vector maps when using IOMMU,
unless disabled specifically by an IOMMU parameter.
Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
# HG changeset patch
# User George Dunlap <george.dunlap@xxxxxxxxxxxxx>
# Date 1315231215 -3600
# Node ID 32814ad7458dc842a7c588eee13e5c4ee11709a3
# Parent f1349a968a5ac5577d67ad4a3f3490c580dbe264
xen: Add global irq_vector_map option, set if using AMD global intremap tables
As mentioned in previous changesets, AMD IOMMU interrupt
remapping tables only look at the vector, not the destination
id of an interrupt. This means that all IRQs going through
the same interrupt remapping table need to *not* share vectors.
The irq "vector map" functionality was originally introduced
after a patch which disabled global AMD IOMMUs entirely. That
patch has since been reverted, meaning that AMD intremap tables
can either be per-device or global.
This patch therefore introduces a global irq vector map option,
and enables it if we're using an AMD IOMMU with a global
interrupt remapping table.
This patch removes the "irq-perdev-vector-map" boolean
command-line optino and replaces it with "irq_vector_map",
which can have one of three values: none, global, or per-device.
Setting the irq_vector_map to any value will override the
default that the AMD code sets.
Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1317730316 -7200
# Node ID a99d75671a911f9c0d5d11e0fe88a0a65863cb44
# Parent 3d1664cc9e458809e399320204aca8536e401ee1
AMD-IOMMU: remove dead variable references
These got orphaned up by recent changes.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
--- a/docs/src/user.tex
+++ b/docs/src/user.tex
@@ -4197,6 +4197,10 @@ writing to the VGA console after domain
\item [ vcpu\_migration\_delay=$<$minimum\_time$>$] Set minimum time of
vcpu migration in microseconds (default 0). This parameter avoids agressive
vcpu migration. For example, the linux kernel uses 0.5ms by default.
+\item [ irq_vector_map=xxx ] Enable irq vector non-sharing maps. Setting
'global'
+ will ensure that no IRQs will share vectors. Setting 'per-device' will
ensure
+ that no IRQs from the same device will share vectors. Setting to 'none' will
+ disable it entirely, overriding any defaults the IOMMU code may set.
\end{description}
In addition, the following options may be specified on the Xen command
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -24,6 +24,8 @@
#include <asm/mach-generic/mach_apic.h>
#include <public/physdev.h>
+static void parse_irq_vector_map_param(char *s);
+
/* opt_noirqbalance: If true, software IRQ balancing/affinity is disabled. */
bool_t __read_mostly opt_noirqbalance = 0;
boolean_param("noirqbalance", opt_noirqbalance);
@@ -32,6 +34,12 @@ unsigned int __read_mostly nr_irqs_gsi =
unsigned int __read_mostly nr_irqs;
integer_param("nr_irqs", nr_irqs);
+/* This default may be changed by the AMD IOMMU code */
+int __read_mostly opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_DEFAULT;
+custom_param("irq_vector_map", parse_irq_vector_map_param);
+
+vmask_t global_used_vector_map;
+
u8 __read_mostly *irq_vector;
struct irq_desc __read_mostly *irq_desc = NULL;
@@ -60,6 +68,26 @@ static struct timer irq_ratelimit_timer;
static unsigned int __read_mostly irq_ratelimit_threshold = 10000;
integer_param("irq_ratelimit", irq_ratelimit_threshold);
+static void __init parse_irq_vector_map_param(char *s)
+{
+ char *ss;
+
+ do {
+ ss = strchr(s, ',');
+ if ( ss )
+ *ss = '\0';
+
+ if ( !strcmp(s, "none"))
+ opt_irq_vector_map=OPT_IRQ_VECTOR_MAP_NONE;
+ else if ( !strcmp(s, "global"))
+ opt_irq_vector_map=OPT_IRQ_VECTOR_MAP_GLOBAL;
+ else if ( !strcmp(s, "per-device"))
+ opt_irq_vector_map=OPT_IRQ_VECTOR_MAP_PERDEV;
+
+ s = ss + 1;
+ } while ( ss );
+}
+
/* Must be called when irq disabled */
void lock_vector_lock(void)
{
@@ -344,6 +372,41 @@ hw_irq_controller no_irq_type = {
end_none
};
+static vmask_t *irq_get_used_vector_mask(int irq)
+{
+ vmask_t *ret = NULL;
+
+ if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_GLOBAL )
+ {
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ ret = &global_used_vector_map;
+
+ if ( desc->chip_data->used_vectors )
+ {
+ printk(XENLOG_INFO "%s: Strange, unassigned irq %d already has
used_vectors!\n",
+ __func__, irq);
+ }
+ else
+ {
+ int vector;
+
+ vector = irq_to_vector(irq);
+ if ( vector > 0 )
+ {
+ printk(XENLOG_INFO "%s: Strange, irq %d already assigned
vector %d!\n",
+ __func__, irq, vector);
+
+ ASSERT(!test_bit(vector, ret));
+
+ set_bit(vector, ret);
+ }
+ }
+ }
+
+ return ret;
+}
+
int __assign_irq_vector(int irq, struct irq_cfg *cfg, const cpumask_t *mask)
{
/*
@@ -362,6 +425,7 @@ int __assign_irq_vector(int irq, struct
int cpu, err;
unsigned long flags;
cpumask_t tmp_mask;
+ vmask_t *irq_used_vectors = NULL;
old_vector = irq_to_vector(irq);
if (old_vector) {
@@ -376,6 +440,17 @@ int __assign_irq_vector(int irq, struct
return -EAGAIN;
err = -ENOSPC;
+
+ /* This is the only place normal IRQs are ever marked
+ * as "in use". If they're not in use yet, check to see
+ * if we need to assign a global vector mask. */
+ if ( irq_status[irq] == IRQ_USED )
+ {
+ irq_used_vectors = cfg->used_vectors;
+ }
+ else
+ irq_used_vectors = irq_get_used_vector_mask(irq);
+
for_each_cpu_mask(cpu, *mask) {
int new_cpu;
int vector, offset;
@@ -401,8 +476,8 @@ next:
if (test_bit(vector, used_vectors))
goto next;
- if (cfg->used_vectors
- && test_bit(vector, cfg->used_vectors) )
+ if (irq_used_vectors
+ && test_bit(vector, irq_used_vectors) )
goto next;
for_each_cpu_mask(new_cpu, tmp_mask)
@@ -420,15 +495,22 @@ next:
per_cpu(vector_irq, new_cpu)[vector] = irq;
cfg->vector = vector;
cpus_copy(cfg->cpu_mask, tmp_mask);
+
+ irq_status[irq] = IRQ_USED;
+ ASSERT((cfg->used_vectors == NULL)
+ || (cfg->used_vectors == irq_used_vectors));
+ cfg->used_vectors = irq_used_vectors;
+
+ if (IO_APIC_IRQ(irq))
+ irq_vector[irq] = vector;
+
if ( cfg->used_vectors )
{
ASSERT(!test_bit(vector, cfg->used_vectors));
+
set_bit(vector, cfg->used_vectors);
}
- irq_status[irq] = IRQ_USED;
- if (IO_APIC_IRQ(irq))
- irq_vector[irq] = vector;
err = 0;
local_irq_restore(flags);
break;
@@ -1523,7 +1605,7 @@ int map_domain_pirq(
if ( !IS_PRIV(current->domain) &&
!(IS_PRIV_FOR(current->domain, d) &&
- irq_access_permitted(current->domain, pirq)))
+ irq_access_permitted(current->domain, pirq)))
return -EPERM;
if ( pirq < 0 || pirq >= d->nr_pirqs || irq < 0 || irq >= nr_irqs )
@@ -1571,8 +1653,22 @@ int map_domain_pirq(
if ( desc->handler != &no_irq_type )
dprintk(XENLOG_G_ERR, "dom%d: irq %d in use\n",
- d->domain_id, irq);
+ d->domain_id, irq);
desc->handler = &pci_msi_type;
+
+ if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV
+ && !desc->chip_data->used_vectors )
+ {
+ desc->chip_data->used_vectors = &pdev->info.used_vectors;
+ if ( desc->chip_data->vector != IRQ_VECTOR_UNASSIGNED )
+ {
+ int vector = desc->chip_data->vector;
+ ASSERT(!test_bit(vector, desc->chip_data->used_vectors));
+
+ set_bit(vector, desc->chip_data->used_vectors);
+ }
+ }
+
d->arch.pirq_irq[pirq] = irq;
d->arch.irq_pirq[irq] = pirq;
setup_msi_irq(pdev, msi_desc, irq);
@@ -1583,9 +1679,12 @@ int map_domain_pirq(
d->arch.pirq_irq[pirq] = irq;
d->arch.irq_pirq[irq] = pirq;
spin_unlock_irqrestore(&desc->lock, flags);
+
+ if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV )
+ printk(XENLOG_INFO "Per-device vector maps for GSIs not
implemented yet.\n");
}
- done:
+done:
return ret;
}
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -166,6 +166,35 @@ int __init amd_iov_detect(void)
return -ENODEV;
}
+ /*
+ * AMD IOMMUs don't distinguish between vectors destined for
+ * different cpus when doing interrupt remapping. This means
+ * that interrupts going through the same intremap table
+ * can't share the same vector.
+ *
+ * If irq_vector_map isn't specified, choose a sensible default:
+ * - If we're using per-device interemap tables, per-device
+ * vector non-sharing maps
+ * - If we're using a global interemap table, global vector
+ * non-sharing map
+ */
+ if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_DEFAULT )
+ {
+ if ( amd_iommu_perdev_intremap )
+ {
+ printk("AMD-Vi: Enabling per-device vector maps\n");
+ opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_PERDEV;
+ }
+ else
+ {
+ printk("AMD-Vi: Enabling global vector map\n");
+ opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_GLOBAL;
+ }
+ }
+ else
+ {
+ printk("AMD-Vi: Not overriding irq_vector_map setting\n");
+ }
return scan_pci_devices();
}
--- a/xen/include/asm-x86/irq.h
+++ b/xen/include/asm-x86/irq.h
@@ -45,6 +45,13 @@ extern u8 *irq_vector;
extern bool_t opt_noirqbalance;
+#define OPT_IRQ_VECTOR_MAP_DEFAULT 0 /* Do the default thing */
+#define OPT_IRQ_VECTOR_MAP_NONE 1 /* None */
+#define OPT_IRQ_VECTOR_MAP_GLOBAL 2 /* One global vector map (no vector
sharing) */
+#define OPT_IRQ_VECTOR_MAP_PERDEV 3 /* Per-device vetor map (no vector
sharing w/in a device) */
+
+extern int opt_irq_vector_map;
+
/*
* Per-cpu current frame pointer - the location of the last exception frame on
* the stack
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -11,6 +11,7 @@
#include <xen/types.h>
#include <xen/list.h>
#include <xen/spinlock.h>
+#include <xen/irq.h>
/*
* The PCI interface treats multi-function devices as independent
@@ -38,6 +39,7 @@ struct pci_dev_info {
u8 bus;
u8 devfn;
} physfn;
+ vmask_t used_vectors;
};
struct pci_dev {
++++++ 23771-x86-ioapic-clear-pin.patch ++++++
References: bnc#701686
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1313503555 -3600
# Node ID fc2be6cb89ad49efd90fe1b650f7efaab72f61b2
# Parent 5c1ebc117f9901bc155d2b92ae902a4144767dfb
x86: simplify (and fix) clear_IO_APIC{,_pin}()
These are used during bootup and (emergency) shutdown only, and their
only purpose is to get the actual IO-APIC's RTE(s) cleared.
Consequently, only the "raw" accessors should be used (and the ones
going through interrupt remapping code can be skipped), with the
exception of determining the delivery mode: This one must always go
through the interrupt remapping path, as in the VT-d case the actual
IO-APIC's RTE will have the delivery mode always set to zero (which
before possibly could have resulted in such an entry getting cleared
in the "raw" pass, though I haven't observed this case in practice).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/io_apic.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/io_apic.c
+++ xen-4.1.2-testing/xen/arch/x86/io_apic.c
@@ -471,14 +471,12 @@ static void eoi_IO_APIC_irq(unsigned int
spin_unlock_irqrestore(&ioapic_lock, flags);
}
-#define clear_IO_APIC_pin(a,p) __clear_IO_APIC_pin(a,p,0)
-#define clear_IO_APIC_pin_raw(a,p) __clear_IO_APIC_pin(a,p,1)
-static void __clear_IO_APIC_pin(unsigned int apic, unsigned int pin, int raw)
+static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
{
struct IO_APIC_route_entry entry;
/* Check delivery_mode to be sure we're not clearing an SMI pin */
- entry = ioapic_read_entry(apic, pin, raw);
+ entry = __ioapic_read_entry(apic, pin, FALSE);
if (entry.delivery_mode == dest_SMI)
return;
@@ -487,7 +485,7 @@ static void __clear_IO_APIC_pin(unsigned
*/
memset(&entry, 0, sizeof(entry));
entry.mask = 1;
- ioapic_write_entry(apic, pin, raw, entry);
+ __ioapic_write_entry(apic, pin, TRUE, entry);
}
static void clear_IO_APIC (void)
@@ -495,10 +493,8 @@ static void clear_IO_APIC (void)
int apic, pin;
for (apic = 0; apic < nr_ioapics; apic++) {
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
clear_IO_APIC_pin(apic, pin);
- clear_IO_APIC_pin_raw(apic, pin);
- }
}
}
++++++ 23772-x86-trampoline.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1313744066 -3600
# Node ID 29aeed4979a78f26519f5fde8a405f8438297ab9
# Parent fc2be6cb89ad49efd90fe1b650f7efaab72f61b2
x86: make run-time part of trampoline relocatable
In order to eliminate an initial hack in the EFI boot code (where
memory for the trampoline was just "claimed" instead of properly
allocated), the trampoline code must no longer make assumption on the
address at which it would be located. For the time being, the fixed
address is being retained for the traditional multiboot path.
As an additional benefit (at least from my pov) it allows confining
the visibility of the BOOT_TRAMPOLINE definition to just the boot
code.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/boot/Makefile
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/boot/Makefile
+++ xen-4.1.2-testing/xen/arch/x86/boot/Makefile
@@ -2,8 +2,8 @@ obj-y += head.o
head.o: reloc.S
-BOOT_TRAMPOLINE := $(shell sed -n
's,^\#define[[:space:]]\{1\,\}BOOT_TRAMPOLINE[[:space:]]\{1\,\},,p'
$(BASEDIR)/include/asm-x86/config.h)
+BOOT_TRAMPOLINE := $(shell sed -n
's,^\#define[[:space:]]\{1\,\}BOOT_TRAMPOLINE[[:space:]]\{1\,\},,p' head.S)
%.S: %.c
RELOC=$(BOOT_TRAMPOLINE) $(MAKE) -f build32.mk $@
-reloc.S: $(BASEDIR)/include/asm-x86/config.h
+reloc.S: head.S
Index: xen-4.1.2-testing/xen/arch/x86/boot/head.S
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/boot/head.S
+++ xen-4.1.2-testing/xen/arch/x86/boot/head.S
@@ -9,7 +9,7 @@
.text
.code32
-#undef bootsym_phys
+#define BOOT_TRAMPOLINE 0x7c000
#define sym_phys(sym) ((sym) - __XEN_VIRT_START)
#define bootsym_phys(sym) ((sym) - trampoline_start + BOOT_TRAMPOLINE)
@@ -189,6 +189,17 @@ __start:
mov %edi,sym_phys(idle_pg_table_l2) + (__PAGE_OFFSET>>18)
#endif
+ /* Apply relocations to bootstrap trampoline. */
+ mov $BOOT_TRAMPOLINE,%edx
+ mov $sym_phys(__trampoline_rel_start),%edi
+ mov %edx,sym_phys(trampoline_phys)
+1:
+ mov (%edi),%eax
+ add %edx,(%edi,%eax)
+ add $4,%edi
+ cmp $sym_phys(__trampoline_rel_stop),%edi
+ jb 1b
+
/* Copy bootstrap trampoline to low memory, below 1MB. */
mov $sym_phys(trampoline_start),%esi
mov $bootsym_phys(trampoline_start),%edi
Index: xen-4.1.2-testing/xen/arch/x86/boot/trampoline.S
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/boot/trampoline.S
+++ xen-4.1.2-testing/xen/arch/x86/boot/trampoline.S
@@ -4,6 +4,13 @@
#undef bootsym
#define bootsym(s) ((s)-trampoline_start)
+#define bootsym_rel(sym, off, opnd...) \
+ bootsym(sym),##opnd; \
+111:; \
+ .pushsection .trampoline_rel, "a"; \
+ .long 111b - (off) - .; \
+ .popsection
+
.globl trampoline_realmode_entry
trampoline_realmode_entry:
mov %cs,%ax
@@ -17,11 +24,11 @@ trampoline_realmode_entry:
xor %ax, %ax
inc %ax
lmsw %ax # CR0.PE = 1 (enter protected mode)
- ljmpl $BOOT_CS32,$bootsym_phys(trampoline_protmode_entry)
+ ljmpl $BOOT_CS32,$bootsym_rel(trampoline_protmode_entry,6)
idt_48: .word 0, 0, 0 # base = limit = 0
gdt_48: .word 6*8-1
- .long bootsym_phys(trampoline_gdt)
+ .long bootsym_rel(trampoline_gdt,4)
trampoline_gdt:
/* 0x0000: unused */
.quad 0x0000000000000000
@@ -32,11 +39,16 @@ trampoline_gdt:
/* 0x0018: ring 0 data */
.quad 0x00cf92000000ffff
/* 0x0020: real-mode code @ BOOT_TRAMPOLINE */
- .long 0x0000ffff | ((BOOT_TRAMPOLINE & 0x00ffff) << 16)
- .long 0x00009a00 | ((BOOT_TRAMPOLINE & 0xff0000) >> 16)
+ .long 0x0000ffff
+ .long 0x00009a00
/* 0x0028: real-mode data @ BOOT_TRAMPOLINE */
- .long 0x0000ffff | ((BOOT_TRAMPOLINE & 0x00ffff) << 16)
- .long 0x00009200 | ((BOOT_TRAMPOLINE & 0xff0000) >> 16)
+ .long 0x0000ffff
+ .long 0x00009200
+
+ .pushsection .trampoline_rel, "a"
+ .long trampoline_gdt + BOOT_PSEUDORM_CS + 2 - .
+ .long trampoline_gdt + BOOT_PSEUDORM_DS + 2 - .
+ .popsection
.globl cpuid_ext_features
cpuid_ext_features:
@@ -66,11 +78,11 @@ trampoline_protmode_entry:
/* Load pagetable base register. */
mov $sym_phys(idle_pg_table),%eax
- add bootsym_phys(trampoline_xen_phys_start),%eax
+ add bootsym_rel(trampoline_xen_phys_start,4,%eax)
mov %eax,%cr3
/* Set up EFER (Extended Feature Enable Register). */
- mov bootsym_phys(cpuid_ext_features),%edi
+ mov bootsym_rel(cpuid_ext_features,4,%edi)
test $0x20100800,%edi /* SYSCALL/SYSRET, No Execute, Long Mode? */
jz .Lskip_efer
movl $MSR_EFER,%ecx
@@ -93,7 +105,7 @@ trampoline_protmode_entry:
#if defined(__x86_64__)
/* Now in compatibility mode. Long-jump into 64-bit mode. */
- ljmp $BOOT_CS64,$bootsym_phys(start64)
+ ljmp $BOOT_CS64,$bootsym_rel(start64,6)
.code64
start64:
Index: xen-4.1.2-testing/xen/arch/x86/boot/wakeup.S
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/boot/wakeup.S
+++ xen-4.1.2-testing/xen/arch/x86/boot/wakeup.S
@@ -42,15 +42,13 @@ ENTRY(wakeup_start)
# boot trampoline is under 1M, and shift its start into
# %fs to reference symbols in that area
- movl $BOOT_TRAMPOLINE, %eax
- shrl $4, %eax
- movl %eax, %fs
+ mov wakesym(trampoline_seg), %fs
lidt %fs:bootsym(idt_48)
lgdt %fs:bootsym(gdt_48)
movw $1, %ax
lmsw %ax # Turn on CR0.PE
- ljmpl $BOOT_CS32, $bootsym_phys(wakeup_32)
+ ljmpl $BOOT_CS32, $bootsym_rel(wakeup_32, 6)
/* This code uses an extended set of video mode numbers. These include:
* Aliases for standard modes
@@ -103,6 +101,10 @@ real_magic: .long 0x12345678
.globl video_mode, video_flags
video_mode: .long 0
video_flags: .long 0
+trampoline_seg: .word BOOT_TRAMPOLINE >> 4
+ .pushsection .trampoline_seg, "a"
+ .long trampoline_seg - .
+ .popsection
.code32
@@ -114,11 +116,11 @@ wakeup_32:
mov $BOOT_DS, %eax
mov %eax, %ds
mov %eax, %ss
- mov $bootsym_phys(early_stack), %esp
+ mov $bootsym_rel(early_stack, 4, %esp)
# check saved magic again
mov $sym_phys(saved_magic), %eax
- add bootsym_phys(trampoline_xen_phys_start), %eax
+ add bootsym_rel(trampoline_xen_phys_start, 4, %eax)
mov (%eax), %eax
cmp $0x9abcdef0, %eax
jne bogus_saved_magic
@@ -131,12 +133,12 @@ wakeup_32:
/* Load pagetable base register */
mov $sym_phys(idle_pg_table),%eax
- add bootsym_phys(trampoline_xen_phys_start),%eax
+ add bootsym_rel(trampoline_xen_phys_start,4,%eax)
mov %eax,%cr3
/* Will cpuid feature change after resume? */
/* Set up EFER (Extended Feature Enable Register). */
- mov bootsym_phys(cpuid_ext_features),%edi
+ mov bootsym_rel(cpuid_ext_features,4,%edi)
test $0x20100800,%edi /* SYSCALL/SYSRET, No Execute, Long Mode? */
jz .Lskip_eferw
movl $MSR_EFER,%ecx
@@ -162,7 +164,7 @@ wakeup_32:
#if defined(__x86_64__)
/* Now in compatibility mode. Long-jump to 64-bit mode */
- ljmp $BOOT_CS64, $bootsym_phys(wakeup_64)
+ ljmp $BOOT_CS64, $bootsym_rel(wakeup_64,6)
.code64
wakeup_64:
Index: xen-4.1.2-testing/xen/arch/x86/efi/boot.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/efi/boot.c
+++ xen-4.1.2-testing/xen/arch/x86/efi/boot.c
@@ -599,6 +599,9 @@ static void __init relocate_image(unsign
}
}
+extern const s32 __trampoline_rel_start[], __trampoline_rel_stop[];
+extern const s32 __trampoline_seg_start[], __trampoline_seg_stop[];
+
void EFIAPI __init __attribute__((__noreturn__))
efi_start(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable)
{
@@ -614,9 +617,10 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
EFI_GRAPHICS_OUTPUT_MODE_INFORMATION *mode_info;
EFI_FILE_HANDLE dir_handle;
union string section = { NULL }, name;
+ const s32 *trampoline_ptr;
struct e820entry *e;
u64 efer;
- bool_t base_video = 0, trampoline_okay = 0;
+ bool_t base_video = 0;
efi_ih = ImageHandle;
efi_bs = SystemTable->BootServices;
@@ -914,15 +918,27 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
dmi_efi_get_table((void *)(long)efi.smbios);
/* Allocate space for trampoline (in first Mb). */
- cfg.addr = BOOT_TRAMPOLINE;
+ cfg.addr = 0x100000;
cfg.size = trampoline_end - trampoline_start;
- status = efi_bs->AllocatePages(AllocateAddress, EfiLoaderData,
+ status = efi_bs->AllocatePages(AllocateMaxAddress, EfiLoaderData,
PFN_UP(cfg.size), &cfg.addr);
if ( EFI_ERROR(status) )
{
cfg.addr = 0;
- PrintErr(L"Note: Trampoline area is in use\r\n");
+ blexit(L"No memory for trampoline\r\n");
}
+ trampoline_phys = cfg.addr;
+ /* Apply relocations to trampoline. */
+ for ( trampoline_ptr = __trampoline_rel_start;
+ trampoline_ptr < __trampoline_rel_stop;
+ ++trampoline_ptr )
+ *(u32 *)(*trampoline_ptr + (long)trampoline_ptr) +=
+ trampoline_phys;
+ for ( trampoline_ptr = __trampoline_seg_start;
+ trampoline_ptr < __trampoline_seg_stop;
+ ++trampoline_ptr )
+ *(u16 *)(*trampoline_ptr + (long)trampoline_ptr) =
+ trampoline_phys >> 4;
/* Initialise L2 identity-map and xen page table entries (16MB). */
for ( i = 0; i < 8; ++i )
@@ -1096,14 +1112,8 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
e->type = type;
++e820nr;
}
- if ( type == E820_RAM && e->addr <= BOOT_TRAMPOLINE &&
- e->addr + e->size >= BOOT_TRAMPOLINE + cfg.size )
- trampoline_okay = 1;
}
- if ( !trampoline_okay )
- blexit(L"Trampoline area unavailable\r\n");
-
status = efi_bs->ExitBootServices(ImageHandle, map_key);
if ( EFI_ERROR(status) )
PrintErrMesg(L"Cannot exit boot services", status);
@@ -1117,7 +1127,7 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
efi_fw_vendor = (void *)efi_fw_vendor + DIRECTMAP_VIRT_START;
relocate_image(__XEN_VIRT_START - xen_phys_start);
- memcpy((void *)(long)BOOT_TRAMPOLINE, trampoline_start, cfg.size);
+ memcpy((void *)trampoline_phys, trampoline_start, cfg.size);
/* Set system registers and transfer control. */
asm volatile("pushq $0\n\tpopfq");
Index: xen-4.1.2-testing/xen/arch/x86/smpboot.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/smpboot.c
+++ xen-4.1.2-testing/xen/arch/x86/smpboot.c
@@ -48,6 +48,8 @@
#define setup_trampoline() (bootsym_phys(trampoline_realmode_entry))
+unsigned long __read_mostly trampoline_phys;
+
/* Set if we find a B stepping CPU */
static int smp_b_stepping;
Index: xen-4.1.2-testing/xen/arch/x86/x86_32/mm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/x86_32/mm.c
+++ xen-4.1.2-testing/xen/arch/x86/x86_32/mm.c
@@ -22,6 +22,7 @@
#include <xen/lib.h>
#include <xen/init.h>
#include <xen/mm.h>
+#include <xen/pfn.h>
#include <xen/sched.h>
#include <xen/guest_access.h>
#include <asm/current.h>
@@ -166,8 +167,9 @@ void __init zap_low_mappings(l2_pgentry_
flush_all(FLUSH_TLB_GLOBAL);
/* Replace with mapping of the boot trampoline only. */
- map_pages_to_xen(BOOT_TRAMPOLINE, BOOT_TRAMPOLINE >> PAGE_SHIFT,
- 0x10, __PAGE_HYPERVISOR);
+ map_pages_to_xen(trampoline_phys, trampoline_phys >> PAGE_SHIFT,
+ PFN_UP(trampoline_end - trampoline_start),
+ __PAGE_HYPERVISOR);
}
void __init subarch_init_memory(void)
Index: xen-4.1.2-testing/xen/arch/x86/x86_64/mm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/x86_64/mm.c
+++ xen-4.1.2-testing/xen/arch/x86/x86_64/mm.c
@@ -830,7 +830,7 @@ void __init zap_low_mappings(void)
flush_local(FLUSH_TLB_GLOBAL);
/* Replace with mapping of the boot trampoline only. */
- map_pages_to_xen(BOOT_TRAMPOLINE, BOOT_TRAMPOLINE >> PAGE_SHIFT,
+ map_pages_to_xen(trampoline_phys, trampoline_phys >> PAGE_SHIFT,
PFN_UP(trampoline_end - trampoline_start),
__PAGE_HYPERVISOR);
}
Index: xen-4.1.2-testing/xen/arch/x86/xen.lds.S
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/xen.lds.S
+++ xen-4.1.2-testing/xen/arch/x86/xen.lds.S
@@ -103,6 +103,13 @@ SECTIONS
*(.init.data)
*(.init.data.rel)
*(.init.data.rel.*)
+ . = ALIGN(4);
+ __trampoline_rel_start = .;
+ *(.trampoline_rel)
+ __trampoline_rel_stop = .;
+ __trampoline_seg_start = .;
+ *(.trampoline_seg)
+ __trampoline_seg_stop = .;
} :text
. = ALIGN(32);
.init.setup : {
Index: xen-4.1.2-testing/xen/include/asm-x86/config.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/config.h
+++ xen-4.1.2-testing/xen/include/asm-x86/config.h
@@ -95,13 +95,13 @@
/* Primary stack is restricted to 8kB by guard pages. */
#define PRIMARY_STACK_SIZE 8192
-#define BOOT_TRAMPOLINE 0x7c000
+#ifndef __ASSEMBLY__
+extern unsigned long trampoline_phys;
#define bootsym_phys(sym) \
- (((unsigned long)&(sym)-(unsigned long)&trampoline_start)+BOOT_TRAMPOLINE)
+ (((unsigned long)&(sym)-(unsigned long)&trampoline_start)+trampoline_phys)
#define bootsym(sym) \
(*RELOC_HIDE((typeof(&(sym)))__va(__pa(&(sym))), \
- BOOT_TRAMPOLINE-__pa(trampoline_start)))
-#ifndef __ASSEMBLY__
+ trampoline_phys-__pa(trampoline_start)))
extern char trampoline_start[], trampoline_end[];
extern char trampoline_realmode_entry[];
extern unsigned int trampoline_xen_phys_start;
++++++ 23774-x86_64-EFI-EDD.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1313744120 -3600
# Node ID e35c5202625ef5534561f84352833ad9467d986c
# Parent dd90b59cb11c60c48e174c899190e2967341fe32
x86-64/EFI: construct EDD data from device path protocol information
In the absence of a BIOS to handle INT13 requests, this information
must be constructed artificially instead when booted from EFI.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/boot/edd.S
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/boot/edd.S
+++ xen-4.1.2-testing/xen/arch/x86/boot/edd.S
@@ -16,21 +16,13 @@
* Updated and ported for Xen by Keir Fraser <keir@xxxxxxxxxxxxx> June 2007
*/
+#include <asm/edd.h>
+
.code16
/* Offset of disc signature in the MBR. */
#define EDD_MBR_SIG_OFFSET 0x1B8
-/* Maximum number of EDD information structures at boot_edd_info. */
-#define EDD_INFO_MAX 6
-
-/* Maximum number of MBR signatures at boot_mbr_signature. */
-#define EDD_MBR_SIG_MAX 16
-
-/* Size of components of EDD information structure. */
-#define EDDEXTSIZE 8
-#define EDDPARMSIZE 74
-
get_edd:
cmpb $2, bootsym(opt_edd) # edd=off ?
je edd_done
Index: xen-4.1.2-testing/xen/arch/x86/efi/boot.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/efi/boot.c
+++ xen-4.1.2-testing/xen/arch/x86/efi/boot.c
@@ -16,6 +16,7 @@
#include <xen/stringify.h>
#include <xen/vga.h>
#include <asm/e820.h>
+#include <asm/edd.h>
#include <asm/mm.h>
#include <asm/msr.h>
#include <asm/processor.h>
@@ -539,6 +540,18 @@ static void __init split_value(char *s)
*s = 0;
}
+static void __init edd_put_string(u8 *dst, size_t n, const char *src)
+{
+ while ( n-- && *src )
+ *dst++ = *src++;
+ if ( *src )
+ PrintErrMesg(L"Internal error populating EDD info",
+ EFI_BUFFER_TOO_SMALL);
+ while ( n-- )
+ *dst++ = ' ';
+}
+#define edd_put_string(d, s) edd_put_string(d, ARRAY_SIZE(d), s)
+
static int __init set_color(u32 mask, int bpp, u8 *pos, u8 *sz)
{
if ( bpp < 0 )
@@ -607,6 +620,8 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
{
static EFI_GUID __initdata loaded_image_guid = LOADED_IMAGE_PROTOCOL;
static EFI_GUID __initdata gop_guid = EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID;
+ static EFI_GUID __initdata bio_guid = BLOCK_IO_PROTOCOL;
+ static EFI_GUID __initdata devp_guid = DEVICE_PATH_PROTOCOL;
EFI_LOADED_IMAGE *loaded_image;
EFI_STATUS status;
unsigned int i, argc;
@@ -887,7 +902,148 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
place_string(&mbi.mem_upper, NULL);
- /* XXX Collect EDD info. */
+ /* Collect EDD info. */
+ BUILD_BUG_ON(offsetof(struct edd_info, edd_device_params) != EDDEXTSIZE);
+ BUILD_BUG_ON(sizeof(struct edd_device_params) != EDDPARMSIZE);
+ size = 0;
+ status = efi_bs->LocateHandle(ByProtocol, &bio_guid, NULL, &size, NULL);
+ if ( status == EFI_BUFFER_TOO_SMALL )
+ status = efi_bs->AllocatePool(EfiLoaderData, size, (void **)&handles);
+ if ( !EFI_ERROR(status) )
+ status = efi_bs->LocateHandle(ByProtocol, &bio_guid, NULL, &size,
+ handles);
+ if ( EFI_ERROR(status) )
+ size = 0;
+ for ( i = 0; i < size / sizeof(*handles); ++i )
+ {
+ EFI_BLOCK_IO *bio;
+ EFI_DEV_PATH_PTR devp;
+ struct edd_info *info = boot_edd_info + boot_edd_info_nr;
+ struct edd_device_params *params = &info->edd_device_params;
+ enum { root, acpi, pci, ctrlr } state = root;
+
+ status = efi_bs->HandleProtocol(handles[i], &bio_guid, (void **)&bio);
+ if ( EFI_ERROR(status) ||
+ bio->Media->RemovableMedia ||
+ bio->Media->LogicalPartition )
+ continue;
+ if ( boot_edd_info_nr < EDD_INFO_MAX )
+ {
+ info->device = 0x80 + boot_edd_info_nr; /* fake */
+ info->version = 0x11;
+ params->length = offsetof(struct edd_device_params, dpte_ptr);
+ params->number_of_sectors = bio->Media->LastBlock + 1;
+ params->bytes_per_sector = bio->Media->BlockSize;
+ params->dpte_ptr = ~0;
+ }
+ ++boot_edd_info_nr;
+ status = efi_bs->HandleProtocol(handles[i], &devp_guid,
+ (void **)&devp);
+ if ( EFI_ERROR(status) )
+ continue;
+ for ( ; !IsDevicePathEnd(devp.DevPath);
+ devp.DevPath = NextDevicePathNode(devp.DevPath) )
+ {
+ switch ( DevicePathType(devp.DevPath) )
+ {
+ const u8 *p;
+
+ case ACPI_DEVICE_PATH:
+ if ( state != root || boot_edd_info_nr > EDD_INFO_MAX )
+ break;
+ switch ( DevicePathSubType(devp.DevPath) )
+ {
+ case ACPI_DP:
+ if ( devp.Acpi->HID != EISA_PNP_ID(0xA03) &&
+ devp.Acpi->HID != EISA_PNP_ID(0xA08) )
+ break;
+ params->interface_path.pci.bus = devp.Acpi->UID;
+ state = acpi;
+ break;
+ case EXPANDED_ACPI_DP:
+ /* XXX */
+ break;
+ }
+ break;
+ case HARDWARE_DEVICE_PATH:
+ if ( state != acpi ||
+ DevicePathSubType(devp.DevPath) != HW_PCI_DP ||
+ boot_edd_info_nr > EDD_INFO_MAX )
+ break;
+ state = pci;
+ edd_put_string(params->host_bus_type, "PCI");
+ params->interface_path.pci.slot = devp.Pci->Device;
+ params->interface_path.pci.function = devp.Pci->Function;
+ break;
+ case MESSAGING_DEVICE_PATH:
+ if ( state != pci || boot_edd_info_nr > EDD_INFO_MAX )
+ break;
+ state = ctrlr;
+ switch ( DevicePathSubType(devp.DevPath) )
+ {
+ case MSG_ATAPI_DP:
+ edd_put_string(params->interface_type, "ATAPI");
+ params->interface_path.pci.channel =
+ devp.Atapi->PrimarySecondary;
+ params->device_path.atapi.device = devp.Atapi->SlaveMaster;
+ params->device_path.atapi.lun = devp.Atapi->Lun;
+ break;
+ case MSG_SCSI_DP:
+ edd_put_string(params->interface_type, "SCSI");
+ params->device_path.scsi.id = devp.Scsi->Pun;
+ params->device_path.scsi.lun = devp.Scsi->Lun;
+ break;
+ case MSG_FIBRECHANNEL_DP:
+ edd_put_string(params->interface_type, "FIBRE");
+ params->device_path.fibre.wwid = devp.FibreChannel->WWN;
+ params->device_path.fibre.lun = devp.FibreChannel->Lun;
+ break;
+ case MSG_1394_DP:
+ edd_put_string(params->interface_type, "1394");
+ params->device_path.i1394.eui = devp.F1394->Guid;
+ break;
+ case MSG_USB_DP:
+ case MSG_USB_CLASS_DP:
+ edd_put_string(params->interface_type, "USB");
+ break;
+ case MSG_I2O_DP:
+ edd_put_string(params->interface_type, "I2O");
+ params->device_path.i2o.identity_tag = devp.I2O->Tid;
+ break;
+ default:
+ continue;
+ }
+ info->version = 0x30;
+ params->length = sizeof(struct edd_device_params);
+ params->key = 0xbedd;
+ params->device_path_info_length =
+ sizeof(struct edd_device_params) -
+ offsetof(struct edd_device_params, key);
+ for ( p = (const u8 *)¶ms->key; p < ¶ms->checksum; ++p
)
+ params->checksum -= *p;
+ break;
+ case MEDIA_DEVICE_PATH:
+ if ( DevicePathSubType(devp.DevPath) == MEDIA_HARDDRIVE_DP &&
+ devp.HardDrive->MBRType == MBR_TYPE_PCAT &&
+ boot_mbr_signature_nr < EDD_MBR_SIG_MAX )
+ {
+ struct mbr_signature *sig = boot_mbr_signature +
+ boot_mbr_signature_nr;
+
+ sig->device = 0x80 + boot_edd_info_nr; /* fake */
+ memcpy(&sig->signature, devp.HardDrive->Signature,
+ sizeof(sig->signature));
+ ++boot_mbr_signature_nr;
+ }
+ break;
+ }
+ }
+ }
+ if ( handles )
+ efi_bs->FreePool(handles);
+ if ( boot_edd_info_nr > EDD_INFO_MAX )
+ boot_edd_info_nr = EDD_INFO_MAX;
+
/* XXX Collect EDID info. */
if ( cpuid_eax(0x80000000) > 0x80000000 )
Index: xen-4.1.2-testing/xen/include/asm-x86/edd.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/edd.h
+++ xen-4.1.2-testing/xen/include/asm-x86/edd.h
@@ -23,6 +23,8 @@
#ifndef __XEN_EDD_H__
#define __XEN_EDD_H__
+#ifndef __ASSEMBLY__
+
struct edd_info {
/* Int13, Fn48: Check Extensions Present. */
u8 device; /* %dl: device */
@@ -33,10 +35,106 @@ struct edd_info {
u8 legacy_max_head; /* %dh: maximum head number */
u8 legacy_sectors_per_track; /* %cl[5:0]: maximum sector number */
/* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */
- struct {
+ struct edd_device_params {
u16 length;
- u8 data[72];
- } edd_device_params;
+ u16 info_flags;
+ u32 num_default_cylinders;
+ u32 num_default_heads;
+ u32 sectors_per_track;
+ u64 number_of_sectors;
+ u16 bytes_per_sector;
+ u32 dpte_ptr; /* 0xFFFFFFFF for our purposes */
+ u16 key; /* = 0xBEDD */
+ u8 device_path_info_length;
+ u8 reserved2;
+ u16 reserved3;
+ u8 host_bus_type[4];
+ u8 interface_type[8];
+ union {
+ struct {
+ u16 base_address;
+ u16 reserved1;
+ u32 reserved2;
+ } __attribute__ ((packed)) isa;
+ struct {
+ u8 bus;
+ u8 slot;
+ u8 function;
+ u8 channel;
+ u32 reserved;
+ } __attribute__ ((packed)) pci;
+ /* pcix is same as pci */
+ struct {
+ u64 reserved;
+ } __attribute__ ((packed)) ibnd;
+ struct {
+ u64 reserved;
+ } __attribute__ ((packed)) xprs;
+ struct {
+ u64 reserved;
+ } __attribute__ ((packed)) htpt;
+ struct {
+ u64 reserved;
+ } __attribute__ ((packed)) unknown;
+ } interface_path;
+ union {
+ struct {
+ u8 device;
+ u8 reserved1;
+ u16 reserved2;
+ u32 reserved3;
+ u64 reserved4;
+ } __attribute__ ((packed)) ata;
+ struct {
+ u8 device;
+ u8 lun;
+ u8 reserved1;
+ u8 reserved2;
+ u32 reserved3;
+ u64 reserved4;
+ } __attribute__ ((packed)) atapi;
+ struct {
+ u16 id;
+ u64 lun;
+ u16 reserved1;
+ u32 reserved2;
+ } __attribute__ ((packed)) scsi;
+ struct {
+ u64 serial_number;
+ u64 reserved;
+ } __attribute__ ((packed)) usb;
+ struct {
+ u64 eui;
+ u64 reserved;
+ } __attribute__ ((packed)) i1394;
+ struct {
+ u64 wwid;
+ u64 lun;
+ } __attribute__ ((packed)) fibre;
+ struct {
+ u64 identity_tag;
+ u64 reserved;
+ } __attribute__ ((packed)) i2o;
+ struct {
+ u32 array_number;
+ u32 reserved1;
+ u64 reserved2;
+ } __attribute__ ((packed)) raid;
+ struct {
+ u8 device;
+ u8 reserved1;
+ u16 reserved2;
+ u32 reserved3;
+ u64 reserved4;
+ } __attribute__ ((packed)) sata;
+ struct {
+ u64 reserved1;
+ u64 reserved2;
+ } __attribute__ ((packed)) unknown;
+ } device_path;
+ u8 reserved4;
+ u8 checksum;
+ } __attribute__ ((packed)) edd_device_params;
} __attribute__ ((packed));
struct mbr_signature {
@@ -51,4 +149,16 @@ extern u8 boot_mbr_signature_nr;
extern struct edd_info boot_edd_info[];
extern u8 boot_edd_info_nr;
+#endif /* __ASSEMBLY__ */
+
+/* Maximum number of EDD information structures at boot_edd_info. */
+#define EDD_INFO_MAX 6
+
+/* Maximum number of MBR signatures at boot_mbr_signature. */
+#define EDD_MBR_SIG_MAX 16
+
+/* Size of components of EDD information structure. */
+#define EDDEXTSIZE 8
+#define EDDPARMSIZE 74
+
#endif /* __XEN_EDD_H__ */
++++++ 23781-pm-wide-ACPI-ids.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1314004239 -3600
# Node ID 0849b0e59e2418e8215616df147f955b01b07577
# Parent 07f78b5bd03c02e32324eaa00487643d27b7ffa8
pm: don't truncate processors' ACPI IDs to 8 bits
This is just another adjustment to allow systems with very many CPUs
(or unusual ACPI IDs) to be properly power-managed.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/ia64/linux-xen/acpi.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/ia64/linux-xen/acpi.c
+++ xen-4.1.2-testing/xen/arch/ia64/linux-xen/acpi.c
@@ -223,11 +223,14 @@ static u16 ia64_acpiid_to_sapicid[ MAX_L
{[0 ... MAX_LOCAL_SAPIC - 1] = 0xffff };
/* acpi id to cpu id */
-int get_cpu_id(u8 acpi_id)
+int get_cpu_id(u32 acpi_id)
{
int i;
u16 apic_id;
+ if ( acpi_id >= MAX_LOCAL_SAPIC )
+ return -EINVAL;
+
apic_id = ia64_acpiid_to_sapicid[acpi_id];
if ( apic_id == 0xffff )
return -EINVAL;
Index: xen-4.1.2-testing/xen/arch/x86/acpi/cpu_idle.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/acpi/cpu_idle.c
+++ xen-4.1.2-testing/xen/arch/x86/acpi/cpu_idle.c
@@ -901,11 +901,14 @@ static void set_cx(
acpi_power->safe_state = cx;
}
-int get_cpu_id(u8 acpi_id)
+int get_cpu_id(u32 acpi_id)
{
int i;
u32 apic_id;
+ if ( acpi_id >= MAX_MADT_ENTRIES )
+ return -1;
+
apic_id = x86_acpiid_to_apicid[acpi_id];
if ( apic_id == BAD_APICID )
return -1;
@@ -982,7 +985,7 @@ long set_cx_pminfo(uint32_t cpu, struct
print_cx_pminfo(cpu, power);
/* map from acpi_id to cpu_id */
- cpu_id = get_cpu_id((u8)cpu);
+ cpu_id = get_cpu_id(cpu);
if ( cpu_id == -1 )
{
printk(XENLOG_ERR "no cpu_id for acpi_id %d\n", cpu);
Index: xen-4.1.2-testing/xen/include/acpi/cpufreq/processor_perf.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/acpi/cpufreq/processor_perf.h
+++ xen-4.1.2-testing/xen/include/acpi/cpufreq/processor_perf.h
@@ -6,7 +6,7 @@
#define XEN_PX_INIT 0x80000000
-int get_cpu_id(u8);
+int get_cpu_id(u32);
int powernow_cpufreq_init(void);
unsigned int powernow_register_driver(void);
unsigned int get_measured_perf(unsigned int cpu, unsigned int flag);
++++++ 23782-x86-ioapic-clear-irr.patch ++++++
References: bnc#701686
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1314004270 -3600
# Node ID 25dfe53bb1898b3967ceb71a7eb60a8b760c25fb
# Parent 0849b0e59e2418e8215616df147f955b01b07577
x86/IO-APIC: clear remoteIRR in clear_IO_APIC_pin()
It was found that in a crash scenario, the remoteIRR bit in an IO-APIC
RTE could be left set, causing problems when bringing up a kdump
kernel. While this generally is most important to be taken care of in
the new kernel (which usually would be a native one), it still seems
desirable to also address this problem in Xen so that (a) the problem
doesn't bite Xen when used as a secondary emergency kernel and (b) an
attempt is being made to save un-fixed secondary kernels from running
into said problem.
Based on a Linux patch from suresh.b.siddha@xxxxxxxxx.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -481,11 +481,35 @@ static void clear_IO_APIC_pin(unsigned i
return;
/*
+ * Make sure the entry is masked and re-read the contents to check
+ * if it is a level triggered pin and if the remoteIRR is set.
+ */
+ if (!entry.mask) {
+ entry.mask = 1;
+ __ioapic_write_entry(apic, pin, FALSE, entry);
+ }
+ entry = __ioapic_read_entry(apic, pin, TRUE);
+
+ if (entry.irr) {
+ /* Make sure the trigger mode is set to level. */
+ if (!entry.trigger) {
+ entry.trigger = 1;
+ __ioapic_write_entry(apic, pin, TRUE, entry);
+ }
+ __io_apic_eoi(apic, entry.vector, pin);
+ }
+
+ /*
* Disable it in the IO-APIC irq-routing table:
*/
memset(&entry, 0, sizeof(entry));
entry.mask = 1;
__ioapic_write_entry(apic, pin, TRUE, entry);
+
+ entry = __ioapic_read_entry(apic, pin, TRUE);
+ if (entry.irr)
+ printk(KERN_ERR "IO-APIC%02x-%u: Unable to reset IRR\n",
+ IO_APIC_ID(apic), pin);
}
static void clear_IO_APIC (void)
++++++ 23783-ACPI-set-_PDC-bits.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1314004356 -3600
# Node ID 2029263c501c315fa4d94845e5cfa6a9b0b395d5
# Parent 25dfe53bb1898b3967ceb71a7eb60a8b760c25fb
ACPI: add _PDC input override mechanism
In order to have Dom0 call _PDC with input fully representing Xen's
capabilities, and in order to avoid building knowledge of Xen
implementation details into Dom0, this provides a mechanism by which
the Dom0 kernel can, once it filled the _PDC input buffer according to
its own knowledge, present the buffer to Xen to apply overrides for
the parts of the C-, P-, and T-state management that it controls. This
is particularly to address the dependency of Xen using MWAIT to enter
certain C-states on the availability of the break-on-interrupt
extension (which the Dom0 kernel should have no need to know about).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/ia64/linux-xen/acpi.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/ia64/linux-xen/acpi.c
+++ xen-4.1.2-testing/xen/arch/ia64/linux-xen/acpi.c
@@ -243,6 +243,13 @@ int get_cpu_id(u32 acpi_id)
return -1;
}
+
+int arch_acpi_set_pdc_bits(u32 acpi_id, u32 *pdc, u32 mask)
+{
+ pdc[2] |= ACPI_PDC_EST_CAPABILITY_SMP & mask;
+ return 0;
+}
+
#endif
static int __init
Index: xen-4.1.2-testing/xen/arch/x86/acpi/cpu_idle.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/acpi/cpu_idle.c
+++ xen-4.1.2-testing/xen/arch/x86/acpi/cpu_idle.c
@@ -649,12 +649,6 @@ static int cpuidle_init_cpu(int cpu)
return 0;
}
-#define CPUID_MWAIT_LEAF (5)
-#define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1)
-#define CPUID5_ECX_INTERRUPT_BREAK (0x2)
-
-#define MWAIT_ECX_INTERRUPT_BREAK (0x1)
-
#define MWAIT_SUBSTATE_MASK (0xf)
#define MWAIT_SUBSTATE_SIZE (4)
Index: xen-4.1.2-testing/xen/arch/x86/acpi/boot.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/acpi/boot.c
+++ xen-4.1.2-testing/xen/arch/x86/acpi/boot.c
@@ -1006,3 +1006,47 @@ unsigned int acpi_get_processor_id(unsig
return INVALID_ACPIID;
}
+
+static void get_mwait_ecx(void *info)
+{
+ *(u32 *)info = cpuid_ecx(CPUID_MWAIT_LEAF);
+}
+
+int arch_acpi_set_pdc_bits(u32 acpi_id, u32 *pdc, u32 mask)
+{
+ unsigned int cpu = get_cpu_id(acpi_id);
+ struct cpuinfo_x86 *c;
+ u32 ecx;
+
+ if (!(acpi_id + 1))
+ c = &boot_cpu_data;
+ else if (cpu >= NR_CPUS || !cpu_online(cpu))
+ return -EINVAL;
+ else
+ c = cpu_data + cpu;
+
+ pdc[2] |= ACPI_PDC_C_CAPABILITY_SMP & mask;
+
+ if (cpu_has(c, X86_FEATURE_EST))
+ pdc[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP & mask;
+
+ if (cpu_has(c, X86_FEATURE_ACPI))
+ pdc[2] |= ACPI_PDC_T_FFH & mask;
+
+ /*
+ * If mwait/monitor or its break-on-interrupt extension are
+ * unsupported, Cx_FFH will be disabled.
+ */
+ if (!cpu_has(c, X86_FEATURE_MWAIT) ||
+ c->cpuid_level < CPUID_MWAIT_LEAF)
+ ecx = 0;
+ else if (c == &boot_cpu_data || cpu == smp_processor_id())
+ ecx = cpuid_ecx(CPUID_MWAIT_LEAF);
+ else
+ on_selected_cpus(cpumask_of(cpu), get_mwait_ecx, &ecx, 1);
+ if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
+ !(ecx & CPUID5_ECX_INTERRUPT_BREAK))
+ pdc[2] &= ~(ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH);
+
+ return 0;
+}
Index: xen-4.1.2-testing/xen/arch/x86/platform_hypercall.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/platform_hypercall.c
+++ xen-4.1.2-testing/xen/arch/x86/platform_hypercall.c
@@ -419,6 +419,15 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
ret = -EINVAL;
break;
+ case XEN_PM_PDC:
+ {
+ XEN_GUEST_HANDLE(uint32) pdc;
+
+ guest_from_compat_handle(pdc, op->u.set_pminfo.u.pdc);
+ ret = acpi_set_pdc_bits(op->u.set_pminfo.id, pdc);
+ }
+ break;
+
default:
ret = -EINVAL;
break;
Index: xen-4.1.2-testing/xen/drivers/acpi/pmstat.c
===================================================================
--- xen-4.1.2-testing.orig/xen/drivers/acpi/pmstat.c
+++ xen-4.1.2-testing/xen/drivers/acpi/pmstat.c
@@ -519,3 +519,34 @@ int do_pm_op(struct xen_sysctl_pm_op *op
return ret;
}
+
+int acpi_set_pdc_bits(u32 acpi_id, XEN_GUEST_HANDLE(uint32) pdc)
+{
+ u32 bits[3];
+ int ret;
+
+ if ( copy_from_guest(bits, pdc, 2) )
+ ret = -EFAULT;
+ else if ( bits[0] != ACPI_PDC_REVISION_ID || !bits[1] )
+ ret = -EINVAL;
+ else if ( copy_from_guest_offset(bits + 2, pdc, 2, 1) )
+ ret = -EFAULT;
+ else
+ {
+ u32 mask = 0;
+
+ if ( xen_processor_pmbits & XEN_PROCESSOR_PM_CX )
+ mask |= ACPI_PDC_C_MASK | ACPI_PDC_SMP_C1PT;
+ if ( xen_processor_pmbits & XEN_PROCESSOR_PM_PX )
+ mask |= ACPI_PDC_P_MASK | ACPI_PDC_SMP_C1PT;
+ if ( xen_processor_pmbits & XEN_PROCESSOR_PM_TX )
+ mask |= ACPI_PDC_T_MASK | ACPI_PDC_SMP_C1PT;
+ bits[2] &= (ACPI_PDC_C_MASK | ACPI_PDC_P_MASK | ACPI_PDC_T_MASK |
+ ACPI_PDC_SMP_C1PT) & ~mask;
+ ret = arch_acpi_set_pdc_bits(acpi_id, bits, mask);
+ }
+ if ( !ret )
+ ret = copy_to_guest_offset(pdc, 2, bits + 2, 1);
+
+ return ret;
+}
Index: xen-4.1.2-testing/xen/include/acpi/cpufreq/processor_perf.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/acpi/cpufreq/processor_perf.h
+++ xen-4.1.2-testing/xen/include/acpi/cpufreq/processor_perf.h
@@ -3,10 +3,10 @@
#include <public/platform.h>
#include <public/sysctl.h>
+#include <xen/acpi.h>
#define XEN_PX_INIT 0x80000000
-int get_cpu_id(u32);
int powernow_cpufreq_init(void);
unsigned int powernow_register_driver(void);
unsigned int get_measured_perf(unsigned int cpu, unsigned int flag);
Index: xen-4.1.2-testing/xen/include/acpi/pdc_intel.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/acpi/pdc_intel.h
+++ xen-4.1.2-testing/xen/include/acpi/pdc_intel.h
@@ -4,6 +4,8 @@
#ifndef __PDC_INTEL_H__
#define __PDC_INTEL_H__
+#define ACPI_PDC_REVISION_ID 1
+
#define ACPI_PDC_P_FFH (0x0001)
#define ACPI_PDC_C_C1_HALT (0x0002)
#define ACPI_PDC_T_FFH (0x0004)
@@ -14,6 +16,7 @@
#define ACPI_PDC_SMP_T_SWCOORD (0x0080)
#define ACPI_PDC_C_C1_FFH (0x0100)
#define ACPI_PDC_C_C2C3_FFH (0x0200)
+#define ACPI_PDC_SMP_P_HWCOORD (0x0800)
#define ACPI_PDC_EST_CAPABILITY_SMP (ACPI_PDC_SMP_C1PT | \
ACPI_PDC_C_C1_HALT | \
@@ -22,6 +25,7 @@
#define ACPI_PDC_EST_CAPABILITY_SWSMP (ACPI_PDC_SMP_C1PT | \
ACPI_PDC_C_C1_HALT | \
ACPI_PDC_SMP_P_SWCOORD | \
+ ACPI_PDC_SMP_P_HWCOORD | \
ACPI_PDC_P_FFH)
#define ACPI_PDC_C_CAPABILITY_SMP (ACPI_PDC_SMP_C2C3 | \
@@ -30,4 +34,17 @@
ACPI_PDC_C_C1_FFH | \
ACPI_PDC_C_C2C3_FFH)
+#define ACPI_PDC_C_MASK (ACPI_PDC_C_C1_HALT | \
+ ACPI_PDC_C_C1_FFH | \
+ ACPI_PDC_SMP_C2C3 | \
+ ACPI_PDC_SMP_C_SWCOORD | \
+ ACPI_PDC_C_C2C3_FFH)
+
+#define ACPI_PDC_P_MASK (ACPI_PDC_P_FFH | \
+ ACPI_PDC_SMP_P_SWCOORD | \
+ ACPI_PDC_SMP_P_HWCOORD)
+
+#define ACPI_PDC_T_MASK (ACPI_PDC_T_FFH | \
+ ACPI_PDC_SMP_T_SWCOORD)
+
#endif /* __PDC_INTEL_H__ */
Index: xen-4.1.2-testing/xen/include/asm-x86/cpufeature.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/cpufeature.h
+++ xen-4.1.2-testing/xen/include/asm-x86/cpufeature.h
@@ -151,6 +151,10 @@
#define boot_cpu_has(bit) test_bit(bit, boot_cpu_data.x86_capability)
#define cpufeat_mask(idx) (1u << ((idx) & 31))
+#define CPUID_MWAIT_LEAF 5
+#define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
+#define CPUID5_ECX_INTERRUPT_BREAK 0x2
+
#ifdef __i386__
#define cpu_has_vme boot_cpu_has(X86_FEATURE_VME)
#define cpu_has_de boot_cpu_has(X86_FEATURE_DE)
Index: xen-4.1.2-testing/xen/include/public/platform.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/public/platform.h
+++ xen-4.1.2-testing/xen/include/public/platform.h
@@ -304,6 +304,7 @@ DEFINE_XEN_GUEST_HANDLE(xenpf_getidletim
#define XEN_PM_CX 0
#define XEN_PM_PX 1
#define XEN_PM_TX 2
+#define XEN_PM_PDC 3
/* Px sub info type */
#define XEN_PX_PCT 1
@@ -401,6 +402,7 @@ struct xenpf_set_processor_pminfo {
union {
struct xen_processor_power power;/* Cx: _CST/_CSD */
struct xen_processor_performance perf; /* Px: _PPC/_PCT/_PSS/_PSD */
+ XEN_GUEST_HANDLE(uint32) pdc; /* _PDC */
} u;
};
typedef struct xenpf_set_processor_pminfo xenpf_set_processor_pminfo_t;
Index: xen-4.1.2-testing/xen/include/xen/acpi.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/xen/acpi.h
+++ xen-4.1.2-testing/xen/include/xen/acpi.h
@@ -334,6 +334,8 @@ static inline int acpi_boot_table_init(v
#endif /*!CONFIG_ACPI_BOOT*/
+int get_cpu_id(u32 acpi_id);
+
unsigned int acpi_register_gsi (u32 gsi, int edge_level, int active_high_low);
int acpi_gsi_to_irq (u32 gsi, unsigned int *irq);
@@ -431,6 +433,9 @@ static inline unsigned int acpi_get_csta
static inline void acpi_set_cstate_limit(unsigned int new_limit) { return; }
#endif
+int acpi_set_pdc_bits(u32 acpi_id, XEN_GUEST_HANDLE(uint32));
+int arch_acpi_set_pdc_bits(u32 acpi_id, u32 *, u32 mask);
+
#ifdef CONFIG_ACPI_NUMA
int acpi_get_pxm(acpi_handle handle);
#else
++++++ 23795-intel-ich10-quirk.patch ++++++
References: bnc#683580
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1314443678 -3600
# Node ID e17f70940d1f57fe04dde3bf4e243f75c89f0d0e
# Parent 4705eca37c9fac9d13867a856bdcfa8b7bad56c6
x86: work around certain Intel BIOSes causing (transient) hangs during boot
They apparently leave the USB legacy emulation bits set in ICH10's
SMI Control and Enable register, but fail to handle the resulting SMIs
gracefully. The hangs can apparently extend indefinitely, but are
commonly observed to last between a few seconds and a minute.
This assumes that only ICH10-based systems on Intel main boards with
Intel BIOS may be affected. Until Intel comes up with a more precise
identification of affected BIOSes, all Intel ones on Intel boards
will get this workaround applied.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/dmi_scan.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/dmi_scan.c
+++ xen-4.1.2-testing/xen/arch/x86/dmi_scan.c
@@ -10,6 +10,8 @@
#include <asm/system.h>
#include <xen/dmi.h>
#include <xen/efi.h>
+#include <xen/pci.h>
+#include <xen/pci_regs.h>
#define bt_ioremap(b,l) ((void *)__acpi_map_table(b,l))
#define bt_iounmap(b,l) ((void)0)
@@ -278,6 +280,28 @@ static __init int broken_toshiba_keyboar
return 0;
}
+static int __init ich10_bios_quirk(struct dmi_system_id *d)
+{
+ u32 port, smictl;
+
+ if ( pci_conf_read16(0, 0x1f, 0, PCI_VENDOR_ID) != 0x8086 )
+ return 0;
+
+ switch ( pci_conf_read16(0, 0x1f, 0, PCI_DEVICE_ID) ) {
+ case 0x3a14:
+ case 0x3a16:
+ case 0x3a18:
+ case 0x3a1a:
+ port = (pci_conf_read16(0, 0x1f, 0, 0x40) & 0xff80) + 0x30;
+ smictl = inl(port);
+ /* turn off LEGACY_USB{,2}_EN if enabled */
+ if ( smictl & 0x20008 )
+ outl(smictl & ~0x20008, port);
+ break;
+ }
+
+ return 0;
+}
#ifdef CONFIG_ACPI_SLEEP
static __init int reset_videomode_after_s3(struct dmi_blacklist *d)
@@ -363,6 +387,18 @@ static __initdata struct dmi_blacklist d
} },
#endif
+ { ich10_bios_quirk, "Intel board & BIOS",
+ /*
+ * BIOS leaves legacy USB emulation enabled while
+ * SMM can't properly handle it.
+ */
+ {
+ MATCH(DMI_BOARD_VENDOR, "Intel Corp"),
+ MATCH(DMI_BIOS_VENDOR, "Intel Corp"),
+ NO_MATCH, NO_MATCH
+ }
+ },
+
#ifdef CONFIG_ACPI_BOOT
/*
* If your system is blacklisted here, but you find that acpi=force
++++++ 23800-x86_64-guest-addr-range.patch ++++++
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1314800089 -3600
# Node ID 72edc40e2942a3cf0ee8e0d3a330d2e5c2bdfb53
# Parent ac9aa65050e9abc8f1c12c8603acf3b99e22cddc
x86-64: Fix off-by-one error in __addr_ok() macro
Signed-off-by: Laszlo Ersek <lersek@xxxxxxxxxx>
Signed-off-by: Keir Fraser <keir@xxxxxxx>
Index: xen-4.1.2-testing/xen/include/asm-x86/x86_64/uaccess.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/x86_64/uaccess.h
+++ xen-4.1.2-testing/xen/include/asm-x86/x86_64/uaccess.h
@@ -21,7 +21,7 @@ void free_compat_arg_xlat(struct vcpu *v
* non-canonical address (and thus fault) before ever reaching VIRT_START.
*/
#define __addr_ok(addr) \
- (((unsigned long)(addr) < (1UL<<48)) || \
+ (((unsigned long)(addr) < (1UL<<47)) || \
((unsigned long)(addr) >= HYPERVISOR_VIRT_END))
#define access_ok(addr, size) \
++++++ 23804-x86-IPI-counts.patch ++++++
# HG changeset patch
# User Kevin Tian <kevin.tian@xxxxxxxxx>
# Date 1314800303 -3600
# Node ID 42d76c68b2bfbedee3e5f79d32344e14bce48b0f
# Parent 51983821efa4db4040ae1c5063a4404791597699
x86: add irq count for IPIs
such count is useful to assist decision make in cpuidle governor,
while w/o this patch only device interrupts through do_IRQ is
currently counted.
Signed-off-by: Kevin Tian <kevin.tian@xxxxxxxxx>
--- a/xen/arch/x86/apic.c
+++ b/xen/arch/x86/apic.c
@@ -1372,6 +1372,7 @@ fastcall void smp_apic_timer_interrupt(s
struct cpu_user_regs *old_regs = set_irq_regs(regs);
ack_APIC_irq();
perfc_incr(apic_timer);
+ this_cpu(irq_count)++;
raise_softirq(TIMER_SOFTIRQ);
set_irq_regs(old_regs);
}
@@ -1393,6 +1394,7 @@ fastcall void smp_spurious_interrupt(str
unsigned long v;
struct cpu_user_regs *old_regs = set_irq_regs(regs);
+ this_cpu(irq_count)++;
irq_enter();
/*
@@ -1428,6 +1430,7 @@ fastcall void smp_error_interrupt(struct
unsigned long v, v1;
struct cpu_user_regs *old_regs = set_irq_regs(regs);
+ this_cpu(irq_count)++;
irq_enter();
/* First tickle the hardware, only then report what went on. -- REW */
v = apic_read(APIC_ESR);
@@ -1459,6 +1462,7 @@ fastcall void smp_pmu_apic_interrupt(str
{
struct cpu_user_regs *old_regs = set_irq_regs(regs);
ack_APIC_irq();
+ this_cpu(irq_count)++;
hvm_do_pmu_interrupt(regs);
set_irq_regs(old_regs);
}
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c
@@ -66,6 +66,7 @@ static void (*vendor_thermal_interrupt)(
fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs)
{
struct cpu_user_regs *old_regs = set_irq_regs(regs);
+ this_cpu(irq_count)++;
irq_enter();
vendor_thermal_interrupt(regs);
irq_exit();
@@ -1094,6 +1095,7 @@ fastcall void smp_cmci_interrupt(struct
struct cpu_user_regs *old_regs = set_irq_regs(regs);
ack_APIC_irq();
+ this_cpu(irq_count)++;
irq_enter();
mctc = mcheck_mca_logout(
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -529,6 +529,7 @@ fastcall void smp_irq_move_cleanup_inter
struct cpu_user_regs *old_regs = set_irq_regs(regs);
ack_APIC_irq();
+ this_cpu(irq_count)++;
irq_enter();
me = smp_processor_id();
--- a/xen/arch/x86/smp.c
+++ b/xen/arch/x86/smp.c
@@ -221,6 +221,7 @@ fastcall void smp_invalidate_interrupt(v
{
ack_APIC_irq();
perfc_incr(ipis);
+ this_cpu(irq_count)++;
irq_enter();
if ( !__sync_local_execstate() ||
(flush_flags & (FLUSH_TLB_GLOBAL | FLUSH_CACHE)) )
@@ -385,6 +386,7 @@ fastcall void smp_event_check_interrupt(
struct cpu_user_regs *old_regs = set_irq_regs(regs);
ack_APIC_irq();
perfc_incr(ipis);
+ this_cpu(irq_count)++;
set_irq_regs(old_regs);
}
@@ -421,6 +423,7 @@ fastcall void smp_call_function_interrup
ack_APIC_irq();
perfc_incr(ipis);
+ this_cpu(irq_count)++;
__smp_call_function_interrupt();
set_irq_regs(old_regs);
}
++++++ 23817-mem_event_add_ref_counting_for_free_requestslots.patch ++++++
changeset: 23817:083f10851dd8
user: Olaf Hering <olaf@xxxxxxxxx>
date: Mon Sep 05 15:10:09 2011 +0100
files: xen/arch/x86/mm/mem_event.c xen/arch/x86/mm/mem_sharing.c
xen/arch/x86/mm/p2m.c xen/include/asm-x86/mem_event.h xen/include/xen/sched.h
description:
mem_event: add ref counting for free requestslots
If mem_event_check_ring() is called by many vcpus at the same time
before any of them called also mem_event_put_request(), all of the
callers must assume there are enough free slots available in the ring.
Record the number of request producers in mem_event_check_ring() to
keep track of available free slots.
Add a new mem_event_put_req_producers() function to release a request
attempt made in mem_event_check_ring(). Its required for
p2m_mem_paging_populate() because that function can only modify the
p2m type if there are free request slots. But in some cases
p2m_mem_paging_populate() does not actually have to produce another
request when it is known that the same request was already made
earlier by a different vcpu.
mem_event_check_ring() can not return a reference to a free request
slot because there could be multiple references for different vcpus
and the order of mem_event_put_request() calls is not known. As a
result, incomplete requests could be consumed by the ring user.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
xen/arch/x86/mm/mem_event.c | 19 ++++++++++++-------
xen/arch/x86/mm/mem_sharing.c | 1 -
xen/arch/x86/mm/p2m.c | 1 +
xen/include/asm-x86/mem_event.h | 1 +
xen/include/xen/sched.h | 1 +
5 files changed, 15 insertions(+), 8 deletions(-)
Index: xen-4.1.2-testing/xen/arch/x86/mm/mem_event.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/mem_event.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/mem_event.c
@@ -37,8 +37,6 @@
#define mem_event_ring_lock(_d) spin_lock(&(_d)->mem_event.ring_lock)
#define mem_event_ring_unlock(_d) spin_unlock(&(_d)->mem_event.ring_lock)
-#define MEM_EVENT_RING_THRESHOLD 4
-
static int mem_event_enable(struct domain *d, mfn_t ring_mfn, mfn_t shared_mfn)
{
int rc;
@@ -109,6 +107,7 @@ void mem_event_put_request(struct domain
req_prod++;
/* Update ring */
+ d->mem_event.req_producers--;
front_ring->req_prod_pvt = req_prod;
RING_PUSH_REQUESTS(front_ring);
@@ -153,11 +152,18 @@ void mem_event_mark_and_pause(struct vcp
vcpu_sleep_nosync(v);
}
+void mem_event_put_req_producers(struct domain *d)
+{
+ mem_event_ring_lock(d);
+ d->mem_event.req_producers--;
+ mem_event_ring_unlock(d);
+}
+
int mem_event_check_ring(struct domain *d)
{
struct vcpu *curr = current;
int free_requests;
- int ring_full;
+ int ring_full = 1;
if ( !d->mem_event.ring_page )
return -1;
@@ -165,12 +171,11 @@ int mem_event_check_ring(struct domain *
mem_event_ring_lock(d);
free_requests = RING_FREE_REQUESTS(&d->mem_event.front_ring);
- if ( unlikely(free_requests < 2) )
+ if ( d->mem_event.req_producers < free_requests )
{
- gdprintk(XENLOG_INFO, "free request slots: %d\n", free_requests);
- WARN_ON(free_requests == 0);
+ d->mem_event.req_producers++;
+ ring_full = 0;
}
- ring_full = free_requests < MEM_EVENT_RING_THRESHOLD ? 1 : 0;
if ( (curr->domain->domain_id == d->domain_id) && ring_full )
{
Index: xen-4.1.2-testing/xen/arch/x86/mm/mem_sharing.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/mem_sharing.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/mem_sharing.c
@@ -322,7 +322,6 @@ static struct page_info* mem_sharing_all
req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
}
- /* XXX: Need to reserve a request, not just check the ring! */
if(mem_event_check_ring(d)) return page;
req.gfn = gfn;
Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
@@ -2970,6 +2970,7 @@ void p2m_mem_paging_populate(struct p2m_
else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
{
/* gfn is already on its way back and vcpu is not paused */
+ mem_event_put_req_producers(d);
return;
}
Index: xen-4.1.2-testing/xen/include/asm-x86/mem_event.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/mem_event.h
+++ xen-4.1.2-testing/xen/include/asm-x86/mem_event.h
@@ -27,6 +27,7 @@
/* Pauses VCPU while marking pause flag for mem event */
void mem_event_mark_and_pause(struct vcpu *v);
int mem_event_check_ring(struct domain *d);
+void mem_event_put_req_producers(struct domain *d);
void mem_event_put_request(struct domain *d, mem_event_request_t *req);
void mem_event_get_response(struct domain *d, mem_event_response_t *rsp);
void mem_event_unpause_vcpus(struct domain *d);
Index: xen-4.1.2-testing/xen/include/xen/sched.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/xen/sched.h
+++ xen-4.1.2-testing/xen/include/xen/sched.h
@@ -190,6 +190,7 @@ struct mem_event_domain
{
/* ring lock */
spinlock_t ring_lock;
+ unsigned int req_producers;
/* shared page */
mem_event_shared_page_t *shared_page;
/* shared ring page */
++++++
23818-mem_event_use_mem_event_mark_and_pause_in_mem_event_check_ring.patch
++++++
changeset: 23818:0268e7380953
user: Olaf Hering <olaf@xxxxxxxxx>
date: Mon Sep 05 15:10:28 2011 +0100
files: xen/arch/x86/mm/mem_event.c
description:
mem_event: use mem_event_mark_and_pause() in mem_event_check_ring()
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
xen/arch/x86/mm/mem_event.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
Index: xen-4.1.2-testing/xen/arch/x86/mm/mem_event.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/mem_event.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/mem_event.c
@@ -178,10 +178,7 @@ int mem_event_check_ring(struct domain *
}
if ( (curr->domain->domain_id == d->domain_id) && ring_full )
- {
- set_bit(_VPF_mem_event, &curr->pause_flags);
- vcpu_sleep_nosync(curr);
- }
+ mem_event_mark_and_pause(curr);
mem_event_ring_unlock(d);
++++++ 23819-make-docs.patch ++++++
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1315320580 -3600
# Node ID 5fe770c8a8a35c58893816ee6335a90ed43f3bbd
# Parent 0268e73809532a4a3ca18a075efcee3c62caf458
docs: Fix 'make docs'
Signed-off-by: Keir Fraser <keir@xxxxxxx>
Index: xen-4.1.2-testing/docs/src/user.tex
===================================================================
--- xen-4.1.2-testing.orig/docs/src/user.tex
+++ xen-4.1.2-testing/docs/src/user.tex
@@ -4197,7 +4197,7 @@ writing to the VGA console after domain
\item [ vcpu\_migration\_delay=$<$minimum\_time$>$] Set minimum time of
vcpu migration in microseconds (default 0). This parameter avoids agressive
vcpu migration. For example, the linux kernel uses 0.5ms by default.
-\item [ irq_vector_map=xxx ] Enable irq vector non-sharing maps. Setting
'global'
+\item [ irq\_vector\_map=xxx ] Enable irq vector non-sharing maps. Setting
'global'
will ensure that no IRQs will share vectors. Setting 'per-device' will
ensure
that no IRQs from the same device will share vectors. Setting to 'none' will
disable it entirely, overriding any defaults the IOMMU code may set.
++++++ 23827-xenpaging_use_batch_of_pages_during_final_page-in.patch ++++++
changeset: 23827:d1d6abc1db20
user: Olaf Hering <olaf@xxxxxxxxx>
date: Tue Sep 13 10:25:32 2011 +0100
files: tools/xenpaging/pagein.c tools/xenpaging/xenpaging.c
tools/xenpaging/xenpaging.h
description:
xenpaging: use batch of pages during final page-in
Map up to RING_SIZE pages in exit path to fill the ring instead of
populating one page at a time.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
tools/xenpaging/pagein.c | 36 ++++++++++++++++++++++++------------
tools/xenpaging/xenpaging.c | 18 +++++++++++++-----
tools/xenpaging/xenpaging.h | 7 +++++--
3 files changed, 42 insertions(+), 19 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/pagein.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/pagein.c
+++ xen-4.1.2-testing/tools/xenpaging/pagein.c
@@ -1,14 +1,16 @@
/* Trigger a page-in in a separate thread-of-execution to avoid deadlock */
#include <pthread.h>
-#include "xc_private.h"
+#include <xc_private.h>
+#include "xenpaging.h"
struct page_in_args {
domid_t dom;
+ unsigned long *pagein_queue;
xc_interface *xch;
};
static struct page_in_args page_in_args;
-static unsigned long page_in_gfn;
+static unsigned long page_in_request;
static unsigned int page_in_possible;
static pthread_t page_in_thread;
@@ -19,19 +21,28 @@ static void *page_in(void *arg)
{
struct page_in_args *pia = arg;
void *page;
- xen_pfn_t gfn;
+ int i, num;
+ xen_pfn_t gfns[XENPAGING_PAGEIN_QUEUE_SIZE];
while (1)
{
pthread_mutex_lock(&page_in_mutex);
- while (!page_in_gfn)
+ while (!page_in_request)
pthread_cond_wait(&page_in_cond, &page_in_mutex);
- gfn = page_in_gfn;
- page_in_gfn = 0;
+ num = 0;
+ for (i = 0; i < XENPAGING_PAGEIN_QUEUE_SIZE; i++)
+ {
+ if (!pia->pagein_queue[i])
+ continue;
+ gfns[num] = pia->pagein_queue[i];
+ pia->pagein_queue[i] = 0;
+ num++;
+ }
+ page_in_request = 0;
pthread_mutex_unlock(&page_in_mutex);
/* Ignore errors */
- page = xc_map_foreign_pages(pia->xch, pia->dom, PROT_READ, &gfn, 1);
+ page = xc_map_foreign_pages(pia->xch, pia->dom, PROT_READ, gfns, num);
if (page)
munmap(page, PAGE_SIZE);
}
@@ -39,21 +50,22 @@ static void *page_in(void *arg)
pthread_exit(NULL);
}
-void page_in_trigger(unsigned long gfn)
+void page_in_trigger(void)
{
if (!page_in_possible)
return;
pthread_mutex_lock(&page_in_mutex);
- page_in_gfn = gfn;
+ page_in_request = 1;
pthread_mutex_unlock(&page_in_mutex);
pthread_cond_signal(&page_in_cond);
}
-void create_page_in_thread(domid_t domain_id, xc_interface *xch)
+void create_page_in_thread(xenpaging_t *paging)
{
- page_in_args.dom = domain_id;
- page_in_args.xch = xch;
+ page_in_args.dom = paging->mem_event.domain_id;
+ page_in_args.pagein_queue = paging->pagein_queue;
+ page_in_args.xch = paging->xc_handle;
if (pthread_create(&page_in_thread, NULL, page_in, &page_in_args) == 0)
page_in_possible = 1;
}
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -648,7 +648,7 @@ int main(int argc, char *argv[])
sigaction(SIGALRM, &act, NULL);
/* listen for page-in events to stop pager */
- create_page_in_thread(paging->mem_event.domain_id, xch);
+ create_page_in_thread(paging);
/* Evict pages */
for ( i = 0; i < paging->num_pages; i++ )
@@ -764,16 +764,24 @@ int main(int argc, char *argv[])
/* Write all pages back into the guest */
if ( interrupted == SIGTERM || interrupted == SIGINT )
{
+ int num = 0;
for ( i = 0; i < paging->domain_info->max_pages; i++ )
{
if ( test_bit(i, paging->bitmap) )
{
- page_in_trigger(i);
- break;
+ paging->pagein_queue[num] = i;
+ num++;
+ if ( num == XENPAGING_PAGEIN_QUEUE_SIZE )
+ break;
}
}
- /* If no more pages to process, exit loop */
- if ( i == paging->domain_info->max_pages )
+ /*
+ * One more round if there are still pages to process.
+ * If no more pages to process, exit loop.
+ */
+ if ( num )
+ page_in_trigger();
+ else if ( i == paging->domain_info->max_pages )
break;
}
else
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.h
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.h
@@ -29,6 +29,8 @@
#include <xen/event_channel.h>
#include <xen/mem_event.h>
+#define XENPAGING_PAGEIN_QUEUE_SIZE 64
+
typedef struct mem_event {
domid_t domain_id;
xc_evtchn *xce_handle;
@@ -49,6 +51,7 @@ typedef struct xenpaging {
mem_event_t mem_event;
int num_pages;
int policy_mru_size;
+ unsigned long pagein_queue[XENPAGING_PAGEIN_QUEUE_SIZE];
} xenpaging_t;
@@ -58,8 +61,8 @@ typedef struct xenpaging_victim {
} xenpaging_victim_t;
-extern void create_page_in_thread(domid_t domain_id, xc_interface *xch);
-extern void page_in_trigger(unsigned long gfn);
+extern void create_page_in_thread(xenpaging_t *paging);
+extern void page_in_trigger(void);
#endif // __XEN_PAGING_H__
++++++
23841-mem_event_pass_mem_event_domain_pointer_to_mem_event_functions.patch
++++++
changeset: 23841:ed7586b1d515
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Sep 16 12:13:31 2011 +0100
files: xen/arch/x86/hvm/hvm.c xen/arch/x86/mm/mem_event.c
xen/arch/x86/mm/mem_sharing.c xen/arch/x86/mm/p2m.c
xen/include/asm-x86/mem_event.h
description:
mem_event: pass mem_event_domain pointer to mem_event functions
Pass a struct mem_event_domain pointer to the various mem_event
functions. This will be used in a subsequent patch which creates
different ring buffers for the memshare, xenpaging and memaccess
functionality.
Remove the struct domain argument from some functions.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
---
xen/arch/x86/hvm/hvm.c | 4 -
xen/arch/x86/mm/mem_event.c | 95 ++++++++++++++++++++--------------------
xen/arch/x86/mm/mem_sharing.c | 6 +-
xen/arch/x86/mm/p2m.c | 18 +++----
xen/include/asm-x86/mem_event.h | 8 +--
5 files changed, 66 insertions(+), 65 deletions(-)
Index: xen-4.1.2-testing/xen/arch/x86/hvm/hvm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/hvm.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/hvm.c
@@ -3909,7 +3909,7 @@ static int hvm_memory_event_traps(long p
if ( (p & HVMPME_onchangeonly) && (value == old) )
return 1;
- rc = mem_event_check_ring(d);
+ rc = mem_event_check_ring(d, &d->mem_event);
if ( rc )
return rc;
@@ -3932,7 +3932,7 @@ static int hvm_memory_event_traps(long p
req.gla_valid = 1;
}
- mem_event_put_request(d, &req);
+ mem_event_put_request(d, &d->mem_event, &req);
return 1;
}
Index: xen-4.1.2-testing/xen/arch/x86/mm/mem_event.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/mem_event.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/mem_event.c
@@ -33,21 +33,21 @@
#define xen_rmb() rmb()
#define xen_wmb() wmb()
-#define mem_event_ring_lock_init(_d)
spin_lock_init(&(_d)->mem_event.ring_lock)
-#define mem_event_ring_lock(_d) spin_lock(&(_d)->mem_event.ring_lock)
-#define mem_event_ring_unlock(_d) spin_unlock(&(_d)->mem_event.ring_lock)
+#define mem_event_ring_lock_init(_med) spin_lock_init(&(_med)->ring_lock)
+#define mem_event_ring_lock(_med) spin_lock(&(_med)->ring_lock)
+#define mem_event_ring_unlock(_med) spin_unlock(&(_med)->ring_lock)
-static int mem_event_enable(struct domain *d, mfn_t ring_mfn, mfn_t shared_mfn)
+static int mem_event_enable(struct domain *d, struct mem_event_domain *med,
mfn_t ring_mfn, mfn_t shared_mfn)
{
int rc;
/* Map ring and shared pages */
- d->mem_event.ring_page = map_domain_page(mfn_x(ring_mfn));
- if ( d->mem_event.ring_page == NULL )
+ med->ring_page = map_domain_page(mfn_x(ring_mfn));
+ if ( med->ring_page == NULL )
goto err;
- d->mem_event.shared_page = map_domain_page(mfn_x(shared_mfn));
- if ( d->mem_event.shared_page == NULL )
+ med->shared_page = map_domain_page(mfn_x(shared_mfn));
+ if ( med->shared_page == NULL )
goto err_ring;
/* Allocate event channel */
@@ -56,15 +56,15 @@ static int mem_event_enable(struct domai
if ( rc < 0 )
goto err_shared;
- ((mem_event_shared_page_t *)d->mem_event.shared_page)->port = rc;
- d->mem_event.xen_port = rc;
+ ((mem_event_shared_page_t *)med->shared_page)->port = rc;
+ med->xen_port = rc;
/* Prepare ring buffer */
- FRONT_RING_INIT(&d->mem_event.front_ring,
- (mem_event_sring_t *)d->mem_event.ring_page,
+ FRONT_RING_INIT(&med->front_ring,
+ (mem_event_sring_t *)med->ring_page,
PAGE_SIZE);
- mem_event_ring_lock_init(d);
+ mem_event_ring_lock_init(med);
/* Wake any VCPUs paused for memory events */
mem_event_unpause_vcpus(d);
@@ -72,34 +72,34 @@ static int mem_event_enable(struct domai
return 0;
err_shared:
- unmap_domain_page(d->mem_event.shared_page);
- d->mem_event.shared_page = NULL;
+ unmap_domain_page(med->shared_page);
+ med->shared_page = NULL;
err_ring:
- unmap_domain_page(d->mem_event.ring_page);
- d->mem_event.ring_page = NULL;
+ unmap_domain_page(med->ring_page);
+ med->ring_page = NULL;
err:
return 1;
}
-static int mem_event_disable(struct domain *d)
+static int mem_event_disable(struct mem_event_domain *med)
{
- unmap_domain_page(d->mem_event.ring_page);
- d->mem_event.ring_page = NULL;
+ unmap_domain_page(med->ring_page);
+ med->ring_page = NULL;
- unmap_domain_page(d->mem_event.shared_page);
- d->mem_event.shared_page = NULL;
+ unmap_domain_page(med->shared_page);
+ med->shared_page = NULL;
return 0;
}
-void mem_event_put_request(struct domain *d, mem_event_request_t *req)
+void mem_event_put_request(struct domain *d, struct mem_event_domain *med,
mem_event_request_t *req)
{
mem_event_front_ring_t *front_ring;
RING_IDX req_prod;
- mem_event_ring_lock(d);
+ mem_event_ring_lock(med);
- front_ring = &d->mem_event.front_ring;
+ front_ring = &med->front_ring;
req_prod = front_ring->req_prod_pvt;
/* Copy request */
@@ -107,23 +107,23 @@ void mem_event_put_request(struct domain
req_prod++;
/* Update ring */
- d->mem_event.req_producers--;
+ med->req_producers--;
front_ring->req_prod_pvt = req_prod;
RING_PUSH_REQUESTS(front_ring);
- mem_event_ring_unlock(d);
+ mem_event_ring_unlock(med);
- notify_via_xen_event_channel(d, d->mem_event.xen_port);
+ notify_via_xen_event_channel(d, med->xen_port);
}
-void mem_event_get_response(struct domain *d, mem_event_response_t *rsp)
+void mem_event_get_response(struct mem_event_domain *med, mem_event_response_t
*rsp)
{
mem_event_front_ring_t *front_ring;
RING_IDX rsp_cons;
- mem_event_ring_lock(d);
+ mem_event_ring_lock(med);
- front_ring = &d->mem_event.front_ring;
+ front_ring = &med->front_ring;
rsp_cons = front_ring->rsp_cons;
/* Copy response */
@@ -134,7 +134,7 @@ void mem_event_get_response(struct domai
front_ring->rsp_cons = rsp_cons;
front_ring->sring->rsp_event = rsp_cons + 1;
- mem_event_ring_unlock(d);
+ mem_event_ring_unlock(med);
}
void mem_event_unpause_vcpus(struct domain *d)
@@ -152,35 +152,35 @@ void mem_event_mark_and_pause(struct vcp
vcpu_sleep_nosync(v);
}
-void mem_event_put_req_producers(struct domain *d)
+void mem_event_put_req_producers(struct mem_event_domain *med)
{
- mem_event_ring_lock(d);
- d->mem_event.req_producers--;
- mem_event_ring_unlock(d);
+ mem_event_ring_lock(med);
+ med->req_producers--;
+ mem_event_ring_unlock(med);
}
-int mem_event_check_ring(struct domain *d)
+int mem_event_check_ring(struct domain *d, struct mem_event_domain *med)
{
struct vcpu *curr = current;
int free_requests;
int ring_full = 1;
- if ( !d->mem_event.ring_page )
+ if ( !med->ring_page )
return -1;
- mem_event_ring_lock(d);
+ mem_event_ring_lock(med);
- free_requests = RING_FREE_REQUESTS(&d->mem_event.front_ring);
- if ( d->mem_event.req_producers < free_requests )
+ free_requests = RING_FREE_REQUESTS(&med->front_ring);
+ if ( med->req_producers < free_requests )
{
- d->mem_event.req_producers++;
+ med->req_producers++;
ring_full = 0;
}
- if ( (curr->domain->domain_id == d->domain_id) && ring_full )
+ if ( ring_full && (curr->domain == d) )
mem_event_mark_and_pause(curr);
- mem_event_ring_unlock(d);
+ mem_event_ring_unlock(med);
return ring_full;
}
@@ -230,6 +230,7 @@ int mem_event_domctl(struct domain *d, x
{
struct domain *dom_mem_event = current->domain;
struct vcpu *v = current;
+ struct mem_event_domain *med = &d->mem_event;
unsigned long ring_addr = mec->ring_addr;
unsigned long shared_addr = mec->shared_addr;
l1_pgentry_t l1e;
@@ -242,7 +243,7 @@ int mem_event_domctl(struct domain *d, x
* the cache is in an undefined state and so is the guest
*/
rc = -EBUSY;
- if ( d->mem_event.ring_page )
+ if ( med->ring_page )
break;
/* Currently only EPT is supported */
@@ -270,7 +271,7 @@ int mem_event_domctl(struct domain *d, x
break;
rc = -EINVAL;
- if ( mem_event_enable(d, ring_mfn, shared_mfn) != 0 )
+ if ( mem_event_enable(d, med, ring_mfn, shared_mfn) != 0 )
break;
rc = 0;
@@ -279,7 +280,7 @@ int mem_event_domctl(struct domain *d, x
case XEN_DOMCTL_MEM_EVENT_OP_DISABLE:
{
- rc = mem_event_disable(d);
+ rc = mem_event_disable(&d->mem_event);
}
break;
Index: xen-4.1.2-testing/xen/arch/x86/mm/mem_sharing.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/mem_sharing.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/mem_sharing.c
@@ -322,12 +322,12 @@ static struct page_info* mem_sharing_all
req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
}
- if(mem_event_check_ring(d)) return page;
+ if(mem_event_check_ring(d, &d->mem_event)) return page;
req.gfn = gfn;
req.p2mt = p2m_ram_shared;
req.vcpu_id = v->vcpu_id;
- mem_event_put_request(d, &req);
+ mem_event_put_request(d, &d->mem_event, &req);
return page;
}
@@ -342,7 +342,7 @@ int mem_sharing_sharing_resume(struct do
mem_event_response_t rsp;
/* Get request off the ring */
- mem_event_get_response(d, &rsp);
+ mem_event_get_response(&d->mem_event, &rsp);
/* Unpause domain/vcpu */
if( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
@@ -2923,7 +2923,7 @@ void p2m_mem_paging_drop_page(struct p2m
struct domain *d = p2m->domain;
/* Check that there's space on the ring for this request */
- if ( mem_event_check_ring(d) == 0)
+ if ( mem_event_check_ring(d, &d->mem_event) == 0)
{
/* Send release notification to pager */
memset(&req, 0, sizeof(req));
@@ -2931,7 +2931,7 @@ void p2m_mem_paging_drop_page(struct p2m
req.gfn = gfn;
req.vcpu_id = v->vcpu_id;
- mem_event_put_request(d, &req);
+ mem_event_put_request(d, &d->mem_event, &req);
}
}
@@ -2943,7 +2943,7 @@ void p2m_mem_paging_populate(struct p2m_
struct domain *d = p2m->domain;
/* Check that there's space on the ring for this request */
- if ( mem_event_check_ring(d) )
+ if ( mem_event_check_ring(d, &d->mem_event) )
return;
memset(&req, 0, sizeof(req));
@@ -2970,7 +2970,7 @@ void p2m_mem_paging_populate(struct p2m_
else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
{
/* gfn is already on its way back and vcpu is not paused */
- mem_event_put_req_producers(d);
+ mem_event_put_req_producers(&d->mem_event);
return;
}
@@ -2979,7 +2979,7 @@ void p2m_mem_paging_populate(struct p2m_
req.p2mt = p2mt;
req.vcpu_id = v->vcpu_id;
- mem_event_put_request(d, &req);
+ mem_event_put_request(d, &d->mem_event, &req);
}
int p2m_mem_paging_prep(struct p2m_domain *p2m, unsigned long gfn)
@@ -3008,7 +3008,7 @@ void p2m_mem_paging_resume(struct p2m_do
mfn_t mfn;
/* Pull the response off the ring */
- mem_event_get_response(d, &rsp);
+ mem_event_get_response(&d->mem_event, &rsp);
/* Fix p2m entry if the page was not dropped */
if ( !(rsp.flags & MEM_EVENT_FLAG_DROP_PAGE) )
@@ -3055,7 +3055,7 @@ void p2m_mem_access_check(unsigned long
p2m_unlock(p2m);
/* Otherwise, check if there is a memory event listener, and send the
message along */
- res = mem_event_check_ring(d);
+ res = mem_event_check_ring(d, &d->mem_event);
if ( res < 0 )
{
/* No listener */
@@ -3099,7 +3099,7 @@ void p2m_mem_access_check(unsigned long
req.vcpu_id = v->vcpu_id;
- mem_event_put_request(d, &req);
+ mem_event_put_request(d, &d->mem_event, &req);
/* VCPU paused, mem event request sent */
}
@@ -3109,7 +3109,7 @@ void p2m_mem_access_resume(struct p2m_do
struct domain *d = p2m->domain;
mem_event_response_t rsp;
- mem_event_get_response(d, &rsp);
+ mem_event_get_response(&d->mem_event, &rsp);
/* Unpause domain */
if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
Index: xen-4.1.2-testing/xen/include/asm-x86/mem_event.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/mem_event.h
+++ xen-4.1.2-testing/xen/include/asm-x86/mem_event.h
@@ -26,10 +26,10 @@
/* Pauses VCPU while marking pause flag for mem event */
void mem_event_mark_and_pause(struct vcpu *v);
-int mem_event_check_ring(struct domain *d);
-void mem_event_put_req_producers(struct domain *d);
-void mem_event_put_request(struct domain *d, mem_event_request_t *req);
-void mem_event_get_response(struct domain *d, mem_event_response_t *rsp);
+int mem_event_check_ring(struct domain *d, struct mem_event_domain *med);
+void mem_event_put_req_producers(struct mem_event_domain *med);
+void mem_event_put_request(struct domain *d, struct mem_event_domain *med,
mem_event_request_t *req);
+void mem_event_get_response(struct mem_event_domain *med, mem_event_response_t
*rsp);
void mem_event_unpause_vcpus(struct domain *d);
int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
++++++
23842-mem_event_use_different_ringbuffers_for_share_paging_and_access.patch
++++++
++++ 868 lines (skipped)
++++++ 23853-x86-pv-cpuid-xsave.patch ++++++
# HG changeset patch
# User Shan Haitao <haitao.shan@xxxxxxxxx>
# Date 1316300518 -3600
# Node ID b78235de5c6407023759f9bbf723dd83887fedf0
# Parent c944e82bb092925f31403a129087e9d40e0fa06a
Fix PV CPUID virtualization of XSave
The patch will fix XSave CPUID virtualization for PV guests. The XSave
area size returned by CPUID leaf D is changed dynamically depending on
the XCR0. Tools/libxc only assigns a static value. The fix will adjust
xsave area size during runtime.
Note: This fix is already in HVM cpuid virtualization. And Dom0 is not
affected, either.
Signed-off-by: Shan Haitao <haitao.shan@xxxxxxxxx>
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2310,7 +2310,7 @@ void hvm_cpuid(unsigned int input, unsig
{
/* reset EBX to default value first */
*ebx = XSAVE_AREA_MIN_SIZE;
- for ( sub_leaf = 2; sub_leaf < 64; sub_leaf++ )
+ for ( sub_leaf = 2; sub_leaf < 63; sub_leaf++ )
{
if ( !(v->arch.xcr0 & (1ULL << sub_leaf)) )
continue;
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -768,6 +768,30 @@ static void pv_cpuid(struct cpu_user_reg
{
if ( !cpuid_hypervisor_leaves(a, c, &a, &b, &c, &d) )
domain_cpuid(current->domain, a, c, &a, &b, &c, &d);
+
+ switch ( a )
+ {
+ case 0xd:
+ {
+ unsigned int sub_leaf, _eax, _ebx, _ecx, _edx;
+ /* EBX value of main leaf 0 depends on enabled xsave features */
+ if ( c == 0 && current->arch.xcr0 )
+ {
+ /* reset EBX to default value first */
+ b = XSAVE_AREA_MIN_SIZE;
+ for ( sub_leaf = 2; sub_leaf < 63; sub_leaf++ )
+ {
+ if ( !(current->arch.xcr0 & (1ULL << sub_leaf)) )
+ continue;
+ domain_cpuid(current->domain, a, c, &_eax, &_ebx, &_ecx,
+ &_edx);
+ if ( (_eax + _ebx) > b )
+ b = _eax + _ebx;
+ }
+ }
+ break;
+ }
+ }
goto out;
}
++++++ 23874-xenpaging_track_number_of_paged_pages_in_struct_domain.patch ++++++
changeset: 23874:651aed73b39c
user: Olaf Hering <olafiaepfle.de>
date: Mon Sep 26 22:19:42 2011 +0100
files: tools/libxc/xc_domain.c tools/libxc/xenctrl.h
xen/arch/x86/mm/p2m.c xen/common/domctl.c xen/include/public/domctl.h
xen/include/xen/sched.h
description:
xenpaging: track number of paged pages in struct domain
The toolstack should know how many pages are paged-out at a given point
in time so it could make smarter decisions about how many pages should
be paged or ballooned.
Add a new member to xen_domctl_getdomaininfo and bump interface version.
Use the new member in xc_dominfo_t.
The SONAME of libxc should be changed if this patch gets applied.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
---
tools/libxc/xc_domain.c | 1 +
tools/libxc/xenctrl.h | 1 +
xen/arch/x86/mm/p2m.c | 5 +++++
xen/common/domctl.c | 1 +
xen/include/public/domctl.h | 3 ++-
xen/include/xen/sched.h | 1 +
6 files changed, 11 insertions(+), 1 deletion(-)
Index: xen-4.1.2-testing/tools/libxc/xc_domain.c
===================================================================
--- xen-4.1.2-testing.orig/tools/libxc/xc_domain.c
+++ xen-4.1.2-testing/tools/libxc/xc_domain.c
@@ -235,6 +235,7 @@ int xc_domain_getinfo(xc_interface *xch,
info->ssidref = domctl.u.getdomaininfo.ssidref;
info->nr_pages = domctl.u.getdomaininfo.tot_pages;
info->nr_shared_pages = domctl.u.getdomaininfo.shr_pages;
+ info->nr_paged_pages = domctl.u.getdomaininfo.paged_pages;
info->max_memkb = domctl.u.getdomaininfo.max_pages << (PAGE_SHIFT-10);
info->shared_info_frame = domctl.u.getdomaininfo.shared_info_frame;
info->cpu_time = domctl.u.getdomaininfo.cpu_time;
Index: xen-4.1.2-testing/tools/libxc/xenctrl.h
===================================================================
--- xen-4.1.2-testing.orig/tools/libxc/xenctrl.h
+++ xen-4.1.2-testing/tools/libxc/xenctrl.h
@@ -353,6 +353,7 @@ typedef struct xc_dominfo {
unsigned int shutdown_reason; /* only meaningful if shutdown==1 */
unsigned long nr_pages; /* current number, not maximum */
unsigned long nr_shared_pages;
+ unsigned long nr_paged_pages;
unsigned long shared_info_frame;
uint64_t cpu_time;
unsigned long max_memkb;
Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
@@ -2913,6 +2913,9 @@ int p2m_mem_paging_evict(struct p2m_doma
/* Put the page back so it gets freed */
put_page(page);
+ /* Track number of paged gfns */
+ atomic_inc(&p2m->domain->paged_pages);
+
return 0;
}
@@ -2997,6 +3000,8 @@ int p2m_mem_paging_prep(struct p2m_domai
audit_p2m(p2m, 1);
p2m_unlock(p2m);
+ atomic_dec(&p2m->domain->paged_pages);
+
return 0;
}
Index: xen-4.1.2-testing/xen/common/domctl.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/domctl.c
+++ xen-4.1.2-testing/xen/common/domctl.c
@@ -139,6 +139,7 @@ void getdomaininfo(struct domain *d, str
info->tot_pages = d->tot_pages;
info->max_pages = d->max_pages;
info->shr_pages = atomic_read(&d->shr_pages);
+ info->paged_pages = atomic_read(&d->paged_pages);
info->shared_info_frame = mfn_to_gmfn(d, __pa(d->shared_info)>>PAGE_SHIFT);
BUG_ON(SHARED_M2P(info->shared_info_frame));
Index: xen-4.1.2-testing/xen/include/public/domctl.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/public/domctl.h
+++ xen-4.1.2-testing/xen/include/public/domctl.h
@@ -35,7 +35,7 @@
#include "xen.h"
#include "grant_table.h"
-#define XEN_DOMCTL_INTERFACE_VERSION 0x00000007
+#define XEN_DOMCTL_INTERFACE_VERSION 0x00000008
/*
* NB. xen_domctl.domain is an IN/OUT parameter for this operation.
@@ -95,6 +95,7 @@ struct xen_domctl_getdomaininfo {
uint64_aligned_t tot_pages;
uint64_aligned_t max_pages;
uint64_aligned_t shr_pages;
+ uint64_aligned_t paged_pages;
uint64_aligned_t shared_info_frame; /* GMFN of shared_info struct */
uint64_aligned_t cpu_time;
uint32_t nr_online_vcpus; /* Number of VCPUs currently online. */
Index: xen-4.1.2-testing/xen/include/xen/sched.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/xen/sched.h
+++ xen-4.1.2-testing/xen/include/xen/sched.h
@@ -215,6 +215,7 @@ struct domain
unsigned int tot_pages; /* number of pages currently possesed */
unsigned int max_pages; /* maximum value for tot_pages */
atomic_t shr_pages; /* number of shared pages */
+ atomic_t paged_pages; /* number of paged-out pages */
unsigned int xenheap_pages; /* # pages allocated from Xen heap */
unsigned int max_vcpus;
++++++ 23897-x86-mce-offline-again.patch ++++++
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1317413803 -3600
# Node ID 2215d7d7382617adbe97831fe35752a027917d1d
# Parent d568e2313fd6f055b66a6c3cb2bca6372b77692e
X86 MCE: Prevent malicious guest access broken page again
To avoid recursive mce.
Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/cpu/mcheck/mce_intel.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/cpu/mcheck/mce_intel.c
+++ xen-4.1.2-testing/xen/arch/x86/cpu/mcheck/mce_intel.c
@@ -639,6 +639,8 @@ static void intel_memerr_dhandler(int bn
/* This is free page */
if (status & PG_OFFLINE_OFFLINED)
result->result = MCA_RECOVERED;
+ else if (status & PG_OFFLINE_AGAIN)
+ result->result = MCA_NO_ACTION;
else if (status & PG_OFFLINE_PENDING) {
/* This page has owner */
if (status & PG_OFFLINE_OWNED) {
Index: xen-4.1.2-testing/xen/common/page_alloc.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/page_alloc.c
+++ xen-4.1.2-testing/xen/common/page_alloc.c
@@ -38,6 +38,7 @@
#include <xen/tmem.h>
#include <xen/tmem_xen.h>
#include <public/sysctl.h>
+#include <public/sched.h>
#include <asm/page.h>
#include <asm/numa.h>
#include <asm/flushtlb.h>
@@ -708,6 +709,19 @@ int offline_page(unsigned long mfn, int
return -EINVAL;
}
+ /*
+ * NB. When broken page belong to guest, usually hypervisor will
+ * notify the guest to handle the broken page. However, hypervisor
+ * need to prevent malicious guest access the broken page again.
+ * Under such case, hypervisor shutdown guest, preventing recursive mce.
+ */
+ if ( (pg->count_info & PGC_broken) && (owner = page_get_owner(pg)) )
+ {
+ *status = PG_OFFLINE_AGAIN;
+ domain_shutdown(owner, SHUTDOWN_crash);
+ return 0;
+ }
+
spin_lock(&heap_lock);
old_info = mark_page_offline(pg, broken);
Index: xen-4.1.2-testing/xen/include/public/sysctl.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/public/sysctl.h
+++ xen-4.1.2-testing/xen/include/public/sysctl.h
@@ -399,6 +399,7 @@ struct xen_sysctl_page_offline_op {
#define PG_OFFLINE_OFFLINED (0x1UL << 1)
#define PG_OFFLINE_PENDING (0x1UL << 2)
#define PG_OFFLINE_FAILED (0x1UL << 3)
+#define PG_OFFLINE_AGAIN (0x1UL << 4)
#define PG_ONLINE_FAILED PG_OFFLINE_FAILED
#define PG_ONLINE_ONLINED PG_OFFLINE_OFFLINED
++++++ 23900-xzalloc.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1317730526 -7200
# Node ID e09ebf7a31f55bb26c3cce7695a435ed20adf05b
# Parent a99d75671a911f9c0d5d11e0fe88a0a65863cb44
introduce xzalloc() & Co
Rather than having to match a call to one of the xmalloc() flavors with
a subsequent memset(), introduce a zeroing variant of each of those
flavors.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
--- a/xen/common/xmalloc_tlsf.c
+++ b/xen/common/xmalloc_tlsf.c
@@ -585,6 +585,13 @@ void *_xmalloc(unsigned long size, unsig
return p;
}
+void *_xzalloc(unsigned long size, unsigned long align)
+{
+ void *p = _xmalloc(size, align);
+
+ return p ? memset(p, 0, size) : p;
+}
+
void xfree(void *p)
{
struct bhdr *b;
--- a/xen/include/acpi/platform/aclinux.h
+++ b/xen/include/acpi/platform/aclinux.h
@@ -77,10 +77,7 @@
#define acpi_thread_id struct vcpu *
#define ACPI_ALLOCATE(a) xmalloc_bytes(a)
-#define ACPI_ALLOCATE_ZEROED(a) ({ \
- void *p = xmalloc_bytes(a); \
- if ( p ) memset(p, 0, a); \
- p; })
+#define ACPI_ALLOCATE_ZEROED(a) xzalloc_bytes(a)
#define ACPI_FREE(a) xfree(a)
#endif /* __ACLINUX_H__ */
--- a/xen/include/xen/xmalloc.h
+++ b/xen/include/xen/xmalloc.h
@@ -8,19 +8,25 @@
/* Allocate space for typed object. */
#define xmalloc(_type) ((_type *)_xmalloc(sizeof(_type), __alignof__(_type)))
+#define xzalloc(_type) ((_type *)_xzalloc(sizeof(_type), __alignof__(_type)))
/* Allocate space for array of typed objects. */
#define xmalloc_array(_type, _num) \
((_type *)_xmalloc_array(sizeof(_type), __alignof__(_type), _num))
+#define xzalloc_array(_type, _num) \
+ ((_type *)_xzalloc_array(sizeof(_type), __alignof__(_type), _num))
/* Allocate untyped storage. */
-#define xmalloc_bytes(_bytes) (_xmalloc(_bytes, SMP_CACHE_BYTES))
+#define xmalloc_bytes(_bytes) _xmalloc(_bytes, SMP_CACHE_BYTES)
+#define xzalloc_bytes(_bytes) _xzalloc(_bytes, SMP_CACHE_BYTES)
/* Free any of the above. */
extern void xfree(void *);
/* Underlying functions */
extern void *_xmalloc(unsigned long size, unsigned long align);
+extern void *_xzalloc(unsigned long size, unsigned long align);
+
static inline void *_xmalloc_array(
unsigned long size, unsigned long align, unsigned long num)
{
@@ -30,6 +36,15 @@ static inline void *_xmalloc_array(
return _xmalloc(size * num, align);
}
+static inline void *_xzalloc_array(
+ unsigned long size, unsigned long align, unsigned long num)
+{
+ /* Check for overflow. */
+ if (size && num > UINT_MAX / size)
+ return NULL;
+ return _xzalloc(size * num, align);
+}
+
/*
* Pooled allocator interface.
*/
++++++ 23904-xenpaging_use_p2m-get_entry_in_p2m_mem_paging_functions.patch
++++++
changeset: 23904:ecab267b85ef
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu Oct 06 12:33:17 2011 +0100
files: xen/arch/x86/mm/p2m.c
description:
xenpaging: use p2m->get_entry() in p2m_mem_paging functions
Use p2m->get_entry() in the p2m_mem_paging functions. This preserves the
p2m_access type when gfn is updated with set_p2m_entry().
Its also a preparation for locking fixes in a subsequent patch.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
---
xen/arch/x86/mm/p2m.c | 25 ++++++++++++++++---------
1 file changed, 16 insertions(+), 9 deletions(-)
Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
@@ -2839,10 +2839,11 @@ int p2m_mem_paging_nominate(struct p2m_d
{
struct page_info *page;
p2m_type_t p2mt;
+ p2m_access_t a;
mfn_t mfn;
int ret;
- mfn = gfn_to_mfn(p2m, gfn, &p2mt);
+ mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
/* Check if mfn is valid */
ret = -EINVAL;
@@ -2869,7 +2870,7 @@ int p2m_mem_paging_nominate(struct p2m_d
/* Fix p2m entry */
p2m_lock(p2m);
- set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_paging_out, p2m->default_access);
+ set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_paging_out, a);
audit_p2m(p2m, 1);
p2m_unlock(p2m);
@@ -2883,11 +2884,12 @@ int p2m_mem_paging_evict(struct p2m_doma
{
struct page_info *page;
p2m_type_t p2mt;
+ p2m_access_t a;
mfn_t mfn;
struct domain *d = p2m->domain;
/* Get mfn */
- mfn = gfn_to_mfn(p2m, gfn, &p2mt);
+ mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
if ( unlikely(!mfn_valid(mfn)) )
return -EINVAL;
@@ -2906,7 +2908,7 @@ int p2m_mem_paging_evict(struct p2m_doma
/* Remove mapping from p2m table */
p2m_lock(p2m);
- set_p2m_entry(p2m, gfn, _mfn(PAGING_MFN), 0, p2m_ram_paged,
p2m->default_access);
+ set_p2m_entry(p2m, gfn, _mfn(PAGING_MFN), 0, p2m_ram_paged, a);
audit_p2m(p2m, 1);
p2m_unlock(p2m);
@@ -2943,6 +2945,7 @@ void p2m_mem_paging_populate(struct p2m_
struct vcpu *v = current;
mem_event_request_t req;
p2m_type_t p2mt;
+ p2m_access_t a;
struct domain *d = p2m->domain;
/* Check that there's space on the ring for this request */
@@ -2955,11 +2958,11 @@ void p2m_mem_paging_populate(struct p2m_
/* Fix p2m mapping */
/* XXX: It seems inefficient to have this here, as it's only needed
* in one case (ept guest accessing paging out page) */
- gfn_to_mfn(p2m, gfn, &p2mt);
+ p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
if ( p2mt == p2m_ram_paged )
{
p2m_lock(p2m);
- set_p2m_entry(p2m, gfn, _mfn(PAGING_MFN), 0, p2m_ram_paging_in_start,
p2m->default_access);
+ set_p2m_entry(p2m, gfn, _mfn(PAGING_MFN), 0, p2m_ram_paging_in_start,
a);
audit_p2m(p2m, 1);
p2m_unlock(p2m);
}
@@ -2988,7 +2991,10 @@ void p2m_mem_paging_populate(struct p2m_
int p2m_mem_paging_prep(struct p2m_domain *p2m, unsigned long gfn)
{
struct page_info *page;
+ p2m_type_t p2mt;
+ p2m_access_t a;
+ p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
/* Get a free page */
page = alloc_domheap_page(p2m->domain, 0);
if ( unlikely(page == NULL) )
@@ -2996,7 +3002,7 @@ int p2m_mem_paging_prep(struct p2m_domai
/* Fix p2m mapping */
p2m_lock(p2m);
- set_p2m_entry(p2m, gfn, page_to_mfn(page), 0, p2m_ram_paging_in,
p2m->default_access);
+ set_p2m_entry(p2m, gfn, page_to_mfn(page), 0, p2m_ram_paging_in, a);
audit_p2m(p2m, 1);
p2m_unlock(p2m);
@@ -3010,6 +3016,7 @@ void p2m_mem_paging_resume(struct p2m_do
struct domain *d = p2m->domain;
mem_event_response_t rsp;
p2m_type_t p2mt;
+ p2m_access_t a;
mfn_t mfn;
/* Pull the response off the ring */
@@ -3018,9 +3025,9 @@ void p2m_mem_paging_resume(struct p2m_do
/* Fix p2m entry if the page was not dropped */
if ( !(rsp.flags & MEM_EVENT_FLAG_DROP_PAGE) )
{
- mfn = gfn_to_mfn(p2m, rsp.gfn, &p2mt);
+ mfn = p2m->get_entry(p2m, rsp.gfn, &p2mt, &a, p2m_query);
p2m_lock(p2m);
- set_p2m_entry(p2m, rsp.gfn, mfn, 0, p2m_ram_rw, p2m->default_access);
+ set_p2m_entry(p2m, rsp.gfn, mfn, 0, p2m_ram_rw, a);
set_gpfn_from_mfn(mfn_x(mfn), rsp.gfn);
audit_p2m(p2m, 1);
p2m_unlock(p2m);
++++++ 23905-xenpaging_fix_locking_in_p2m_mem_paging_functions.patch ++++++
changeset: 23905:50ee6be56460
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu Oct 06 12:33:17 2011 +0100
files: xen/arch/x86/mm/p2m.c
description:
xenpaging: fix locking in p2m_mem_paging functions
As suggested by <hongkaixing@xxxxxxxxxx>, query and adjust the p2mt
under the p2m_lock to prevent races with PoD.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
---
xen/arch/x86/mm/p2m.c | 42 ++++++++++++++++++++++++++----------------
1 file changed, 26 insertions(+), 16 deletions(-)
Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
@@ -2843,6 +2843,8 @@ int p2m_mem_paging_nominate(struct p2m_d
mfn_t mfn;
int ret;
+ p2m_lock(p2m);
+
mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
/* Check if mfn is valid */
@@ -2869,14 +2871,12 @@ int p2m_mem_paging_nominate(struct p2m_d
goto out;
/* Fix p2m entry */
- p2m_lock(p2m);
set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_paging_out, a);
audit_p2m(p2m, 1);
- p2m_unlock(p2m);
-
ret = 0;
out:
+ p2m_unlock(p2m);
return ret;
}
@@ -2887,30 +2887,31 @@ int p2m_mem_paging_evict(struct p2m_doma
p2m_access_t a;
mfn_t mfn;
struct domain *d = p2m->domain;
+ int ret = -EINVAL;
+
+ p2m_lock(p2m);
/* Get mfn */
mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
if ( unlikely(!mfn_valid(mfn)) )
- return -EINVAL;
+ goto out;
if ( (p2mt == p2m_ram_paged) || (p2mt == p2m_ram_paging_in) ||
(p2mt == p2m_ram_paging_in_start) )
- return -EINVAL;
+ goto out;
/* Get the page so it doesn't get modified under Xen's feet */
page = mfn_to_page(mfn);
if ( unlikely(!get_page(page, d)) )
- return -EINVAL;
+ goto out;
/* Decrement guest domain's ref count of the page */
if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
put_page(page);
/* Remove mapping from p2m table */
- p2m_lock(p2m);
set_p2m_entry(p2m, gfn, _mfn(PAGING_MFN), 0, p2m_ram_paged, a);
audit_p2m(p2m, 1);
- p2m_unlock(p2m);
/* Put the page back so it gets freed */
put_page(page);
@@ -2918,7 +2919,11 @@ int p2m_mem_paging_evict(struct p2m_doma
/* Track number of paged gfns */
atomic_inc(&p2m->domain->paged_pages);
- return 0;
+ ret = 0;
+
+ out:
+ p2m_unlock(p2m);
+ return ret;
}
void p2m_mem_paging_drop_page(struct p2m_domain *p2m, unsigned long gfn)
@@ -2958,14 +2963,14 @@ void p2m_mem_paging_populate(struct p2m_
/* Fix p2m mapping */
/* XXX: It seems inefficient to have this here, as it's only needed
* in one case (ept guest accessing paging out page) */
+ p2m_lock(p2m);
p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
if ( p2mt == p2m_ram_paged )
{
- p2m_lock(p2m);
set_p2m_entry(p2m, gfn, _mfn(PAGING_MFN), 0, p2m_ram_paging_in_start,
a);
audit_p2m(p2m, 1);
- p2m_unlock(p2m);
}
+ p2m_unlock(p2m);
/* Pause domain */
if ( v->domain->domain_id == d->domain_id )
@@ -2993,22 +2998,27 @@ int p2m_mem_paging_prep(struct p2m_domai
struct page_info *page;
p2m_type_t p2mt;
p2m_access_t a;
+ int ret = -ENOMEM;
+
+ p2m_lock(p2m);
p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
+
/* Get a free page */
page = alloc_domheap_page(p2m->domain, 0);
if ( unlikely(page == NULL) )
- return -ENOMEM;
+ goto out;
/* Fix p2m mapping */
- p2m_lock(p2m);
set_p2m_entry(p2m, gfn, page_to_mfn(page), 0, p2m_ram_paging_in, a);
audit_p2m(p2m, 1);
- p2m_unlock(p2m);
atomic_dec(&p2m->domain->paged_pages);
- return 0;
+ ret = 0;
+ out:
+ p2m_unlock(p2m);
+ return ret;
}
void p2m_mem_paging_resume(struct p2m_domain *p2m)
@@ -3025,8 +3035,8 @@ void p2m_mem_paging_resume(struct p2m_do
/* Fix p2m entry if the page was not dropped */
if ( !(rsp.flags & MEM_EVENT_FLAG_DROP_PAGE) )
{
- mfn = p2m->get_entry(p2m, rsp.gfn, &p2mt, &a, p2m_query);
p2m_lock(p2m);
+ mfn = p2m->get_entry(p2m, rsp.gfn, &p2mt, &a, p2m_query);
set_p2m_entry(p2m, rsp.gfn, mfn, 0, p2m_ram_rw, a);
set_gpfn_from_mfn(mfn_x(mfn), rsp.gfn);
audit_p2m(p2m, 1);
++++++
23906-xenpaging_remove_confusing_comment_from_p2m_mem_paging_populate.patch
++++++
changeset: 23906:7bf85c3fd9f0
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu Oct 06 12:33:17 2011 +0100
files: xen/arch/x86/mm/p2m.c
description:
xenpaging: remove confusing comment from p2m_mem_paging_populate
Currently there is no way to avoid the double check of the p2mt
because p2m_mem_paging_populate() is called from many places without
the p2m_lock held. Upcoming changes will move the function into
gfn_to_mfn(), so its interface could be changed and the extra
p2m_lock/get_entry can be removed.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
---
xen/arch/x86/mm/p2m.c | 2 --
1 file changed, 2 deletions(-)
Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
@@ -2961,8 +2961,6 @@ void p2m_mem_paging_populate(struct p2m_
req.type = MEM_EVENT_TYPE_PAGING;
/* Fix p2m mapping */
- /* XXX: It seems inefficient to have this here, as it's only needed
- * in one case (ept guest accessing paging out page) */
p2m_lock(p2m);
p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
if ( p2mt == p2m_ram_paged )
++++++ 23908-p2m_query-modify_p2mt_with_p2m_lock_held.patch ++++++
changeset: 23908:88b6e08b8aa8
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu Oct 06 14:15:43 2011 +0100
files: xen/arch/x86/mm/p2m.c
description:
p2m: query/modify p2mt with p2m_lock held
Query and update the p2mt in set_mmio_p2m_entry, clear_mmio_p2m_entry
and set_shared_p2m_entry with the p2m_lock held.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
---
xen/arch/x86/mm/p2m.c | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)
Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
@@ -2751,9 +2751,11 @@ set_mmio_p2m_entry(struct p2m_domain *p2
if ( !paging_mode_translate(p2m->domain) )
return 0;
+ p2m_lock(p2m);
omfn = gfn_to_mfn_query(p2m, gfn, &ot);
if ( p2m_is_grant(ot) )
{
+ p2m_unlock(p2m);
domain_crash(p2m->domain);
return 0;
}
@@ -2764,7 +2766,6 @@ set_mmio_p2m_entry(struct p2m_domain *p2
}
P2M_DEBUG("set mmio %lx %lx\n", gfn, mfn_x(mfn));
- p2m_lock(p2m);
rc = set_p2m_entry(p2m, gfn, mfn, 0, p2m_mmio_direct, p2m->default_access);
audit_p2m(p2m, 1);
p2m_unlock(p2m);
@@ -2785,18 +2786,20 @@ clear_mmio_p2m_entry(struct p2m_domain *
if ( !paging_mode_translate(p2m->domain) )
return 0;
- mfn = gfn_to_mfn(p2m, gfn, &t);
+ p2m_lock(p2m);
+ mfn = gfn_to_mfn_query(p2m, gfn, &t);
/* Do not use mfn_valid() here as it will usually fail for MMIO pages. */
if ( (INVALID_MFN == mfn_x(mfn)) || (t != p2m_mmio_direct) )
{
gdprintk(XENLOG_ERR,
"clear_mmio_p2m_entry: gfn_to_mfn failed! gfn=%08lx\n", gfn);
- return 0;
+ goto out;
}
- p2m_lock(p2m);
rc = set_p2m_entry(p2m, gfn, _mfn(INVALID_MFN), 0, p2m_invalid,
p2m->default_access);
audit_p2m(p2m, 1);
+
+out:
p2m_unlock(p2m);
return rc;
@@ -2813,6 +2816,8 @@ set_shared_p2m_entry(struct p2m_domain *
if ( !paging_mode_translate(p2m->domain) )
return 0;
+ if ( need_lock )
+ p2m_lock(p2m);
omfn = gfn_to_mfn_query(p2m, gfn, &ot);
/* At the moment we only allow p2m change if gfn has already been made
* sharable first */
@@ -2822,8 +2827,6 @@ set_shared_p2m_entry(struct p2m_domain *
set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
P2M_DEBUG("set shared %lx %lx\n", gfn, mfn_x(mfn));
- if ( need_lock )
- p2m_lock(p2m);
rc = set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_shared, p2m->default_access);
if ( need_lock )
p2m_unlock(p2m);
++++++ 23925-x86-AMD-ARAT-Fam12.patch ++++++
# HG changeset patch
# User Boris Ostrovsky <boris.ostrovsky@xxxxxxx>
# Date 1317976335 -7200
# Node ID 08d6ba4e447d6c13c6dfac5c23e84b73961cb109
# Parent 159be83e5fe9111bb30d8b1f83127f5724d44424
x86/AMD: Do not enable ARAT feature on AMD processors below family 0x12
Determining whether an AMD processor is affected by erratum 400 may
have some corner cases and handling these cases is somewhat complicated.
In the interest of simplicity we won't claim ARAT support on processor
families below 0x12.
Mirrors Linux commit e9cdd343a5e42c43bcda01e609fa23089e026470
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxx>
Committed-by: Jan Beulich <jbeulich@xxxxxxxx>
--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -632,8 +632,11 @@ static void __devinit init_amd(struct cp
}
#endif
- /* As a rule processors have APIC timer running in deep C states */
- if (c->x86 > 0xf && !cpu_has_amd_erratum(c, AMD_ERRATUM_400))
+ /*
+ * Family 0x12 and above processors have APIC timer
+ * running in deep C states.
+ */
+ if (c->x86 > 0x11)
set_bit(X86_FEATURE_ARAT, c->x86_capability);
/* Prevent TSC drift in non single-processor, single-core platforms. */
++++++ 23933-pt-bus2bridge-update.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1318231292 -7200
# Node ID 0b81515b8e982e8012c28e5f8d9e965c63b6503d
# Parent 0c2bfd1f9c6822fbd23af0043f83d93be976323c
passthrough: update bus2bridge mapping as PCI devices get added/removed
This deals with two limitations at once: On device removal, the
mapping did not get updated so far at all, and hotplugged devices as
well as such not discoverable by Xen's initial bus scan (including the
case where a non-zero PCI segment wasn't accessible during Xen boot,
but became accessible after Dom0 validated access information against
ACPI data) wouldn't cause updates to the mapping either.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: "Kay, Allen M" <allen.m.kay@xxxxxxxxx>
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -63,11 +63,67 @@ static struct pci_dev *alloc_pdev(u8 bus
list_add(&pdev->alldevs_list, &alldevs_list);
spin_lock_init(&pdev->msix_table_lock);
+ /* update bus2bridge */
+ switch ( pdev_type(bus, devfn) )
+ {
+ u8 sec_bus, sub_bus;
+
+ case DEV_TYPE_PCIe_BRIDGE:
+ break;
+
+ case DEV_TYPE_PCIe2PCI_BRIDGE:
+ case DEV_TYPE_LEGACY_PCI_BRIDGE:
+ sec_bus = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ PCI_SECONDARY_BUS);
+ sub_bus = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ PCI_SUBORDINATE_BUS);
+
+ spin_lock(&bus2bridge_lock);
+ for ( ; sec_bus <= sub_bus; sec_bus++ )
+ {
+ bus2bridge[sec_bus].map = 1;
+ bus2bridge[sec_bus].bus = bus;
+ bus2bridge[sec_bus].devfn = devfn;
+ }
+ spin_unlock(&bus2bridge_lock);
+ break;
+
+ case DEV_TYPE_PCIe_ENDPOINT:
+ case DEV_TYPE_PCI:
+ break;
+
+ default:
+ printk(XENLOG_WARNING "%s: unknown type: %02x:%02x.%u\n",
+ __func__, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+ break;
+ }
+
return pdev;
}
static void free_pdev(struct pci_dev *pdev)
{
+ /* update bus2bridge */
+ switch ( pdev_type(pdev->bus, pdev->devfn) )
+ {
+ u8 dev, func, sec_bus, sub_bus;
+
+ case DEV_TYPE_PCIe2PCI_BRIDGE:
+ case DEV_TYPE_LEGACY_PCI_BRIDGE:
+ dev = PCI_SLOT(pdev->devfn);
+ func = PCI_FUNC(pdev->devfn);
+ sec_bus = pci_conf_read8(pdev->bus, dev, func,
+ PCI_SECONDARY_BUS);
+ sub_bus = pci_conf_read8(pdev->bus, dev, func,
+ PCI_SUBORDINATE_BUS);
+
+ spin_lock(&bus2bridge_lock);
+ for ( ; sec_bus <= sub_bus; sec_bus++ )
+ bus2bridge[sec_bus] = bus2bridge[pdev->bus];
+ spin_unlock(&bus2bridge_lock);
+ break;
+ }
+
list_del(&pdev->alldevs_list);
xfree(pdev);
}
@@ -432,8 +488,6 @@ int __init scan_pci_devices(void)
{
struct pci_dev *pdev;
int bus, dev, func;
- u8 sec_bus, sub_bus;
- int type;
spin_lock(&pcidevs_lock);
for ( bus = 0; bus < 256; bus++ )
@@ -453,41 +507,6 @@ int __init scan_pci_devices(void)
return -ENOMEM;
}
- /* build bus2bridge */
- type = pdev_type(bus, PCI_DEVFN(dev, func));
- switch ( type )
- {
- case DEV_TYPE_PCIe_BRIDGE:
- break;
-
- case DEV_TYPE_PCIe2PCI_BRIDGE:
- case DEV_TYPE_LEGACY_PCI_BRIDGE:
- sec_bus = pci_conf_read8(bus, dev, func,
- PCI_SECONDARY_BUS);
- sub_bus = pci_conf_read8(bus, dev, func,
- PCI_SUBORDINATE_BUS);
-
- spin_lock(&bus2bridge_lock);
- for ( sub_bus &= 0xff; sec_bus <= sub_bus; sec_bus++ )
- {
- bus2bridge[sec_bus].map = 1;
- bus2bridge[sec_bus].bus = bus;
- bus2bridge[sec_bus].devfn = PCI_DEVFN(dev, func);
- }
- spin_unlock(&bus2bridge_lock);
- break;
-
- case DEV_TYPE_PCIe_ENDPOINT:
- case DEV_TYPE_PCI:
- break;
-
- default:
- printk("%s: unknown type: bdf = %x:%x.%x\n",
- __func__, bus, dev, func);
- spin_unlock(&pcidevs_lock);
- return -EINVAL;
- }
-
if ( !func && !(pci_conf_read8(bus, dev, func,
PCI_HEADER_TYPE) & 0x80) )
break;
++++++ 23943-xenpaging_clear_page_content_after_evict.patch ++++++
changeset: 23943:1185ae04b5aa
user: Olaf Hering <olaf@xxxxxxxxx>
date: Tue Oct 11 10:46:28 2011 +0100
files: tools/xenpaging/xenpaging.c xen/arch/x86/mm/p2m.c
description:
xenpaging: clear page content after evict
If the guest happens to read from the gfn while xenpaging is in the process of
evicting the page, the guest may read zeros instead of actual data.
Also if eviction fails the page content will be corrupted and xenpaging wont
attempt to restore the page.
Remove page scrubbing from pager and do it after successful eviction.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
---
tools/xenpaging/xenpaging.c | 3 ---
xen/arch/x86/mm/p2m.c | 3 +++
2 files changed, 3 insertions(+), 3 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -455,9 +455,6 @@ static int xenpaging_evict_page(xenpagin
goto out;
}
- /* Clear page */
- memset(page, 0, PAGE_SIZE);
-
munmap(page, PAGE_SIZE);
/* Tell Xen to evict page */
Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
@@ -2916,6 +2916,9 @@ int p2m_mem_paging_evict(struct p2m_doma
set_p2m_entry(p2m, gfn, _mfn(PAGING_MFN), 0, p2m_ram_paged, a);
audit_p2m(p2m, 1);
+ /* Clear content before returning the page to Xen */
+ scrub_one_page(page);
+
/* Put the page back so it gets freed */
put_page(page);
++++++ 23949-constify_vcpu_set_affinitys_second_parameter.patch ++++++
changeset: 23949:39df16923958
user: Jan Beulich <jbeulich@xxxxxxxx>
date: Thu Oct 13 10:00:13 2011 +0200
files: xen/arch/x86/cpu/mcheck/vmce.c xen/arch/x86/traps.c
xen/common/schedule.c xen/include/xen/sched.h
description:
constify vcpu_set_affinity()'s second parameter
None of the callers actually make use of the function's returning of
the old affinity through its second parameter, and eliminating this
capability allows some callers to no longer use a local variable here,
reducing their stack footprint significantly when building with large
NR_CPUS.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
---
xen/arch/x86/cpu/mcheck/vmce.c | 5 +----
xen/arch/x86/traps.c | 10 ++--------
xen/common/schedule.c | 6 ++----
xen/include/xen/sched.h | 2 +-
4 files changed, 6 insertions(+), 17 deletions(-)
Index: xen-4.1.2-testing/xen/arch/x86/cpu/mcheck/vmce.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/cpu/mcheck/vmce.c
+++ xen-4.1.2-testing/xen/arch/x86/cpu/mcheck/vmce.c
@@ -304,7 +304,6 @@ int vmce_wrmsr(u32 msr, u64 val)
int inject_vmce(struct domain *d)
{
int cpu = smp_processor_id();
- cpumask_t affinity;
/* PV guest and HVM guest have different vMCE# injection methods. */
if ( !test_and_set_bool(d->vcpu[0]->mce_pending) )
@@ -323,11 +322,9 @@ int inject_vmce(struct domain *d)
{
d->vcpu[0]->cpu_affinity_tmp =
d->vcpu[0]->cpu_affinity;
- cpus_clear(affinity);
- cpu_set(cpu, affinity);
mce_printk(MCE_VERBOSE, "MCE: CPU%d set affinity, old %d\n",
cpu, d->vcpu[0]->processor);
- vcpu_set_affinity(d->vcpu[0], &affinity);
+ vcpu_set_affinity(d->vcpu[0], cpumask_of(cpu));
vcpu_kick(d->vcpu[0]);
}
else
Index: xen-4.1.2-testing/xen/arch/x86/traps.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/traps.c
+++ xen-4.1.2-testing/xen/arch/x86/traps.c
@@ -3104,7 +3104,6 @@ static void nmi_mce_softirq(void)
{
int cpu = smp_processor_id();
struct softirq_trap *st = &per_cpu(softirq_trap, cpu);
- cpumask_t affinity;
BUG_ON(st == NULL);
BUG_ON(st->vcpu == NULL);
@@ -3120,9 +3119,7 @@ static void nmi_mce_softirq(void)
* Make sure to wakeup the vcpu on the
* specified processor.
*/
- cpus_clear(affinity);
- cpu_set(st->processor, affinity);
- vcpu_set_affinity(st->vcpu, &affinity);
+ vcpu_set_affinity(st->vcpu, cpumask_of(st->processor));
/* Affinity is restored in the iret hypercall. */
}
@@ -3192,14 +3189,11 @@ void async_exception_cleanup(struct vcpu
!test_and_set_bool(curr->mce_pending) )
{
int cpu = smp_processor_id();
- cpumask_t affinity;
curr->cpu_affinity_tmp = curr->cpu_affinity;
- cpus_clear(affinity);
- cpu_set(cpu, affinity);
printk(XENLOG_DEBUG "MCE: CPU%d set affinity, old %d\n",
cpu, curr->processor);
- vcpu_set_affinity(curr, &affinity);
+ vcpu_set_affinity(curr, cpumask_of(cpu));
}
}
}
Index: xen-4.1.2-testing/xen/common/schedule.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/schedule.c
+++ xen-4.1.2-testing/xen/common/schedule.c
@@ -593,9 +593,9 @@ int cpu_disable_scheduler(unsigned int c
return ret;
}
-int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity)
+int vcpu_set_affinity(struct vcpu *v, const cpumask_t *affinity)
{
- cpumask_t online_affinity, old_affinity;
+ cpumask_t online_affinity;
cpumask_t *online;
if ( v->domain->is_pinned )
@@ -607,9 +607,7 @@ int vcpu_set_affinity(struct vcpu *v, cp
vcpu_schedule_lock_irq(v);
- old_affinity = v->cpu_affinity;
v->cpu_affinity = *affinity;
- *affinity = old_affinity;
if ( !cpu_isset(v->processor, v->cpu_affinity) )
set_bit(_VPF_migrating, &v->pause_flags);
Index: xen-4.1.2-testing/xen/include/xen/sched.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/xen/sched.h
+++ xen-4.1.2-testing/xen/include/xen/sched.h
@@ -623,7 +623,7 @@ void scheduler_free(struct scheduler *sc
int schedule_cpu_switch(unsigned int cpu, struct cpupool *c);
void vcpu_force_reschedule(struct vcpu *v);
int cpu_disable_scheduler(unsigned int cpu);
-int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
+int vcpu_set_affinity(struct vcpu *v, const cpumask_t *affinity);
void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
uint64_t get_cpu_idle_time(unsigned int cpu);
++++++ 23953-xenpaging_handle_evict_failures.patch ++++++
changeset: 23953:eda18b27de6e
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu Oct 13 12:21:10 2011 +0100
files: tools/xenpaging/xenpaging.c xen/arch/x86/mm.c
xen/arch/x86/mm/p2m.c xen/include/public/mem_event.h
description:
xenpaging: handle evict failures
Evict of a nominated gfn must fail if some other process mapped the
page without checking the p2mt of that gfn first.
Add a check to cancel eviction if the page usage count is not 1.
Handle the possible eviction failure in the page-in paths.
After nominate and before evict, something may check the p2mt and call
populate. Handle this case and let the gfn enter the page-in path. The
gfn may still be connected to a mfn, so there is no need to allocate a
new page in prep.
Adjust do_mmu_update to return -ENOENT only if the gfn has entered the
page-in path and if it is not yet connected to a mfn. Otherwise
linux_privcmd_map_foreign_bulk() may loop forever.
Add MEM_EVENT_FLAG_EVICT_FAIL to inform pager that a page-in request for
a possible not-evicted page was sent. xenpaging does currently not need
that flag because failure to evict a gfn will be caught.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
---
tools/xenpaging/xenpaging.c | 10 ++++---
xen/arch/x86/mm.c | 8 ++---
xen/arch/x86/mm/p2m.c | 55 +++++++++++++++++++++++++++++------------
xen/include/public/mem_event.h | 1
4 files changed, 50 insertions(+), 24 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -734,10 +734,12 @@ int main(int argc, char *argv[])
}
else
{
- DPRINTF("page already populated (domain = %d; vcpu = %d;"
- " gfn = %"PRIx64"; paused = %d)\n",
- paging->mem_event.domain_id, req.vcpu_id,
- req.gfn, req.flags & MEM_EVENT_FLAG_VCPU_PAUSED);
+ DPRINTF("page %s populated (domain = %d; vcpu = %d;"
+ " gfn = %"PRIx64"; paused = %d; evict_fail = %d)\n",
+ req.flags & MEM_EVENT_FLAG_EVICT_FAIL ? "not" :
"already",
+ paging->mem_event.domain_id, req.vcpu_id, req.gfn,
+ !!(req.flags & MEM_EVENT_FLAG_VCPU_PAUSED) ,
+ !!(req.flags & MEM_EVENT_FLAG_EVICT_FAIL) );
/* Tell Xen to resume the vcpu */
/* XXX: Maybe just check if the vcpu was paused? */
Index: xen-4.1.2-testing/xen/arch/x86/mm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm.c
+++ xen-4.1.2-testing/xen/arch/x86/mm.c
@@ -3502,7 +3502,7 @@ int do_mmu_update(
rc = -ENOENT;
break;
}
- else if ( p2m_ram_paging_in_start == l1e_p2mt )
+ else if ( p2m_ram_paging_in_start == l1e_p2mt &&
!mfn_valid(mfn) )
{
rc = -ENOENT;
break;
@@ -3543,7 +3543,7 @@ int do_mmu_update(
rc = -ENOENT;
break;
}
- else if ( p2m_ram_paging_in_start == l2e_p2mt )
+ else if ( p2m_ram_paging_in_start == l2e_p2mt &&
!mfn_valid(mfn) )
{
rc = -ENOENT;
break;
@@ -3572,7 +3572,7 @@ int do_mmu_update(
rc = -ENOENT;
break;
}
- else if ( p2m_ram_paging_in_start == l3e_p2mt )
+ else if ( p2m_ram_paging_in_start == l3e_p2mt &&
!mfn_valid(mfn) )
{
rc = -ENOENT;
break;
@@ -3602,7 +3602,7 @@ int do_mmu_update(
rc = -ENOENT;
break;
}
- else if ( p2m_ram_paging_in_start == l4e_p2mt )
+ else if ( p2m_ram_paging_in_start == l4e_p2mt &&
!mfn_valid(mfn) )
{
rc = -ENOENT;
break;
Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
@@ -2899,15 +2899,24 @@ int p2m_mem_paging_evict(struct p2m_doma
if ( unlikely(!mfn_valid(mfn)) )
goto out;
- if ( (p2mt == p2m_ram_paged) || (p2mt == p2m_ram_paging_in) ||
- (p2mt == p2m_ram_paging_in_start) )
+ /* Allow only nominated pages */
+ if ( p2mt != p2m_ram_paging_out )
goto out;
+ ret = -EBUSY;
/* Get the page so it doesn't get modified under Xen's feet */
page = mfn_to_page(mfn);
if ( unlikely(!get_page(page, d)) )
goto out;
+ /* Check page count and type once more */
+ if ( (page->count_info & (PGC_count_mask | PGC_allocated)) !=
+ (2 | PGC_allocated) )
+ goto out_put;
+
+ if ( (page->u.inuse.type_info & PGT_type_mask) != PGT_none )
+ goto out_put;
+
/* Decrement guest domain's ref count of the page */
if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
put_page(page);
@@ -2919,14 +2928,15 @@ int p2m_mem_paging_evict(struct p2m_doma
/* Clear content before returning the page to Xen */
scrub_one_page(page);
- /* Put the page back so it gets freed */
- put_page(page);
-
/* Track number of paged gfns */
atomic_inc(&p2m->domain->paged_pages);
ret = 0;
+ out_put:
+ /* Put the page back so it gets freed */
+ put_page(page);
+
out:
p2m_unlock(p2m);
return ret;
@@ -2957,6 +2967,7 @@ void p2m_mem_paging_populate(struct p2m_
mem_event_request_t req;
p2m_type_t p2mt;
p2m_access_t a;
+ mfn_t mfn;
struct domain *d = p2m->domain;
/* Check that there's space on the ring for this request */
@@ -2968,20 +2979,26 @@ void p2m_mem_paging_populate(struct p2m_
/* Fix p2m mapping */
p2m_lock(p2m);
- p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
- if ( p2mt == p2m_ram_paged )
+ mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
+ /* Allow only nominated or evicted pages to enter page-in path */
+ if ( p2mt == p2m_ram_paging_out || p2mt == p2m_ram_paged )
{
- set_p2m_entry(p2m, gfn, _mfn(PAGING_MFN), 0, p2m_ram_paging_in_start,
a);
+ /* Evict will fail now, tag this request for pager */
+ if ( p2mt == p2m_ram_paging_out )
+ req.flags |= MEM_EVENT_FLAG_EVICT_FAIL;
+
+ set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_paging_in_start, a);
audit_p2m(p2m, 1);
}
p2m_unlock(p2m);
- /* Pause domain */
- if ( v->domain->domain_id == d->domain_id )
+ /* Pause domain if request came from guest and gfn has paging type */
+ if ( p2m_is_paging(p2mt) && v->domain->domain_id == d->domain_id )
{
vcpu_pause_nosync(v);
req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
}
+ /* No need to inform pager if the gfn is not in the page-out path */
else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
{
/* gfn is already on its way back and vcpu is not paused */
@@ -3002,19 +3019,25 @@ int p2m_mem_paging_prep(struct p2m_domai
struct page_info *page;
p2m_type_t p2mt;
p2m_access_t a;
+ mfn_t mfn;
int ret = -ENOMEM;
p2m_lock(p2m);
- p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
+ mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
- /* Get a free page */
- page = alloc_domheap_page(p2m->domain, 0);
- if ( unlikely(page == NULL) )
- goto out;
+ /* Allocate a page if the gfn does not have one yet */
+ if ( !mfn_valid(mfn) )
+ {
+ /* Get a free page */
+ page = alloc_domheap_page(p2m->domain, 0);
+ if ( unlikely(page == NULL) )
+ goto out;
+ mfn = page_to_mfn(page);
+ }
/* Fix p2m mapping */
- set_p2m_entry(p2m, gfn, page_to_mfn(page), 0, p2m_ram_paging_in, a);
+ set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_paging_in, a);
audit_p2m(p2m, 1);
atomic_dec(&p2m->domain->paged_pages);
Index: xen-4.1.2-testing/xen/include/public/mem_event.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/public/mem_event.h
+++ xen-4.1.2-testing/xen/include/public/mem_event.h
@@ -38,6 +38,7 @@
/* Memory event flags */
#define MEM_EVENT_FLAG_VCPU_PAUSED (1 << 0)
#define MEM_EVENT_FLAG_DROP_PAGE (1 << 1)
+#define MEM_EVENT_FLAG_EVICT_FAIL (1 << 2)
/* Reasons for the memory event request */
#define MEM_EVENT_REASON_UNKNOWN 0 /* typical reason */
++++++ 23955-x86-pv-cpuid-xsave.patch ++++++
# HG changeset patch
# User Shan Haitao <haitao.shan@xxxxxxxxx>
# Date 1318517935 -3600
# Node ID bbde1453cbd95d6d36febe9c552f9cfa26b5c49e
# Parent c1bd53fac3d5e5868352894d62dbacd6de0b0e7a
x86: Further fixes for xsave leaf in pv_cpuid().
Signed-off-by: Shan Haitao <haitao.shan@xxxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -766,16 +766,18 @@ static void pv_cpuid(struct cpu_user_reg
if ( current->domain->domain_id != 0 )
{
+ unsigned int cpuid_leaf = a, sub_leaf = c;
+
if ( !cpuid_hypervisor_leaves(a, c, &a, &b, &c, &d) )
domain_cpuid(current->domain, a, c, &a, &b, &c, &d);
- switch ( a )
+ switch ( cpuid_leaf )
{
case 0xd:
{
- unsigned int sub_leaf, _eax, _ebx, _ecx, _edx;
+ unsigned int _eax, _ebx, _ecx, _edx;
/* EBX value of main leaf 0 depends on enabled xsave features */
- if ( c == 0 && current->arch.xcr0 )
+ if ( sub_leaf == 0 && current->arch.xcr0 )
{
/* reset EBX to default value first */
b = XSAVE_AREA_MIN_SIZE;
@@ -783,8 +785,8 @@ static void pv_cpuid(struct cpu_user_reg
{
if ( !(current->arch.xcr0 & (1ULL << sub_leaf)) )
continue;
- domain_cpuid(current->domain, a, c, &_eax, &_ebx, &_ecx,
- &_edx);
+ domain_cpuid(current->domain, cpuid_leaf, sub_leaf,
+ &_eax, &_ebx, &_ecx, &_edx);
if ( (_eax + _ebx) > b )
b = _eax + _ebx;
}
++++++ 23957-cpufreq-error-paths.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1318580154 -7200
# Node ID de316831471a8e0f11f615e7bf336dee2ba811e7
# Parent a65693f9fb1250ff4819774a70284693705db9e7
cpufreq: error path fixes
This fixes an actual bug (failure to exit from a function after an
allocation failure), an inconsistency (not removing the cpufreq_dom
list member upon failure), and a latent bug (not clearing the current
governor upon governor initialization failure when there was no old
one; latent because the only current code path leading to this
situation frees the policy upon failure and hence the governor not
getting cleared is benign).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
--- a/xen/drivers/cpufreq/cpufreq.c
+++ b/xen/drivers/cpufreq/cpufreq.c
@@ -195,8 +195,10 @@ int cpufreq_add_cpu(unsigned int cpu)
if (!domexist || hw_all) {
policy = xmalloc(struct cpufreq_policy);
- if (!policy)
+ if (!policy) {
ret = -ENOMEM;
+ goto err0;
+ }
memset(policy, 0, sizeof(struct cpufreq_policy));
policy->cpu = cpu;
@@ -206,7 +208,7 @@ int cpufreq_add_cpu(unsigned int cpu)
if (ret) {
xfree(policy);
per_cpu(cpufreq_cpu_policy, cpu) = NULL;
- return ret;
+ goto err0;
}
if (cpufreq_verbose)
printk("CPU %u initialization completed\n", cpu);
@@ -263,7 +265,7 @@ err1:
cpufreq_driver->exit(policy);
xfree(policy);
}
-
+err0:
if (cpus_empty(cpufreq_dom->map)) {
list_del(&cpufreq_dom->node);
xfree(cpufreq_dom);
--- a/xen/drivers/cpufreq/utility.c
+++ b/xen/drivers/cpufreq/utility.c
@@ -462,8 +462,8 @@ int __cpufreq_set_policy(struct cpufreq_
data->governor->name);
/* new governor failed, so re-start old one */
+ data->governor = old_gov;
if (old_gov) {
- data->governor = old_gov;
__cpufreq_governor(data, CPUFREQ_GOV_START);
printk(KERN_WARNING "Still stay at %s governor\n",
data->governor->name);
++++++ 23978-xenpaging_check_p2mt_in_p2m_mem_paging_functions.patch ++++++
changeset: 23978:fd3fa0a85020
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu Oct 20 11:25:55 2011 +0100
files: xen/arch/x86/mm/p2m.c
description:
xenpaging: check p2mt in p2m_mem_paging functions
Add checks to forward the p2m_ram_paging* state properly during page-in.
Resume can be called several times if several vcpus called populate for
the gfn. Finish resume only once.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
---
xen/arch/x86/mm/p2m.c | 20 ++++++++++++++++----
1 file changed, 16 insertions(+), 4 deletions(-)
Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
@@ -3020,16 +3020,22 @@ int p2m_mem_paging_prep(struct p2m_domai
p2m_type_t p2mt;
p2m_access_t a;
mfn_t mfn;
- int ret = -ENOMEM;
+ int ret;
p2m_lock(p2m);
mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
+ ret = -ENOENT;
+ /* Allow only missing pages */
+ if ( p2mt != p2m_ram_paging_in_start )
+ goto out;
+
/* Allocate a page if the gfn does not have one yet */
if ( !mfn_valid(mfn) )
{
/* Get a free page */
+ ret = -ENOMEM;
page = alloc_domheap_page(p2m->domain, 0);
if ( unlikely(page == NULL) )
goto out;
@@ -3064,9 +3070,15 @@ void p2m_mem_paging_resume(struct p2m_do
{
p2m_lock(p2m);
mfn = p2m->get_entry(p2m, rsp.gfn, &p2mt, &a, p2m_query);
- set_p2m_entry(p2m, rsp.gfn, mfn, 0, p2m_ram_rw, a);
- set_gpfn_from_mfn(mfn_x(mfn), rsp.gfn);
- audit_p2m(p2m, 1);
+ /* Allow only pages which were prepared properly, or pages which
+ * were nominated but not evicted */
+ if ( mfn_valid(mfn) &&
+ (p2mt == p2m_ram_paging_in || p2mt == p2m_ram_paging_in_start) )
+ {
+ set_p2m_entry(p2m, rsp.gfn, mfn, 0, p2m_ram_rw, a);
+ set_gpfn_from_mfn(mfn_x(mfn), rsp.gfn);
+ audit_p2m(p2m, 1);
+ }
p2m_unlock(p2m);
}
++++++ 23979-xenpaging_document_p2m_mem_paging_functions.patch ++++++
changeset: 23979:18306b054799
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu Oct 20 11:25:58 2011 +0100
files: xen/arch/x86/mm/p2m.c
description:
xenpaging: document p2m_mem_paging functions
Add some documentation for each of the p2m_mem_paging functions to describe
what they ought to do.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
---
xen/arch/x86/mm/p2m.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 93 insertions(+)
Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
@@ -2838,6 +2838,24 @@ set_shared_p2m_entry(struct p2m_domain *
}
#ifdef __x86_64__
+/**
+ * p2m_mem_paging_nominate - Mark a guest page as to-be-paged-out
+ * @d: guest domain
+ * @gfn: guest page to nominate
+ *
+ * Returns 0 for success or negative errno values if gfn is not pageable.
+ *
+ * p2m_mem_paging_nominate() is called by the pager and checks if a guest page
+ * can be paged out. If the following conditions are met the p2mt will be
+ * changed:
+ * - the gfn is backed by a mfn
+ * - the p2mt of the gfn is pageable
+ * - the mfn is not used for IO
+ * - the mfn has exactly one user and has no special meaning
+ *
+ * Once the p2mt is changed the page is readonly for the guest. On success the
+ * pager can write the page contents to disk and later evict the page.
+ */
int p2m_mem_paging_nominate(struct p2m_domain *p2m, unsigned long gfn)
{
struct page_info *page;
@@ -2883,6 +2901,25 @@ int p2m_mem_paging_nominate(struct p2m_d
return ret;
}
+/**
+ * p2m_mem_paging_evict - Mark a guest page as paged-out
+ * @d: guest domain
+ * @gfn: guest page to evict
+ *
+ * Returns 0 for success or negative errno values if eviction is not possible.
+ *
+ * p2m_mem_paging_evict() is called by the pager and will free a guest page and
+ * release it back to Xen. If the following conditions are met the page can be
+ * freed:
+ * - the gfn is backed by a mfn
+ * - the gfn was nominated
+ * - the mfn has still exactly one user and has no special meaning
+ *
+ * After successful nomination some other process could have mapped the page.
In
+ * this case eviction can not be done. If the gfn was populated before the
pager
+ * could evict it, eviction can not be done either. In this case the gfn is
+ * still backed by a mfn.
+ */
int p2m_mem_paging_evict(struct p2m_domain *p2m, unsigned long gfn)
{
struct page_info *page;
@@ -2942,6 +2979,15 @@ int p2m_mem_paging_evict(struct p2m_doma
return ret;
}
+/**
+ * p2m_mem_paging_drop_page - Tell pager to drop its reference to a paged page
+ * @d: guest domain
+ * @gfn: guest page to drop
+ *
+ * p2m_mem_paging_drop_page() will notify the pager that a paged-out gfn was
+ * released by the guest. The pager is supposed to drop its reference of the
+ * gfn.
+ */
void p2m_mem_paging_drop_page(struct p2m_domain *p2m, unsigned long gfn)
{
struct vcpu *v = current;
@@ -2961,6 +3007,27 @@ void p2m_mem_paging_drop_page(struct p2m
}
}
+/**
+ * p2m_mem_paging_populate - Tell pager to populete a paged page
+ * @d: guest domain
+ * @gfn: guest page in paging state
+ *
+ * p2m_mem_paging_populate() will notify the pager that a page in any of the
+ * paging states needs to be written back into the guest.
+ * This function needs to be called whenever gfn_to_mfn() returns any of the
p2m
+ * paging types because the gfn may not be backed by a mfn.
+ *
+ * The gfn can be in any of the paging states, but the pager needs only be
+ * notified when the gfn is in the paging-out path (paging_out or paged). This
+ * function may be called more than once from several vcpus. If the vcpu
belongs
+ * to the guest, the vcpu must be stopped and the pager notified that the vcpu
+ * was stopped. The pager needs to handle several requests for the same gfn.
+ *
+ * If the gfn is not in the paging-out path and the vcpu does not belong to the
+ * guest, nothing needs to be done and the function assumes that a request was
+ * already sent to the pager. In this case the caller has to try again until
the
+ * gfn is fully paged in again.
+ */
void p2m_mem_paging_populate(struct p2m_domain *p2m, unsigned long gfn)
{
struct vcpu *v = current;
@@ -3014,6 +3081,17 @@ void p2m_mem_paging_populate(struct p2m_
mem_event_put_request(d, &d->mem_paging, &req);
}
+/**
+ * p2m_mem_paging_prep - Allocate a new page for the guest
+ * @d: guest domain
+ * @gfn: guest page in paging state
+ *
+ * p2m_mem_paging_prep() will allocate a new page for the guest if the gfn is
+ * not backed by a mfn. It is called by the pager.
+ * It is required that the gfn was already populated. The gfn may already have
a
+ * mfn if populate was called for gfn which was nominated but not evicted. In
+ * this case only the p2mt needs to be forwarded.
+ */
int p2m_mem_paging_prep(struct p2m_domain *p2m, unsigned long gfn)
{
struct page_info *page;
@@ -3054,6 +3132,21 @@ int p2m_mem_paging_prep(struct p2m_domai
return ret;
}
+/**
+ * p2m_mem_paging_resume - Resume guest gfn and vcpus
+ * @d: guest domain
+ * @gfn: guest page in paging state
+ *
+ * p2m_mem_paging_resume() will forward the p2mt of a gfn to ram_rw and all
+ * waiting vcpus will be unpaused again. It is called by the pager.
+ *
+ * The gfn was previously either evicted and populated, or nominated and
+ * populated. If the page was evicted the p2mt will be p2m_ram_paging_in. If
+ * the page was just nominated the p2mt will be p2m_ram_paging_in_start because
+ * the pager did not call p2m_mem_paging_prep().
+ *
+ * If the gfn was dropped the vcpu needs to be unpaused.
+ */
void p2m_mem_paging_resume(struct p2m_domain *p2m)
{
struct domain *d = p2m->domain;
++++++ 23980-xenpaging_disallow_paging_in_a_PoD_guest.patch ++++++
changeset: 23980:a06609840ff1
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu Oct 20 11:25:58 2011 +0100
files: tools/xenpaging/xenpaging.c xen/arch/x86/mm/mem_event.c
description:
xenpaging: disallow paging in a PoD guest
Disallow xenpaging in a PoD guest until coexistance between the two features
is properly implemented.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
---
tools/xenpaging/xenpaging.c | 3 +++
xen/arch/x86/mm/mem_event.c | 6 ++++++
2 files changed, 9 insertions(+)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -246,6 +246,9 @@ static xenpaging_t *xenpaging_init(domid
case ENODEV:
ERROR("EPT not supported for this guest");
break;
+ case EXDEV:
+ ERROR("xenpaging not supported in a PoD guest");
+ break;
default:
ERROR("Error initialising shared page: %s", strerror(errno));
break;
Index: xen-4.1.2-testing/xen/arch/x86/mm/mem_event.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/mem_event.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/mem_event.c
@@ -253,6 +253,7 @@ int mem_event_domctl(struct domain *d, x
case XEN_DOMCTL_MEM_EVENT_OP_PAGING:
{
struct mem_event_domain *med = &d->mem_paging;
+ struct p2m_domain *p2m = p2m_get_hostp2m(d);
rc = -ENODEV;
/* Only HAP is supported */
if ( !hap_enabled(d) )
@@ -262,6 +263,11 @@ int mem_event_domctl(struct domain *d, x
if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
break;
+ rc = -EXDEV;
+ /* Disallow paging in a PoD guest */
+ if ( p2m->pod.entry_count )
+ break;
+
switch( mec->op )
{
case XEN_DOMCTL_MEM_EVENT_OP_PAGING_ENABLE:
++++++ 23993-x86-microcode-amd-fix-23871.patch ++++++
References: bnc#725169
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1319475620 -3600
# Node ID e458dfc35b8d3be04a9b72c30ff97163e27a7314
# Parent ffe861c1d5dfa8f4485052e5600e06124105033f
x86/ucode-amd: fix regression from c/s 23871:503ee256fecf
microcode_fits() must return distinct values for the success and
no-fit-but-no-error cases, so the caller can react accordingly. Make
it return 1 in the success case, and adjust its single caller.
Also remove an impossible code path - install_equiv_cpu_table(), which
gets called prior to microcode_fits(), never leaves equiv_cpu_table
being NULL without also returning an error.
Note that this is still awaiting testing on a system where the
regression was actually observed (which also requires a new enough
microcode_ctl package). Note also that this will need to be
backported to 4.0 and 4.1 (or the broken c/s that got backported
there reverted).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/microcode_amd.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/microcode_amd.c
+++ xen-4.1.2-testing/xen/arch/x86/microcode_amd.c
@@ -76,14 +76,6 @@ static int microcode_fits(void *mc, int
/* We should bind the task to the CPU */
BUG_ON(cpu != raw_smp_processor_id());
- if ( equiv_cpu_table == NULL )
- {
- printk(KERN_INFO "microcode: CPU%d microcode update with "
- "version 0x%x (current=0x%x)\n",
- cpu, mc_header->patch_id, uci->cpu_sig.rev);
- goto out;
- }
-
current_cpu_id = cpuid_eax(0x00000001);
for ( i = 0; equiv_cpu_table[i].installed_cpu != 0; i++ )
@@ -96,7 +88,7 @@ static int microcode_fits(void *mc, int
}
if ( !equiv_cpu_id )
- return 0;
+ return 0;
if ( (mc_header->processor_rev_id) != equiv_cpu_id )
{
@@ -113,8 +105,7 @@ static int microcode_fits(void *mc, int
"update with version 0x%x (current=0x%x)\n",
cpu, mc_header->patch_id, uci->cpu_sig.rev);
-out:
- return 0;
+ return 1;
}
static int apply_microcode(int cpu)
@@ -289,7 +280,7 @@ static int cpu_request_microcode(int cpu
while ( (ret = get_next_ucode_from_buffer_amd(mc, buf, size, &offset)) ==
0)
{
error = microcode_fits(mc, cpu);
- if (error != 0)
+ if (error <= 0)
continue;
error = apply_microcode(cpu);
++++++ 24104-waitqueue_Double_size_of_x86_shadow_stack..patch ++++++
changeset: 24104:4daa4ad90f12
user: Keir Fraser <keir@xxxxxxx>
date: Tue Nov 08 19:33:46 2011 +0000
files: xen/common/wait.c
description:
waitqueue: Double size of x86 shadow stack.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
---
xen/common/wait.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: xen-4.1.2-testing/xen/common/wait.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/wait.c
+++ xen-4.1.2-testing/xen/common/wait.c
@@ -33,7 +33,7 @@ struct waitqueue_vcpu {
* hypervisor context before sleeping (descheduling), setjmp/longjmp-style.
*/
void *esp;
- char stack[1500];
+ char stack[3000];
#endif
};
++++++ 24105-xenpaging_compare_domain_pointer_in_p2m_mem_paging_populate.patch
++++++
changeset: 24105:89efd82620ec
user: Olaf Hering <olaf@xxxxxxxxx>
date: Tue Nov 08 19:35:01 2011 +0000
files: xen/arch/x86/mm/p2m.c
description:
xenpaging: compare domain pointer in p2m_mem_paging_populate
Compare just the domain pointer instead of the domain_id number.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>
---
xen/arch/x86/mm/p2m.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
@@ -3060,7 +3060,7 @@ void p2m_mem_paging_populate(struct p2m_
p2m_unlock(p2m);
/* Pause domain if request came from guest and gfn has paging type */
- if ( p2m_is_paging(p2mt) && v->domain->domain_id == d->domain_id )
+ if ( p2m_is_paging(p2mt) && v->domain == d )
{
vcpu_pause_nosync(v);
req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
++++++ 24106-mem_event_check_capabilities_only_once.patch ++++++
changeset: 24106:2af5bfbc9fde
user: Olaf Hering <olaf@xxxxxxxxx>
date: Tue Nov 08 19:35:42 2011 +0000
files: xen/arch/x86/mm/mem_event.c
description:
mem_event: check capabilities only once
It is not required to check the system capabilities during every domctl.
Rearrange the code to check them only once.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>
---
xen/arch/x86/mm/mem_event.c | 54 ++++++++++++++++++++++++--------------------
1 file changed, 30 insertions(+), 24 deletions(-)
Index: xen-4.1.2-testing/xen/arch/x86/mm/mem_event.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/mem_event.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/mem_event.c
@@ -253,32 +253,35 @@ int mem_event_domctl(struct domain *d, x
case XEN_DOMCTL_MEM_EVENT_OP_PAGING:
{
struct mem_event_domain *med = &d->mem_paging;
- struct p2m_domain *p2m = p2m_get_hostp2m(d);
- rc = -ENODEV;
- /* Only HAP is supported */
- if ( !hap_enabled(d) )
- break;
-
- /* Currently only EPT is supported */
- if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
- break;
-
- rc = -EXDEV;
- /* Disallow paging in a PoD guest */
- if ( p2m->pod.entry_count )
- break;
+ rc = -EINVAL;
switch( mec->op )
{
case XEN_DOMCTL_MEM_EVENT_OP_PAGING_ENABLE:
{
+ struct p2m_domain *p2m = p2m_get_hostp2m(d);
+ rc = -ENODEV;
+ /* Only HAP is supported */
+ if ( !hap_enabled(d) )
+ break;
+
+ /* Currently only EPT is supported */
+ if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
+ break;
+
+ rc = -EXDEV;
+ /* Disallow paging in a PoD guest */
+ if ( p2m->pod.entry_count )
+ break;
+
rc = mem_event_enable(d, mec, med);
}
break;
case XEN_DOMCTL_MEM_EVENT_OP_PAGING_DISABLE:
{
- rc = mem_event_disable(med);
+ if ( med->ring_page )
+ rc = mem_event_disable(med);
}
break;
@@ -295,26 +298,29 @@ int mem_event_domctl(struct domain *d, x
case XEN_DOMCTL_MEM_EVENT_OP_ACCESS:
{
struct mem_event_domain *med = &d->mem_access;
- rc = -ENODEV;
- /* Only HAP is supported */
- if ( !hap_enabled(d) )
- break;
-
- /* Currently only EPT is supported */
- if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
- break;
+ rc = -EINVAL;
switch( mec->op )
{
case XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE:
{
+ rc = -ENODEV;
+ /* Only HAP is supported */
+ if ( !hap_enabled(d) )
+ break;
+
+ /* Currently only EPT is supported */
+ if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
+ break;
+
rc = mem_event_enable(d, mec, med);
}
break;
case XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE:
{
- rc = mem_event_disable(&d->mem_access);
+ if ( med->ring_page )
+ rc = mem_event_disable(&d->mem_access);
}
break;
++++++ 24116-x86-continuation-cancel.patch ++++++
# HG changeset patch
# User Jean Guyader <jean.guyader@xxxxxxxxxxxxx>
# Date 1321002862 -3600
# Node ID a095cf28f2b6eeb8f5873c18eb18d4d7e5544e2c
# Parent 6534da595d695a4f2af12a64e46fb06219a0e4bc
Hypercall continuation cancelation in compat mode for XENMEM_get/set_pod_target
If copy_to_guest failed in the compat code after a continuation as been
done in the native code we need to cancel it so we won't reexecute the
hypercall but return from the hypercall with the appropriate error.
Signed-off-by: Jean Guyader <jean.guyader@xxxxxxxxxxxxx>
Acked-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
Committed-by: Jan Beulich <jbeulich@xxxxxxxx>
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1585,6 +1585,24 @@ void sync_vcpu_execstate(struct vcpu *v)
__arg; \
})
+void hypercall_cancel_continuation(void)
+{
+ struct cpu_user_regs *regs = guest_cpu_user_regs();
+ struct mc_state *mcs = ¤t->mc_state;
+
+ if ( test_bit(_MCSF_in_multicall, &mcs->flags) )
+ {
+ __clear_bit(_MCSF_call_preempted, &mcs->flags);
+ }
+ else
+ {
+ if ( !is_hvm_vcpu(current) )
+ regs->eip += 2; /* skip re-execute 'syscall' / 'int $xx' */
+ else
+ current->arch.hvm_vcpu.hcall_preempted = 0;
+ }
+}
+
unsigned long hypercall_create_continuation(
unsigned int op, const char *format, ...)
{
--- a/xen/arch/x86/x86_64/compat/mm.c
+++ b/xen/arch/x86/x86_64/compat/mm.c
@@ -133,7 +133,11 @@ int compat_arch_memory_op(int op, XEN_GU
XLAT_pod_target(&cmp, nat);
if ( copy_to_guest(arg, &cmp, 1) )
+ {
+ if ( rc == __HYPERVISOR_memory_op )
+ hypercall_cancel_continuation();
rc = -EFAULT;
+ }
break;
}
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -560,6 +560,7 @@ void startup_cpu_idle_loop(void);
*/
unsigned long hypercall_create_continuation(
unsigned int op, const char *format, ...);
+void hypercall_cancel_continuation(void);
#define hypercall_preempt_check() (unlikely( \
softirq_pending(smp_processor_id()) | \
++++++ 24123-x86-cpuidle-quiesce.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1321017916 -3600
# Node ID 8b08b2166aa82e7df0b1fa620ed57078810f8c12
# Parent 4699decb8424a00447c466205be3cb4d0fb95a76
x86: quiesce cpuidle code
So far these messages got pointlessly (as the code in other places
assumes symmetric configuration) emitted once per CPU. Hide the debug
one behind opt_cpu_info, and issue the info one just once (if the code
gets adjusted to support assymtric configurations, this would need to
be revisited, but ideally without producing per-CPU messages again).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -659,6 +659,8 @@ static int acpi_processor_ffh_cstate_pro
unsigned int edx_part;
unsigned int cstate_type; /* C-state type and not ACPI C-state type */
unsigned int num_cstate_subtype;
+ int ret = 0;
+ static unsigned long printed;
if ( c->cpuid_level < CPUID_MWAIT_LEAF )
{
@@ -667,8 +669,9 @@ static int acpi_processor_ffh_cstate_pro
}
cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
- printk(XENLOG_DEBUG "cpuid.MWAIT[.eax=%x, .ebx=%x, .ecx=%x, .edx=%x]\n",
- eax, ebx, ecx, edx);
+ if ( opt_cpu_info )
+ printk(XENLOG_DEBUG "cpuid.MWAIT[eax=%x ebx=%x ecx=%x edx=%x]\n",
+ eax, ebx, ecx, edx);
/* Check whether this particular cx_type (in CST) is supported or not */
cstate_type = (cx->reg.address >> MWAIT_SUBSTATE_SIZE) + 1;
@@ -676,15 +679,16 @@ static int acpi_processor_ffh_cstate_pro
num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK;
if ( num_cstate_subtype < (cx->reg.address & MWAIT_SUBSTATE_MASK) )
- return -EFAULT;
-
+ ret = -ERANGE;
/* mwait ecx extensions INTERRUPT_BREAK should be supported for C2/C3 */
- if ( !(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
- !(ecx & CPUID5_ECX_INTERRUPT_BREAK) )
- return -EFAULT;
-
- printk(XENLOG_INFO "Monitor-Mwait will be used to enter C-%d state\n",
cx->type);
- return 0;
+ else if ( !(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
+ !(ecx & CPUID5_ECX_INTERRUPT_BREAK) )
+ ret = -ENODEV;
+ else if ( opt_cpu_info || cx->type >= BITS_PER_LONG ||
+ !test_and_set_bit(cx->type, &printed) )
+ printk(XENLOG_INFO "Monitor-Mwait will be used to enter C%d state\n",
+ cx->type);
+ return ret;
}
/*
++++++ 24124-x86-microcode-amd-quiesce.patch ++++++
References: bnc#719700
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1321018008 -3600
# Node ID 69f8b6f4c29cb2fb2d11e27c391090f543e6b393
# Parent 8b08b2166aa82e7df0b1fa620ed57078810f8c12
x86/amd-ucode: further turn down verbosity
Turn up the log level on various (mostly debug-only) messages.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
--- a/xen/arch/x86/microcode_amd.c
+++ b/xen/arch/x86/microcode_amd.c
@@ -59,7 +59,7 @@ static int collect_cpu_info(int cpu, str
rdmsrl(MSR_AMD_PATCHLEVEL, csig->rev);
- printk(KERN_INFO "microcode: collect_cpu_info: patch_id=0x%x\n",
+ printk(KERN_DEBUG "microcode: collect_cpu_info: patch_id=0x%x\n",
csig->rev);
return 0;
@@ -92,7 +92,7 @@ static int microcode_fits(void *mc, int
if ( (mc_header->processor_rev_id) != equiv_cpu_id )
{
- printk(KERN_INFO "microcode: CPU%d patch does not match "
+ printk(KERN_DEBUG "microcode: CPU%d patch does not match "
"(patch is %x, cpu base id is %x) \n",
cpu, mc_header->processor_rev_id, equiv_cpu_id);
return -EINVAL;
@@ -101,7 +101,7 @@ static int microcode_fits(void *mc, int
if ( mc_header->patch_id <= uci->cpu_sig.rev )
return -EINVAL;
- printk(KERN_INFO "microcode: CPU%d found a matching microcode "
+ printk(KERN_DEBUG "microcode: CPU%d found a matching microcode "
"update with version 0x%x (current=0x%x)\n",
cpu, mc_header->patch_id, uci->cpu_sig.rev);
@@ -139,8 +139,7 @@ static int apply_microcode(int cpu)
return -EIO;
}
- printk("microcode: CPU%d updated from revision "
- "0x%x to 0x%x \n",
+ printk(KERN_INFO "microcode: CPU%d updated from revision %#x to %#x\n",
cpu, uci->cpu_sig.rev, mc_amd->hdr.patch_id);
uci->cpu_sig.rev = rev;
@@ -173,7 +172,7 @@ static int get_next_ucode_from_buffer_am
total_size = (unsigned long) (bufp[off+4] + (bufp[off+5] << 8));
- printk(KERN_INFO "microcode: size %lu, total_size %lu, offset %ld\n",
+ printk(KERN_DEBUG "microcode: size %lu, total_size %lu, offset %ld\n",
(unsigned long)size, total_size, off);
if ( (off + total_size) > size )
++++++ 24137-revert-23666.patch ++++++
# HG changeset patch
# User Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
# Date 1321035275 0
# Node ID 0844b17df7a9dd885e98e505f14fc99c1951b483
# Parent 3622d7fae14dfc2d00f378738ace3b65ee65b6cc
Revert c/s 23666:b96f8bdcaa15 KEXEC: disconnect all PCI devices from the PCI
bus on crash
It turns out that this causes all mannor of problems on certain
motherboards (so far with no pattern I can discern)
Problems include:
* Hanging forever checking hlt instruction.
* Panics when trying to change switch root device
* Drivers hanging when trying to check for interrupts.
From: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Signed-off-by: Keir Fraser <keir@xxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>
--- a/xen/arch/x86/crash.c
+++ b/xen/arch/x86/crash.c
@@ -28,7 +28,6 @@
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <xen/iommu.h>
-#include <xen/pci.h>
#include <asm/hpet.h>
static atomic_t waiting_for_crash_ipi;
@@ -83,8 +82,6 @@ static void nmi_shootdown_cpus(void)
msecs--;
}
- disconnect_pci_devices();
-
/* Crash shutdown any IOMMU functionality as the crashdump kernel is not
* happy when booting if interrupt/dma remapping is still enabled */
iommu_crash_shutdown();
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -518,25 +518,6 @@ int __init scan_pci_devices(void)
return 0;
}
-/* Disconnect all PCI devices from the PCI buses. From the PCI spec:
- * "When a 0 is written to [the COMMAND] register, the device is
- * logically disconnected from the PCI bus for all accesses except
- * configuration accesses. All devices are required to support
- * this base level of functionality."
- */
-void disconnect_pci_devices(void)
-{
- struct pci_dev *pdev;
-
- spin_lock(&pcidevs_lock);
-
- list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
- pci_conf_write16(pdev->bus, PCI_SLOT(pdev->devfn),
- PCI_FUNC(pdev->devfn), PCI_COMMAND, 0);
-
- spin_unlock(&pcidevs_lock);
-}
-
#ifdef SUPPORT_MSI_REMAPPING
static void dump_pci_devices(unsigned char ch)
{
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -94,8 +94,6 @@ int pci_remove_device(u8 bus, u8 devfn);
struct pci_dev *pci_get_pdev(int bus, int devfn);
struct pci_dev *pci_get_pdev_by_domain(struct domain *d, int bus, int devfn);
-void disconnect_pci_devices(void);
-
uint8_t pci_conf_read8(
unsigned int bus, unsigned int dev, unsigned int func, unsigned int reg);
uint16_t pci_conf_read16(
++++++ 24138-xenpaging_munmap_all_pages_after_page-in.patch ++++++
changeset: 24138:e2cc58b85b6e
user: Olaf Hering <olaf@xxxxxxxxx>
date: Mon Nov 14 17:49:14 2011 +0000
files: tools/xenpaging/pagein.c
description:
xenpaging: munmap all pages after page-in
Do munmap() on all mapped pages, not just the first one. Without this
change the gfns backing the remaining pages can not be paged out again
because the page count does not go down to 1. This change was missing
from changeset 23827:d1d6abc1db20.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
---
tools/xenpaging/pagein.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: xen-4.1.2-testing/tools/xenpaging/pagein.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/pagein.c
+++ xen-4.1.2-testing/tools/xenpaging/pagein.c
@@ -44,7 +44,7 @@ static void *page_in(void *arg)
/* Ignore errors */
page = xc_map_foreign_pages(pia->xch, pia->dom, PROT_READ, gfns, num);
if (page)
- munmap(page, PAGE_SIZE);
+ munmap(page, PAGE_SIZE * num);
}
page_in_possible = 0;
pthread_exit(NULL);
++++++ 24144-cpufreq-turbo-crash.patch ++++++
# HG changeset patch
# User Ian Campbell <ian.campbell@xxxxxxxxxx>
# Date 1321363478 -3600
# Node ID cd3ef25f207a3925f1f8650c227e24f839da13ad
# Parent 848049b14ec7fbd28bd4cd756e01609698b60c7a
xen: avoid crash enabling turbo mode
On a system which has not had P-state information pushed down into the
hypervisor running "xenpm enable-turbo-mode" will reliably crash the host.
(XEN) PM OP 38 on CPU0
(XEN) ----[ Xen-4.2-unstable x86_64 debug=y Not tainted ]----
(XEN) CPU: 0
(XEN) RIP: e008:[<ffff82c48013ceed>] cpufreq_enable_turbo+0x1d/0x29
(XEN) RFLAGS: 0000000000010297 CONTEXT: hypervisor
(XEN) rax: 0000000000000000 rbx: ffff82c48029fe40 rcx: 0000000000000000
(XEN) rdx: 0000000000000000 rsi: 000000000000000a rdi: 0000000000000000
(XEN) rbp: ffff82c48029fd08 rsp: ffff82c48029fd08 r8: 0000000000000004
(XEN) r9: 0000000000000000 r10: 00000000fffffffd r11: ffff82c480218f20
(XEN) r12: ffff830106e720b0 r13: 0000000000000000 r14: ffff82c4802bff80
(XEN) r15: ffff82c48025c0e4 cr0: 000000008005003b cr4: 00000000000026f0
(XEN) cr3: 000000011f459000 cr2: 0000000000000051
(XEN) ds: 007b es: 007b fs: 00d8 gs: 0033 ss: 0000 cs: e008
(XEN) Xen stack trace from rsp=ffff82c48029fd08:
(XEN) ffff82c48029fdb8 ffff82c48014c427 ffff82c48029fd98 ffff82c48016b2d6
(XEN) ffff82c48029fdb8 ffff82c48029fd60 00000000001196c5 0000000116bbe025
(XEN) 0000000116bbe025 ffff8301196c5338 ffff82f6022d77c0 0000000000000000
(XEN) ffff82f60205edf0 0000000000000000 0000000000000000 ffff8300dffba000
(XEN) ffff82c48029fdb8 ffff82c48029ff18 0000000008050004 0000000000000000
(XEN) ffff82c4802bff80 ffff82c48025c0e4 ffff82c48029fef8 ffff82c480124fd8
(XEN) 0000000000000000 ffff83011f48c000 0000000116bbe025 0000000000000025
(XEN) ffff82c48029fe28 0000000080167722 ffff82c48029ff08 ffff83011f48c000
(XEN) ffff82f60232d8a0 ffff8300dffba000 ffff83011f48c000 ffff82f60232d8a0
(XEN) ffff82c48029fed8 ffff82c48017296a 000000080000000c 0000000000000026
(XEN) bfb338b000000000 bfb33874bfb33868 b78988f800000000 0000008800000000
(XEN) b787a6e0b78533a0 ffffffff00000001 080487aeb7897ff4 bfb3389000000001
(XEN) b7898ab0b7889626 0000000100000000 0000000000000001 0804867800000001
(XEN) 000000000804e998 00000000b78533a0 bfb33e70bfb33a7c 0000000000000000
(XEN) 0000000000000000 ffff8300dffba000 0000000000000000 0000000000000000
(XEN) 0000000000000000 0000000000000000 00007d3b7fd600c7 ffff82c48021511e
(XEN) 00000000c1002467 0000000000000023 0000000000000000 0000000000000000
(XEN) 0000000000000000 0000000000000000 00000000dbf5bef8 0000000008050004
(XEN) 0000000000000000 0000000000000000 0000000000000000 0000000000000000
(XEN) 0000000000000023 00000000bfb33874 00000000bfb33868 0000000000000000
(XEN) Xen call trace:
(XEN) [<ffff82c48013ceed>] cpufreq_enable_turbo+0x1d/0x29
(XEN) [<ffff82c48014c427>] do_pm_op+0x884/0x8e7
(XEN) [<ffff82c480124fd8>] do_sysctl+0x6d8/0x9f0
(XEN) [<ffff82c48021511e>] compat_hypercall+0xae/0x107
(XEN)
(XEN) Pagetable walk from 0000000000000051:
(XEN) L4[0x000] = 0000000116dbc027 000000000001bd22
(XEN) L3[0x000] = 0000000119ba8027 000000000001eb36
(XEN) L2[0x000] = 0000000000000000 ffffffffffffffff
(XEN)
(XEN) ****************************************
(XEN) Panic on CPU 0:
(XEN) FATAL PAGE FAULT
(XEN) [error_code=0000]
(XEN) Faulting linear address: 0000000000000051
(XEN) ****************************************
(XEN)
Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Committed-by: Jan Beulich <jbeulich@xxxxxxxx>
--- a/xen/drivers/cpufreq/utility.c
+++ b/xen/drivers/cpufreq/utility.c
@@ -400,7 +400,7 @@ void cpufreq_enable_turbo(int cpuid)
struct cpufreq_policy *policy;
policy = per_cpu(cpufreq_cpu_policy, cpuid);
- if (policy->turbo != CPUFREQ_TURBO_UNSUPPORTED)
+ if (policy && policy->turbo != CPUFREQ_TURBO_UNSUPPORTED)
policy->turbo = CPUFREQ_TURBO_ENABLED;
}
@@ -409,7 +409,7 @@ void cpufreq_disable_turbo(int cpuid)
struct cpufreq_policy *policy;
policy = per_cpu(cpufreq_cpu_policy, cpuid);
- if (policy->turbo != CPUFREQ_TURBO_UNSUPPORTED)
+ if (policy && policy->turbo != CPUFREQ_TURBO_UNSUPPORTED)
policy->turbo = CPUFREQ_TURBO_DISABLED;
}
@@ -418,7 +418,7 @@ int cpufreq_get_turbo_status(int cpuid)
struct cpufreq_policy *policy;
policy = per_cpu(cpufreq_cpu_policy, cpuid);
- return policy->turbo;
+ return policy && policy->turbo;
}
/*********************************************************************
++++++ 24148-shadow-pgt-dying-op-performance.patch ++++++
References: bnc#726332
# HG changeset patch
# User Gianluca Guida <gianluca.guida@xxxxxxxxxx>
# Date 1321456773 0
# Node ID 3ecc8fef428138e4304ef11b47498981e63626b3
# Parent a5f1d3b1612bb48e1cb09dc66b0909e3613dc855
[shadow] Disable higher level pagetables early unshadow only when the "process
dying" hypercall is used.
This patch fixes a performance problem in fully virtualized guests.
Signed-off-by: Gianluca Guida <gianluca.guida@xxxxxxxxxx>
Tested-by: Jan Beulich <jbeulich@xxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -2743,8 +2743,9 @@ static inline void check_for_early_unsha
|| ( !v->domain->arch.paging.shadow.pagetable_dying_op
&& v->arch.paging.shadow.last_emulated_mfn_for_unshadow ==
mfn_x(gmfn) ) )
&& sh_mfn_is_a_page_table(gmfn)
- && !(mfn_to_page(gmfn)->shadow_flags
- & (SHF_L2_32|SHF_L2_PAE|SHF_L2H_PAE|SHF_L4_64)) )
+ && (!v->domain->arch.paging.shadow.pagetable_dying_op ||
+ !(mfn_to_page(gmfn)->shadow_flags
+ & (SHF_L2_32|SHF_L2_PAE|SHF_L2H_PAE|SHF_L4_64))) )
{
perfc_incr(shadow_early_unshadow);
sh_remove_shadows(v, gmfn, 1, 0 /* Fast, can fail to unshadow */ );
++++++ 24153-x86-emul-feature-checks.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1321459471 0
# Node ID 644ca5d3ec435f3372ce88a4de86909bd4033819
# Parent 1cbb3c1dfb3203f5344a6c1c52507b9e75af6742
x86/emulator: add feature checks for newer instructions
Certain instructions were introduced only after the i686 or original
x86-64 architecture, so we should not try to emulate them if the guest
is not seeing the respective feature enabled (or, worse, if the
underlying hardware doesn't support them). This affects fisttp,
movnti, and cmpxchg16b.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Signed-off-by: Keir Fraser <keir@xxxxxxx>
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -955,6 +955,47 @@ in_protmode(
return !(in_realmode(ctxt, ops) || (ctxt->regs->eflags & EFLG_VM));
}
+#define EAX 0
+#define ECX 1
+#define EDX 2
+#define EBX 3
+
+static bool_t vcpu_has(
+ unsigned int eax,
+ unsigned int reg,
+ unsigned int bit,
+ struct x86_emulate_ctxt *ctxt,
+ const struct x86_emulate_ops *ops)
+{
+ unsigned int ebx = 0, ecx = 0, edx = 0;
+ int rc;
+
+ fail_if(!ops->cpuid);
+ rc = ops->cpuid(&eax, &ebx, &ecx, &edx, ctxt);
+ if ( rc == X86EMUL_OKAY )
+ {
+ switch ( reg )
+ {
+ case EAX: reg = eax; break;
+ case EBX: reg = ebx; break;
+ case ECX: reg = ecx; break;
+ case EDX: reg = edx; break;
+ default: BUG();
+ }
+ if ( !(reg & (1U << bit)) )
+ rc = ~X86EMUL_OKAY;
+ }
+
+ done:
+ return rc == X86EMUL_OKAY;
+}
+
+#define vcpu_must_have(leaf, reg, bit) \
+ generate_exception_if(!vcpu_has(leaf, reg, bit, ctxt, ops), EXC_UD, -1)
+#define vcpu_must_have_sse2() vcpu_must_have(0x00000001, EDX, 26)
+#define vcpu_must_have_sse3() vcpu_must_have(0x00000001, ECX, 0)
+#define vcpu_must_have_cx16() vcpu_must_have(0x00000001, ECX, 13)
+
static int
in_longmode(
struct x86_emulate_ctxt *ctxt,
@@ -2738,6 +2779,7 @@ x86_emulate(
emulate_fpu_insn_memsrc("fildl", src.val);
break;
case 1: /* fisttp m32i */
+ vcpu_must_have_sse3();
ea.bytes = 4;
dst = ea;
dst.type = OP_MEM;
@@ -2846,6 +2888,7 @@ x86_emulate(
emulate_fpu_insn_memsrc("fldl", src.val);
break;
case 1: /* fisttp m64i */
+ vcpu_must_have_sse3();
ea.bytes = 8;
dst = ea;
dst.type = OP_MEM;
@@ -2953,6 +2996,7 @@ x86_emulate(
emulate_fpu_insn_memsrc("fild", src.val);
break;
case 1: /* fisttp m16i */
+ vcpu_must_have_sse3();
ea.bytes = 2;
dst = ea;
dst.type = OP_MEM;
@@ -4141,6 +4185,7 @@ x86_emulate(
case 0xc3: /* movnti */
/* Ignore the non-temporal hint for now. */
+ vcpu_must_have_sse2();
generate_exception_if(dst.bytes <= 2, EXC_UD, -1);
dst.val = src.val;
break;
@@ -4151,6 +4196,8 @@ x86_emulate(
generate_exception_if((modrm_reg & 7) != 1, EXC_UD, -1);
generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
+ if ( op_bytes == 8 )
+ vcpu_must_have_cx16();
op_bytes *= 2;
/* Get actual old value. */
++++++ 24155-x86-ioapic-EOI-after-migration.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1321604321 -3600
# Node ID 0d50e704834fb53c6c86b8b0badd19d88e73c4ed
# Parent dbdc840f8f62db58321b5009e5e0f7833066386f
x86/IO-APIC: refine EOI-ing of migrating level interrupts
Rather than going through all IO-APICs and calling io_apic_eoi_vector()
for the vector in question, just use eoi_IO_APIC_irq().
This in turn allows to eliminate quite a bit of other code.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Tested-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Acked-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -69,10 +69,6 @@ int __read_mostly nr_ioapics;
#define ioapic_has_eoi_reg(apic) (mp_ioapics[(apic)].mpc_apicver >= 0x20)
-#define io_apic_eoi_vector(apic, vector) io_apic_eoi((apic), (vector), -1)
-#define io_apic_eoi_pin(apic, pin) io_apic_eoi((apic), -1, (pin))
-
-
/*
* This is performance-critical, we want to do it O(1)
*
@@ -208,21 +204,18 @@ static void ioapic_write_entry(int apic,
spin_unlock_irqrestore(&ioapic_lock, flags);
}
-/* EOI an IO-APIC entry. One of vector or pin may be -1, indicating that
- * it should be worked out using the other. This function expect that the
- * ioapic_lock is taken, and interrupts are disabled (or there is a good reason
- * not to), and that if both pin and vector are passed, that they refer to the
+/* EOI an IO-APIC entry. Vector may be zero, indicating that it should be
+ * worked out using the pin. This function expects that the ioapic_lock is
+ * being held, and interrupts are disabled (or there is a good reason not
+ * to), and that if both pin and vector are passed, that they refer to the
* same redirection entry in the IO-APIC. */
static void __io_apic_eoi(unsigned int apic, unsigned int vector, unsigned int
pin)
{
- /* Ensure some useful information is passed in */
- BUG_ON( (vector == -1 && pin == -1) );
-
/* Prefer the use of the EOI register if available */
if ( ioapic_has_eoi_reg(apic) )
{
/* If vector is unknown, read it from the IO-APIC */
- if ( vector == -1 )
+ if ( !vector )
vector = __ioapic_read_entry(apic, pin, TRUE).vector;
*(IO_APIC_BASE(apic)+16) = vector;
@@ -234,42 +227,6 @@ static void __io_apic_eoi(unsigned int a
struct IO_APIC_route_entry entry;
bool_t need_to_unmask = 0;
- /* If pin is unknown, search for it */
- if ( pin == -1 )
- {
- unsigned int p;
- for ( p = 0; p < nr_ioapic_registers[apic]; ++p )
- {
- entry = __ioapic_read_entry(apic, p, TRUE);
- if ( entry.vector == vector )
- {
- pin = p;
- /* break; */
-
- /* Here should be a break out of the loop, but at the
- * Xen code doesn't actually prevent multiple IO-APIC
- * entries being assigned the same vector, so EOI all
- * pins which have the correct vector.
- *
- * Remove the following code when the above assertion
- * is fulfilled. */
- __io_apic_eoi(apic, vector, p);
- }
- }
-
- /* If search fails, nothing to do */
-
- /* if ( pin == -1 ) */
-
- /* Because the loop wasn't broken out of (see comment above),
- * all relevant pins have been EOI, so we can always return.
- *
- * Re-instate the if statement above when the Xen logic has been
- * fixed.*/
-
- return;
- }
-
entry = __ioapic_read_entry(apic, pin, TRUE);
if ( ! entry.mask )
@@ -296,17 +253,6 @@ static void __io_apic_eoi(unsigned int a
}
}
-/* EOI an IO-APIC entry. One of vector or pin may be -1, indicating that
- * it should be worked out using the other. This function disables interrupts
- * and takes the ioapic_lock */
-static void io_apic_eoi(unsigned int apic, unsigned int vector, unsigned int
pin)
-{
- unsigned int flags;
- spin_lock_irqsave(&ioapic_lock, flags);
- __io_apic_eoi(apic, vector, pin);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
/*
* Saves all the IO-APIC RTE's
*/
@@ -1830,11 +1776,7 @@ static void end_level_ioapic_irq (unsign
/* Manually EOI the old vector if we are moving to the new */
if ( vector && i != vector )
- {
- int ioapic;
- for (ioapic = 0; ioapic < nr_ioapics; ioapic++)
- io_apic_eoi_vector(ioapic, i);
- }
+ eoi_IO_APIC_irq(irq);
v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
++++++ 24156-x86-ioapic-shared-vectors.patch ++++++
References: bnc#713503
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1321604484 -3600
# Node ID f29b5bd6e25fd78409baa5461914c67065f7579f
# Parent 0d50e704834fb53c6c86b8b0badd19d88e73c4ed
x86/IRQ: prevent vector sharing within IO-APICs
Following the prevention of vector sharing for MSIs, this change
enforces the same within IO-APICs: Pin based interrupts use the IO-APIC
as their identifying device under the AMD IOMMU (and just like for
MSIs, only the identifying device is used to remap interrupts here,
with no regard to an interrupt's destination).
Additionally, LAPIC initiated EOIs (for level triggered interrupts) too
use only the vector for identifying which interrupts to end. While this
generally causes no significant problem (at worst an interrupt would be
re-raised without a new interrupt event actually having occurred), it
still seems better to avoid the situation.
For this second aspect, a distinction is being made between the
traditional and the directed-EOI cases: In the former, vectors should
not be shared throughout all IO-APICs in the system, while in the
latter case only individual IO-APICs need to be contrained (or, if the
firmware indicates so, sub- groups of them having the same GSI appear
at multiple pins).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -69,6 +69,34 @@ int __read_mostly nr_ioapics;
#define ioapic_has_eoi_reg(apic) (mp_ioapics[(apic)].mpc_apicver >= 0x20)
+static int apic_pin_2_gsi_irq(int apic, int pin);
+
+static vmask_t *__read_mostly vector_map[MAX_IO_APICS];
+
+static void share_vector_maps(unsigned int src, unsigned int dst)
+{
+ unsigned int pin;
+
+ if (vector_map[src] == vector_map[dst])
+ return;
+
+ bitmap_or(vector_map[src]->_bits, vector_map[src]->_bits,
+ vector_map[dst]->_bits, NR_VECTORS);
+
+ for (pin = 0; pin < nr_ioapic_registers[dst]; ++pin) {
+ int irq = apic_pin_2_gsi_irq(dst, pin);
+ struct irq_cfg *cfg;
+
+ if (irq < 0)
+ continue;
+ cfg = irq_cfg(irq);
+ if (cfg->used_vectors == vector_map[dst])
+ cfg->used_vectors = vector_map[src];
+ }
+
+ vector_map[dst] = vector_map[src];
+}
+
/*
* This is performance-critical, we want to do it O(1)
*
@@ -109,6 +137,7 @@ static void add_pin_to_irq(unsigned int
}
entry->apic = apic;
entry->pin = pin;
+ share_vector_maps(irq_2_pin[irq].apic, apic);
}
/*
@@ -124,6 +153,7 @@ static void __init replace_pin_at_irq(un
if (entry->apic == oldapic && entry->pin == oldpin) {
entry->apic = newapic;
entry->pin = newpin;
+ share_vector_maps(oldapic, newapic);
}
if (!entry->next)
break;
@@ -131,6 +161,16 @@ static void __init replace_pin_at_irq(un
}
}
+vmask_t *io_apic_get_used_vector_map(unsigned int irq)
+{
+ struct irq_pin_list *entry = irq_2_pin + irq;
+
+ if (entry->pin == -1)
+ return NULL;
+
+ return vector_map[entry->apic];
+}
+
struct IO_APIC_route_entry **alloc_ioapic_entries(void)
{
int apic;
@@ -1314,6 +1354,18 @@ static void __init enable_IO_APIC(void)
for (i = irq_2_pin_free_entry = nr_irqs_gsi; i < PIN_MAP_SIZE; i++)
irq_2_pin[i].next = i + 1;
+ if (directed_eoi_enabled) {
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ vector_map[apic] = xzalloc(vmask_t);
+ BUG_ON(!vector_map[apic]);
+ }
+ } else {
+ vector_map[0] = xzalloc(vmask_t);
+ BUG_ON(!vector_map[0]);
+ for (apic = 1; apic < nr_ioapics; apic++)
+ vector_map[apic] = vector_map[0];
+ }
+
for(apic = 0; apic < nr_ioapics; apic++) {
int pin;
/* See if any of the pins is in ExtINT mode */
@@ -2479,13 +2531,12 @@ int ioapic_guest_write(unsigned long phy
}
if ( cfg->vector <= 0 || cfg->vector > LAST_DYNAMIC_VECTOR ) {
+ add_pin_to_irq(irq, apic, pin);
vector = assign_irq_vector(irq);
if ( vector < 0 )
return vector;
printk(XENLOG_INFO "allocated vector %02x for irq %d\n", vector, irq);
-
- add_pin_to_irq(irq, apic, pin);
}
spin_lock(&pcidevs_lock);
spin_lock(&dom0->event_lock);
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -403,6 +403,11 @@ static vmask_t *irq_get_used_vector_mask
}
}
}
+ else if ( IO_APIC_IRQ(irq) &&
+ opt_irq_vector_map != OPT_IRQ_VECTOR_MAP_NONE )
+ {
+ ret = io_apic_get_used_vector_map(irq);
+ }
return ret;
}
--- a/xen/include/asm-x86/irq.h
+++ b/xen/include/asm-x86/irq.h
@@ -113,6 +113,7 @@ void setup_IO_APIC(void);
void disable_IO_APIC(void);
void print_IO_APIC(void);
void setup_ioapic_dest(void);
+vmask_t *io_apic_get_used_vector_map(unsigned int irq);
extern unsigned long io_apic_irqs;
++++++ 24157-x86-xstate-init.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1321604565 -3600
# Node ID 7b5e1cb94bfa43a9268479b9a255fb88c07e4ce2
# Parent f29b5bd6e25fd78409baa5461914c67065f7579f
x86/xsave: provide guests with finit-like environment
Without the use of xsave, guests get their initial floating point
environment set up with finit. At least NetWare actually depends on
this (in particular on all exceptions being masked), so to be
consistent set the same environment also when using xsave. This is
also in line with all SSE exceptions getting masked initially.
To avoid further fragile casts in xstate_alloc_save_area() the patch
also changes xsave_struct's fpu_see member to have actually usable
fields.
The patch was tested in its technically identical, but modified-file-
wise different 4.1.2 version.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Tested-by: Charles Arnold <carnold@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -92,11 +92,14 @@ void setup_fpu(struct vcpu *v)
v->fpu_dirtied = 1;
}
+#define FCW_DEFAULT 0x037f
+#define MXCSR_DEFAULT 0x1f80
+
static void init_fpu(void)
{
asm volatile ( "fninit" );
if ( cpu_has_xmm )
- load_mxcsr(0x1f80);
+ load_mxcsr(MXCSR_DEFAULT);
}
void save_init_fpu(struct vcpu *v)
@@ -287,7 +290,7 @@ void xsave_init(void)
int xsave_alloc_save_area(struct vcpu *v)
{
- void *save_area;
+ struct xsave_struct *save_area;
if ( !cpu_has_xsave || is_idle_vcpu(v) )
return 0;
@@ -300,8 +303,9 @@ int xsave_alloc_save_area(struct vcpu *v
return -ENOMEM;
memset(save_area, 0, xsave_cntxt_size);
- ((u32 *)save_area)[6] = 0x1f80; /* MXCSR */
- *(uint64_t *)(save_area + 512) = XSTATE_FP_SSE; /* XSETBV */
+ save_area->fpu_sse.fcw = FCW_DEFAULT;
+ save_area->fpu_sse.mxcsr = MXCSR_DEFAULT;
+ save_area->xsave_hdr.xstate_bv = XSTATE_FP_SSE;
v->arch.xsave_area = save_area;
v->arch.xcr0 = XSTATE_FP_SSE;
--- a/xen/include/asm-x86/i387.h
+++ b/xen/include/asm-x86/i387.h
@@ -37,7 +37,29 @@ bool_t xsave_enabled(const struct vcpu *
struct xsave_struct
{
- struct { char x[512]; } fpu_sse; /* FPU/MMX, SSE */
+ union { /* FPU/MMX, SSE */
+ char x[512];
+ struct {
+ uint16_t fcw;
+ uint16_t fsw;
+ uint8_t ftw;
+ uint8_t rsvd1;
+ uint16_t fop;
+ union {
+#ifdef __x86_64__
+ uint64_t addr;
+#endif
+ struct {
+ uint32_t offs;
+ uint16_t sel;
+ uint16_t rsvd;
+ };
+ } fip, fdp;
+ uint32_t mxcsr;
+ uint32_t mxcsr_mask;
+ /* data registers follow here */
+ };
+ } fpu_sse;
struct {
u64 xstate_bv;
++++++ 24168-x86-vioapic-clear-remote_irr.patch ++++++
References: bnc#694863
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1321864171 -3600
# Node ID 9c350ab8d3ea64866421de756ab2bf3daaf63187
# Parent 335e8273a3f34a5e2972643a028f83684609f1c1
x86/vioapic: clear remote IRR when switching RTE to edge triggered mode
Xen itself (as much as Linux) relies on this behavior, so it should
also emulate it properly. Not doing so reportedly gets in the way of
kexec inside a HVM guest.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Tested-by: Olaf Hering <olaf@xxxxxxxxx>
--- a/xen/arch/x86/hvm/vioapic.c
+++ b/xen/arch/x86/hvm/vioapic.c
@@ -154,8 +154,9 @@ static void vioapic_write_redirent(
{
vlapic_adjust_i8259_target(d);
}
- else if ( (ent.fields.trig_mode == VIOAPIC_LEVEL_TRIG) &&
- !ent.fields.mask &&
+ else if ( ent.fields.trig_mode == VIOAPIC_EDGE_TRIG )
+ pent->fields.remote_irr = 0;
+ else if ( !ent.fields.mask &&
!ent.fields.remote_irr &&
hvm_irq->gsi_assert_count[idx] )
{
++++++ 24171-x86waitqueue_Allocate_whole_page_for_shadow_stack..patch ++++++
changeset: 24171:fe80909663c1
user: Keir Fraser <keir@xxxxxxx>
date: Tue Nov 22 13:00:21 2011 +0000
files: xen/common/wait.c
description:
x86,waitqueue: Allocate whole page for shadow stack.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
---
xen/common/wait.c | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
Index: xen-4.1.2-testing/xen/common/wait.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/wait.c
+++ xen-4.1.2-testing/xen/common/wait.c
@@ -33,7 +33,7 @@ struct waitqueue_vcpu {
* hypervisor context before sleeping (descheduling), setjmp/longjmp-style.
*/
void *esp;
- char stack[3000];
+ char *stack;
#endif
};
@@ -41,11 +41,19 @@ int init_waitqueue_vcpu(struct vcpu *v)
{
struct waitqueue_vcpu *wqv;
- wqv = xmalloc(struct waitqueue_vcpu);
+ wqv = xzalloc(struct waitqueue_vcpu);
if ( wqv == NULL )
return -ENOMEM;
- memset(wqv, 0, sizeof(*wqv));
+#ifdef CONFIG_X86
+ wqv->stack = alloc_xenheap_page();
+ if ( wqv->stack == NULL )
+ {
+ xfree(wqv);
+ return -ENOMEM;
+ }
+#endif
+
INIT_LIST_HEAD(&wqv->list);
wqv->vcpu = v;
@@ -63,6 +71,9 @@ void destroy_waitqueue_vcpu(struct vcpu
return;
BUG_ON(!list_empty(&wqv->list));
+#ifdef CONFIG_X86
+ free_xenheap_page(wqv->stack);
+#endif
xfree(wqv);
v->waitqueue_vcpu = NULL;
@@ -115,7 +126,7 @@ static void __prepare_to_wait(struct wai
: "=S" (wqv->esp)
: "c" (cpu_info), "D" (wqv->stack)
: "memory" );
- BUG_ON((cpu_info - (char *)wqv->esp) > sizeof(wqv->stack));
+ BUG_ON((cpu_info - (char *)wqv->esp) > PAGE_SIZE);
}
static void __finish_wait(struct waitqueue_vcpu *wqv)
++++++ 24178-debug_Add_domain-vcpu_pause_count_info_to_d_key..patch ++++++
changeset: 24178:1f2a06dbbb69
user: Keir Fraser <keir@xxxxxxx>
date: Tue Nov 22 15:35:26 2011 +0000
files: xen/common/keyhandler.c
description:
debug: Add domain/vcpu pause_count info to 'd' key.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
---
xen/common/keyhandler.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
Index: xen-4.1.2-testing/xen/common/keyhandler.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/keyhandler.c
+++ xen-4.1.2-testing/xen/common/keyhandler.c
@@ -244,9 +244,10 @@ static void dump_domains(unsigned char k
unsigned int i;
printk("General information for domain %u:\n", d->domain_id);
cpuset_print(tmpstr, sizeof(tmpstr), d->domain_dirty_cpumask);
- printk(" refcnt=%d dying=%d nr_pages=%d xenheap_pages=%d "
- "dirty_cpus=%s max_pages=%u\n",
+ printk(" refcnt=%d dying=%d pause_count=%d\n",
atomic_read(&d->refcnt), d->is_dying,
+ atomic_read(&d->pause_count));
+ printk(" nr_pages=%d xenheap_pages=%d dirty_cpus=%s max_pages=%u\n",
d->tot_pages, d->xenheap_pages, tmpstr, d->max_pages);
printk(" handle=%02x%02x%02x%02x-%02x%02x-%02x%02x-"
"%02x%02x-%02x%02x%02x%02x%02x%02x vm_assist=%08lx\n",
@@ -270,17 +271,18 @@ static void dump_domains(unsigned char k
d->domain_id);
for_each_vcpu ( d, v )
{
- printk(" VCPU%d: CPU%d [has=%c] flags=%lx poll=%d "
+ printk(" VCPU%d: CPU%d [has=%c] poll=%d "
"upcall_pend = %02x, upcall_mask = %02x ",
v->vcpu_id, v->processor,
- v->is_running ? 'T':'F',
- v->pause_flags, v->poll_evtchn,
+ v->is_running ? 'T':'F', v->poll_evtchn,
vcpu_info(v, evtchn_upcall_pending),
vcpu_info(v, evtchn_upcall_mask));
cpuset_print(tmpstr, sizeof(tmpstr), v->vcpu_dirty_cpumask);
printk("dirty_cpus=%s ", tmpstr);
cpuset_print(tmpstr, sizeof(tmpstr), v->cpu_affinity);
printk("cpu_affinity=%s\n", tmpstr);
+ printk(" pause_count=%d pause_flags=%lx\n",
+ atomic_read(&v->pause_count), v->pause_flags);
arch_dump_vcpu_info(v);
periodic_timer_print(tmpstr, sizeof(tmpstr), v->periodic_period);
printk(" %s\n", tmpstr);
++++++ 24189-x86-p2m-pod-locking.patch ++++++
# HG changeset patch
# User Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
# Date 1322148057 0
# Node ID 7da681c490e0a8a2b3f1fb311d254dc7ce618a43
# Parent b082fdc52ad7607d93b59148fb289aafe21f294b
x86/mm/p2m: fix pod locking
The path p2m-lookup -> p2m-pt->get_entry -> 1GB PoD superpage ->
pod_demand_populate ends in the pod code performing a p2m_set_entry with
no locks held (in order to split the 1GB superpage into 512 2MB ones)
Further, it calls p2m_unlock after that, which will break the spinlock.
This patch attempts to fix that.
Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -1244,7 +1244,6 @@ p2m_pod_demand_populate(struct p2m_domai
set_p2m_entry(p2m, gfn_aligned, _mfn(POPULATE_ON_DEMAND_MFN), 9,
p2m_populate_on_demand, p2m->default_access);
audit_p2m(p2m, 1);
- p2m_unlock(p2m);
return 0;
}
@@ -1602,7 +1601,8 @@ pod_retry_l3:
{
if ( q != p2m_query )
{
- if ( !p2m_pod_demand_populate(p2m, gfn, 18, q) )
+ if ( !p2m_pod_check_and_populate(p2m, gfn,
+ (l1_pgentry_t *) &l3e, 18, q) )
goto pod_retry_l3;
}
else
@@ -1733,7 +1733,8 @@ static mfn_t p2m_gfn_to_mfn_current(stru
/* The read has succeeded, so we know that mapping exists */
if ( q != p2m_query )
{
- if ( !p2m_pod_demand_populate(p2m, gfn, 18, q) )
+ if ( !p2m_pod_check_and_populate(p2m, gfn,
+ (l1_pgentry_t *) &l3e, 18, q) )
goto pod_retry_l3;
p2mt = p2m_invalid;
printk("%s: Allocate 1GB failed!\n", __func__);
++++++ 24190-hap-log-dirty-disable-rc.patch ++++++
# HG changeset patch
# User Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
# Date 1322148057 0
# Node ID 6b3d8250ee2c63d90680c142549123a4b1559f55
# Parent 7da681c490e0a8a2b3f1fb311d254dc7ce618a43
x86/mm: change return code for log-dirty disabling
Disabling log dirty mode in HAP always returns -EINVAL. Make it
return the correct rc on success.
Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
Signed-off-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -710,6 +710,8 @@ int hap_domctl(struct domain *d, xen_dom
return rc;
case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION:
sc->mb = hap_get_allocation(d);
+ /* Fall through... */
+ case XEN_DOMCTL_SHADOW_OP_OFF:
return 0;
default:
HAP_ERROR("Bad hap domctl op %u\n", sc->op);
++++++ 24193-hap-track-dirty-vram-rc.patch ++++++
# HG changeset patch
# User Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
# Date 1322149491 0
# Node ID 67d2ac426defedad9c10eb339019f9dc9f02d2ae
# Parent 3c864e04c2ad060ca1cac6579994777993fef6e6
Trivial fix for rc val in hap track dirty vram
Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -143,7 +143,7 @@ int hap_track_dirty_vram(struct domain *
}
else if ( !paging_mode_log_dirty(d) && !dirty_vram )
{
- rc -ENOMEM;
+ rc = -ENOMEM;
if ( (dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
goto param_fail;
++++++ 24195-waitqueue_Detect_saved-stack_overflow_and_crash_the_guest..patch
++++++
changeset: 24195:9b65336f688f
user: Keir Fraser <keir@xxxxxxx>
date: Thu Nov 24 15:48:10 2011 +0000
files: xen/common/wait.c
description:
waitqueue: Detect saved-stack overflow and crash the guest.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
---
xen/common/wait.c | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
Index: xen-4.1.2-testing/xen/common/wait.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/wait.c
+++ xen-4.1.2-testing/xen/common/wait.c
@@ -106,13 +106,16 @@ void wake_up(struct waitqueue_head *wq)
static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
{
char *cpu_info = (char *)get_cpu_info();
+
asm volatile (
#ifdef CONFIG_X86_64
"push %%rax; push %%rbx; push %%rcx; push %%rdx; push %%rdi; "
"push %%rbp; push %%r8; push %%r9; push %%r10; push %%r11; "
"push %%r12; push %%r13; push %%r14; push %%r15; call 1f; "
"1: mov 80(%%rsp),%%rdi; mov 96(%%rsp),%%rcx; mov %%rsp,%%rsi; "
- "sub %%rsi,%%rcx; rep movsb; mov %%rsp,%%rsi; pop %%rax; "
+ "sub %%rsi,%%rcx; cmp %3,%%rcx; jbe 2f; "
+ "xor %%esi,%%esi; jmp 3f; "
+ "2: rep movsb; mov %%rsp,%%rsi; 3: pop %%rax; "
"pop %%r15; pop %%r14; pop %%r13; pop %%r12; "
"pop %%r11; pop %%r10; pop %%r9; pop %%r8; "
"pop %%rbp; pop %%rdi; pop %%rdx; pop %%rcx; pop %%rbx; pop %%rax"
@@ -120,13 +123,20 @@ static void __prepare_to_wait(struct wai
"push %%eax; push %%ebx; push %%ecx; push %%edx; push %%edi; "
"push %%ebp; call 1f; "
"1: mov 8(%%esp),%%edi; mov 16(%%esp),%%ecx; mov %%esp,%%esi; "
- "sub %%esi,%%ecx; rep movsb; mov %%esp,%%esi; pop %%eax; "
+ "sub %%esi,%%ecx; cmp %3,%%ecx; jbe 2f; "
+ "xor %%esi,%%esi; jmp 3f; "
+ "2: rep movsb; mov %%esp,%%esi; 3: pop %%eax; "
"pop %%ebp; pop %%edi; pop %%edx; pop %%ecx; pop %%ebx; pop %%eax"
#endif
: "=S" (wqv->esp)
- : "c" (cpu_info), "D" (wqv->stack)
+ : "c" (cpu_info), "D" (wqv->stack), "i" (PAGE_SIZE)
: "memory" );
- BUG_ON((cpu_info - (char *)wqv->esp) > PAGE_SIZE);
+
+ if ( unlikely(wqv->esp == 0) )
+ {
+ gdprintk(XENLOG_ERR, "Stack too large in %s\n", __FUNCTION__);
+ domain_crash_synchronous();
+ }
}
static void __finish_wait(struct waitqueue_vcpu *wqv)
@@ -162,6 +172,7 @@ void prepare_to_wait(struct waitqueue_he
struct vcpu *curr = current;
struct waitqueue_vcpu *wqv = curr->waitqueue_vcpu;
+ ASSERT(!in_atomic());
ASSERT(list_empty(&wqv->list));
spin_lock(&wq->lock);
++++++
24196-waitqueue_Reorder_prepare_to_wait_so_that_vcpu_is_definitely_on_the.patch
++++++
changeset: 24196:de4fe05fe887
user: Keir Fraser <keir@xxxxxxx>
date: Thu Nov 24 15:49:25 2011 +0000
files: xen/common/wait.c
description:
waitqueue: Reorder prepare_to_wait() so that vcpu is definitely on the
queue on exit, even after a wakeup.
Otherwise, when we go round the loop in wait_event(), we may not
actually sleep after the first iteration, as we do not put ourselves
back on the queue on wakeup.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
---
xen/common/wait.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
Index: xen-4.1.2-testing/xen/common/wait.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/wait.c
+++ xen-4.1.2-testing/xen/common/wait.c
@@ -107,6 +107,8 @@ static void __prepare_to_wait(struct wai
{
char *cpu_info = (char *)get_cpu_info();
+ ASSERT(wqv->esp == 0);
+
asm volatile (
#ifdef CONFIG_X86_64
"push %%rax; push %%rbx; push %%rcx; push %%rdx; push %%rdi; "
@@ -173,14 +175,13 @@ void prepare_to_wait(struct waitqueue_he
struct waitqueue_vcpu *wqv = curr->waitqueue_vcpu;
ASSERT(!in_atomic());
- ASSERT(list_empty(&wqv->list));
+ __prepare_to_wait(wqv);
+ ASSERT(list_empty(&wqv->list));
spin_lock(&wq->lock);
list_add_tail(&wqv->list, &wq->list);
vcpu_pause_nosync(curr);
spin_unlock(&wq->lock);
-
- __prepare_to_wait(wqv);
}
void finish_wait(struct waitqueue_head *wq)
++++++
24197-x86-waitqueue_Because_we_have_per-cpu_stacks_we_must_wake_up_on_teh.patch
++++++
changeset: 24197:ca92c4a8b31f
user: Keir Fraser <keir@xxxxxxx>
date: Thu Nov 24 15:50:08 2011 +0000
files: xen/common/wait.c
description:
x86/waitqueue: Because we have per-cpu stacks, we must wake up on teh
same cpu that we slept on. Otherwise stack references are bogus on
wakeup.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
---
xen/common/wait.c | 27 +++++++++++++++++++++++++++
1 file changed, 27 insertions(+)
Index: xen-4.1.2-testing/xen/common/wait.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/wait.c
+++ xen-4.1.2-testing/xen/common/wait.c
@@ -34,6 +34,8 @@ struct waitqueue_vcpu {
*/
void *esp;
char *stack;
+ cpumask_t saved_affinity;
+ unsigned int wakeup_cpu;
#endif
};
@@ -106,9 +108,19 @@ void wake_up(struct waitqueue_head *wq)
static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
{
char *cpu_info = (char *)get_cpu_info();
+ struct vcpu *curr = current;
ASSERT(wqv->esp == 0);
+ /* Save current VCPU affinity; force wakeup on *this* CPU only. */
+ wqv->wakeup_cpu = smp_processor_id();
+ wqv->saved_affinity = curr->cpu_affinity;
+ if ( vcpu_set_affinity(curr, cpumask_of(wqv->wakeup_cpu)) )
+ {
+ gdprintk(XENLOG_ERR, "Unable to set vcpu affinity\n");
+ domain_crash_synchronous();
+ }
+
asm volatile (
#ifdef CONFIG_X86_64
"push %%rax; push %%rbx; push %%rcx; push %%rdx; push %%rdi; "
@@ -144,6 +156,7 @@ static void __prepare_to_wait(struct wai
static void __finish_wait(struct waitqueue_vcpu *wqv)
{
wqv->esp = NULL;
+ (void)vcpu_set_affinity(current, &wqv->saved_affinity);
}
void check_wakeup_from_wait(void)
@@ -155,6 +168,20 @@ void check_wakeup_from_wait(void)
if ( likely(wqv->esp == NULL) )
return;
+ /* Check if we woke up on the wrong CPU. */
+ if ( unlikely(smp_processor_id() != wqv->wakeup_cpu) )
+ {
+ /* Re-set VCPU affinity and re-enter the scheduler. */
+ struct vcpu *curr = current;
+ wqv->saved_affinity = curr->cpu_affinity;
+ if ( vcpu_set_affinity(curr, cpumask_of(wqv->wakeup_cpu)) )
+ {
+ gdprintk(XENLOG_ERR, "Unable to set vcpu affinity\n");
+ domain_crash_synchronous();
+ }
+ wait(); /* takes us back into the scheduler */
+ }
+
asm volatile (
"mov %1,%%"__OP"sp; rep movsb; jmp *(%%"__OP"sp)"
: : "S" (wqv->stack), "D" (wqv->esp),
++++++ 24201-x86-pcpu-platform-op.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1322153786 -3600
# Node ID 9c6bea25f71233787a36893deaf0e811f2dcb8d8
# Parent 480531cab3f4468b1ec9b549bc84d66e420ce685
x86: small fixes to pcpu platform op handling
XENPF_get_cpuinfo should init the flags output field rather than only
modify it.
XENPF_cpu_online must check for the input CPU number to be in range.
XENPF_cpu_offline must also do that, and should also reject attempts to
offline CPU 0 (this fails in cpu_down() too, but preventing this here
appears more correct given that the code here calls
continue_hypercall_on_cpu(0, ...), which would be flawed if cpu_down()
would ever allow bringing down CPU 0 (and a distinct error code is
easier to deal with when debugging issues).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -449,13 +449,14 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
if ( (g_info->xen_cpuid >= NR_CPUS) ||
!cpu_present(g_info->xen_cpuid) )
{
- g_info->flags |= XEN_PCPU_FLAGS_INVALID;
+ g_info->flags = XEN_PCPU_FLAGS_INVALID;
}
else
{
g_info->apic_id = x86_cpu_to_apicid[g_info->xen_cpuid];
g_info->acpi_id = acpi_get_processor_id(g_info->xen_cpuid);
ASSERT(g_info->apic_id != BAD_APICID);
+ g_info->flags = 0;
if (cpu_online(g_info->xen_cpuid))
g_info->flags |= XEN_PCPU_FLAGS_ONLINE;
}
@@ -472,7 +473,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
{
int cpu = op->u.cpu_ol.cpuid;
- if ( !cpu_present(cpu) )
+ if ( cpu >= NR_CPUS || !cpu_present(cpu) )
{
ret = -EINVAL;
break;
@@ -493,7 +494,13 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
{
int cpu = op->u.cpu_ol.cpuid;
- if ( !cpu_present(cpu) )
+ if ( cpu == 0 )
+ {
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ if ( cpu >= NR_CPUS || !cpu_present(cpu) )
{
ret = -EINVAL;
break;
++++++ 24208-xenpaging_remove_filename_from_comment.patch ++++++
changeset: 24208:31fce41fc2b2
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:01:15 2011 +0100
files: tools/xenpaging/file_ops.c tools/xenpaging/policy_default.c
tools/xenpaging/xenpaging.c
description:
xenpaging: remove filename from comment
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/file_ops.c | 1 -
tools/xenpaging/policy_default.c | 1 -
tools/xenpaging/xenpaging.c | 1 -
3 files changed, 3 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/file_ops.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/file_ops.c
+++ xen-4.1.2-testing/tools/xenpaging/file_ops.c
@@ -1,5 +1,4 @@
/******************************************************************************
- * tools/xenpaging/file_ops.c
*
* Common file operations.
*
Index: xen-4.1.2-testing/tools/xenpaging/policy_default.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/policy_default.c
+++ xen-4.1.2-testing/tools/xenpaging/policy_default.c
@@ -1,5 +1,4 @@
/******************************************************************************
- * tools/xenpaging/policy.c
*
* Xen domain paging default policy.
*
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -1,5 +1,4 @@
/******************************************************************************
- * tools/xenpaging/xenpaging.c
*
* Domain paging.
* Copyright (c) 2009 by Citrix Systems, Inc. (Patrick Colp)
++++++ 24209-xenpaging_remove_obsolete_comment_in_resume_path.patch ++++++
changeset: 24209:fe8946916512
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:01:20 2011 +0100
files: tools/xenpaging/xenpaging.c
description:
xenpaging: remove obsolete comment in resume path
Remove stale comment.
If a page was populated several times the vcpu is paused and
xenpaging has to unpause it again.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 1 -
1 file changed, 1 deletion(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -744,7 +744,6 @@ int main(int argc, char *argv[])
!!(req.flags & MEM_EVENT_FLAG_EVICT_FAIL) );
/* Tell Xen to resume the vcpu */
- /* XXX: Maybe just check if the vcpu was paused? */
if ( req.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
{
/* Prepare the response */
++++++ 24210-xenpaging_use_PERROR_to_print_errno.patch ++++++
changeset: 24210:d1d54cdc4a7b
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:01:32 2011 +0100
files: tools/xenpaging/xenpaging.c
description:
xenpaging: use PERROR to print errno
v3:
- adjust arguments for xc_mem_paging_enable() failures
v2:
- move changes to file_op() to different patch
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 58 ++++++++++++++++++++++----------------------
1 file changed, 29 insertions(+), 29 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -90,7 +90,7 @@ static int xenpaging_wait_for_event_or_t
if (errno == EINTR)
return 0;
- ERROR("Poll exited with an error");
+ PERROR("Poll exited with an error");
return -errno;
}
@@ -121,7 +121,7 @@ static int xenpaging_wait_for_event_or_t
port = xc_evtchn_pending(xce);
if ( port == -1 )
{
- ERROR("Failed to read port from event channel");
+ PERROR("Failed to read port from event channel");
rc = -1;
goto err;
}
@@ -129,7 +129,7 @@ static int xenpaging_wait_for_event_or_t
rc = xc_evtchn_unmask(xce, port);
if ( rc < 0 )
{
- ERROR("Failed to unmask event channel port");
+ PERROR("Failed to unmask event channel port");
}
}
err:
@@ -185,7 +185,7 @@ static xenpaging_t *xenpaging_init(domid
paging->xs_handle = xs_open(0);
if ( paging->xs_handle == NULL )
{
- ERROR("Error initialising xenstore connection");
+ PERROR("Error initialising xenstore connection");
goto err;
}
@@ -193,7 +193,7 @@ static xenpaging_t *xenpaging_init(domid
snprintf(watch_token, sizeof(watch_token), "%u", domain_id);
if ( xs_watch(paging->xs_handle, "@releaseDomain", watch_token) == false )
{
- ERROR("Could not bind to shutdown watch\n");
+ PERROR("Could not bind to shutdown watch\n");
goto err;
}
@@ -214,7 +214,7 @@ static xenpaging_t *xenpaging_init(domid
paging->mem_event.shared_page = init_page();
if ( paging->mem_event.shared_page == NULL )
{
- ERROR("Error initialising shared page");
+ PERROR("Error initialising shared page");
goto err;
}
@@ -222,7 +222,7 @@ static xenpaging_t *xenpaging_init(domid
paging->mem_event.ring_page = init_page();
if ( paging->mem_event.ring_page == NULL )
{
- ERROR("Error initialising ring page");
+ PERROR("Error initialising ring page");
goto err;
}
@@ -249,7 +249,7 @@ static xenpaging_t *xenpaging_init(domid
ERROR("xenpaging not supported in a PoD guest");
break;
default:
- ERROR("Error initialising shared page: %s", strerror(errno));
+ PERROR("Error initialising shared page");
break;
}
goto err;
@@ -259,7 +259,7 @@ static xenpaging_t *xenpaging_init(domid
paging->mem_event.xce_handle = xc_evtchn_open(NULL, 0);
if ( paging->mem_event.xce_handle == NULL )
{
- ERROR("Failed to open event channel");
+ PERROR("Failed to open event channel");
goto err;
}
@@ -269,7 +269,7 @@ static xenpaging_t *xenpaging_init(domid
paging->mem_event.shared_page->port);
if ( rc < 0 )
{
- ERROR("Failed to bind event channel");
+ PERROR("Failed to bind event channel");
goto err;
}
@@ -279,7 +279,7 @@ static xenpaging_t *xenpaging_init(domid
paging->domain_info = malloc(sizeof(xc_domaininfo_t));
if ( paging->domain_info == NULL )
{
- ERROR("Error allocating memory for domain info");
+ PERROR("Error allocating memory for domain info");
goto err;
}
@@ -287,7 +287,7 @@ static xenpaging_t *xenpaging_init(domid
paging->domain_info);
if ( rc != 1 )
{
- ERROR("Error getting domain info");
+ PERROR("Error getting domain info");
goto err;
}
@@ -295,7 +295,7 @@ static xenpaging_t *xenpaging_init(domid
paging->bitmap = bitmap_alloc(paging->domain_info->max_pages);
if ( !paging->bitmap )
{
- ERROR("Error allocating bitmap");
+ PERROR("Error allocating bitmap");
goto err;
}
DPRINTF("max_pages = %"PRIx64"\n", paging->domain_info->max_pages);
@@ -311,7 +311,7 @@ static xenpaging_t *xenpaging_init(domid
rc = policy_init(paging);
if ( rc != 0 )
{
- ERROR("Error initialising policy");
+ PERROR("Error initialising policy");
goto err;
}
@@ -358,14 +358,14 @@ static int xenpaging_teardown(xenpaging_
rc = xc_mem_paging_disable(xch, paging->mem_event.domain_id);
if ( rc != 0 )
{
- ERROR("Error tearing down domain paging in xen");
+ PERROR("Error tearing down domain paging in xen");
}
/* Unbind VIRQ */
rc = xc_evtchn_unbind(paging->mem_event.xce_handle,
paging->mem_event.port);
if ( rc != 0 )
{
- ERROR("Error unbinding event port");
+ PERROR("Error unbinding event port");
}
paging->mem_event.port = -1;
@@ -373,7 +373,7 @@ static int xenpaging_teardown(xenpaging_
rc = xc_evtchn_close(paging->mem_event.xce_handle);
if ( rc != 0 )
{
- ERROR("Error closing event channel");
+ PERROR("Error closing event channel");
}
paging->mem_event.xce_handle = NULL;
@@ -384,7 +384,7 @@ static int xenpaging_teardown(xenpaging_
rc = xc_interface_close(xch);
if ( rc != 0 )
{
- ERROR("Error closing connection to xen");
+ PERROR("Error closing connection to xen");
}
return 0;
@@ -444,7 +444,7 @@ static int xenpaging_evict_page(xenpagin
PROT_READ | PROT_WRITE, &gfn, 1);
if ( page == NULL )
{
- ERROR("Error mapping page");
+ PERROR("Error mapping page");
goto out;
}
@@ -452,8 +452,8 @@ static int xenpaging_evict_page(xenpagin
ret = write_page(fd, page, i);
if ( ret != 0 )
{
+ PERROR("Error copying page");
munmap(page, PAGE_SIZE);
- ERROR("Error copying page");
goto out;
}
@@ -464,7 +464,7 @@ static int xenpaging_evict_page(xenpagin
victim->gfn);
if ( ret != 0 )
{
- ERROR("Error evicting page");
+ PERROR("Error evicting page");
goto out;
}
@@ -520,7 +520,7 @@ static int xenpaging_populate_page(xenpa
sleep(1);
continue;
}
- ERROR("Error preparing for page in");
+ PERROR("Error preparing for page in");
goto out_map;
}
}
@@ -532,7 +532,7 @@ static int xenpaging_populate_page(xenpa
PROT_READ | PROT_WRITE, &gfn, 1);
if ( page == NULL )
{
- ERROR("Error mapping page: page is null");
+ PERROR("Error mapping page: page is null");
goto out_map;
}
@@ -540,7 +540,7 @@ static int xenpaging_populate_page(xenpa
ret = read_page(fd, page, i);
if ( ret != 0 )
{
- ERROR("Error reading page");
+ PERROR("Error reading page");
goto out;
}
@@ -579,7 +579,7 @@ static int evict_victim(xenpaging_t *pag
{
if ( j++ % 1000 == 0 )
if ( xenpaging_mem_paging_flush_ioemu_cache(paging) )
- ERROR("Error flushing ioemu cache");
+ PERROR("Error flushing ioemu cache");
}
}
while ( ret );
@@ -670,7 +670,7 @@ int main(int argc, char *argv[])
rc = xenpaging_wait_for_event_or_timeout(paging);
if ( rc < 0 )
{
- ERROR("Error getting event");
+ PERROR("Error getting event");
goto out;
}
else if ( rc != 0 )
@@ -710,7 +710,7 @@ int main(int argc, char *argv[])
rc = xenpaging_populate_page(paging, req.gfn, fd, i);
if ( rc != 0 )
{
- ERROR("Error populating page");
+ PERROR("Error populating page");
goto out;
}
}
@@ -723,7 +723,7 @@ int main(int argc, char *argv[])
rc = xenpaging_resume_page(paging, &rsp, 1);
if ( rc != 0 )
{
- ERROR("Error resuming page");
+ PERROR("Error resuming page");
goto out;
}
@@ -754,7 +754,7 @@ int main(int argc, char *argv[])
rc = xenpaging_resume_page(paging, &rsp, 0);
if ( rc != 0 )
{
- ERROR("Error resuming");
+ PERROR("Error resuming");
goto out;
}
}
++++++ 24211-xenpaging_simplify_file_op.patch ++++++
changeset: 24211:8ddac056a89e
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:01:39 2011 +0100
files: tools/xenpaging/file_ops.c
description:
xenpaging: simplify file_op
Catch lseek() errors.
Use -1 as return value and let caller read errno.
Remove const casts from buffer pointers, the page is writeable.
Use wrapper for write() which matches the read() prototype.
Remove unused stdarg.h inclusion.
Remove unused macro.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/file_ops.c | 29 +++++++++--------------------
1 file changed, 9 insertions(+), 20 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/file_ops.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/file_ops.c
+++ xen-4.1.2-testing/tools/xenpaging/file_ops.c
@@ -21,55 +21,44 @@
#include <unistd.h>
-#include <stdarg.h>
#include <xc_private.h>
-
-#define page_offset(_pfn) (((off_t)(_pfn)) << PAGE_SHIFT)
-
-
static int file_op(int fd, void *page, int i,
- ssize_t (*fn)(int, const void *, size_t))
+ ssize_t (*fn)(int, void *, size_t))
{
off_t seek_ret;
- int total;
+ int total = 0;
int bytes;
- int ret;
seek_ret = lseek(fd, i << PAGE_SHIFT, SEEK_SET);
+ if ( seek_ret == (off_t)-1 )
+ return -1;
- total = 0;
while ( total < PAGE_SIZE )
{
bytes = fn(fd, page + total, PAGE_SIZE - total);
if ( bytes <= 0 )
- {
- ret = -errno;
- goto err;
- }
+ return -1;
total += bytes;
}
return 0;
-
- err:
- return ret;
}
-static ssize_t my_read(int fd, const void *buf, size_t count)
+static ssize_t my_write(int fd, void *buf, size_t count)
{
- return read(fd, (void *)buf, count);
+ return write(fd, buf, count);
}
int read_page(int fd, void *page, int i)
{
- return file_op(fd, page, i, &my_read);
+ return file_op(fd, page, i, &read);
}
int write_page(int fd, void *page, int i)
{
- return file_op(fd, page, i, &write);
+ return file_op(fd, page, i, &my_write);
}
++++++ 24212-xenpaging_print_gfn_in_failure_case.patch ++++++
changeset: 24212:fbc31627adde
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:01:41 2011 +0100
files: tools/xenpaging/xenpaging.c
description:
xenpaging: print gfn in failure case
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -444,7 +444,7 @@ static int xenpaging_evict_page(xenpagin
PROT_READ | PROT_WRITE, &gfn, 1);
if ( page == NULL )
{
- PERROR("Error mapping page");
+ PERROR("Error mapping page %lx", victim->gfn);
goto out;
}
@@ -452,7 +452,7 @@ static int xenpaging_evict_page(xenpagin
ret = write_page(fd, page, i);
if ( ret != 0 )
{
- PERROR("Error copying page");
+ PERROR("Error copying page %lx", victim->gfn);
munmap(page, PAGE_SIZE);
goto out;
}
@@ -464,7 +464,7 @@ static int xenpaging_evict_page(xenpagin
victim->gfn);
if ( ret != 0 )
{
- PERROR("Error evicting page");
+ PERROR("Error evicting page %lx", victim->gfn);
goto out;
}
@@ -520,7 +520,7 @@ static int xenpaging_populate_page(xenpa
sleep(1);
continue;
}
- PERROR("Error preparing for page in");
+ PERROR("Error preparing %"PRI_xen_pfn" for page-in", gfn);
goto out_map;
}
}
@@ -532,7 +532,7 @@ static int xenpaging_populate_page(xenpa
PROT_READ | PROT_WRITE, &gfn, 1);
if ( page == NULL )
{
- PERROR("Error mapping page: page is null");
+ PERROR("Error mapping page %"PRI_xen_pfn": page is null", gfn);
goto out_map;
}
@@ -540,7 +540,7 @@ static int xenpaging_populate_page(xenpa
ret = read_page(fd, page, i);
if ( ret != 0 )
{
- PERROR("Error reading page");
+ PERROR("Error reading page %"PRI_xen_pfn"", gfn);
goto out;
}
@@ -710,7 +710,7 @@ int main(int argc, char *argv[])
rc = xenpaging_populate_page(paging, req.gfn, fd, i);
if ( rc != 0 )
{
- PERROR("Error populating page");
+ PERROR("Error populating page %"PRIx64"", req.gfn);
goto out;
}
}
@@ -723,7 +723,7 @@ int main(int argc, char *argv[])
rc = xenpaging_resume_page(paging, &rsp, 1);
if ( rc != 0 )
{
- PERROR("Error resuming page");
+ PERROR("Error resuming page %"PRIx64"", req.gfn);
goto out;
}
@@ -754,7 +754,7 @@ int main(int argc, char *argv[])
rc = xenpaging_resume_page(paging, &rsp, 0);
if ( rc != 0 )
{
- PERROR("Error resuming");
+ PERROR("Error resuming page %"PRIx64"", req.gfn);
goto out;
}
}
++++++ 24213-xenpaging_update_xenpaging_init.patch ++++++
changeset: 24213:e3cbbad72382
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:02:22 2011 +0100
files: tools/xenpaging/xenpaging.c
description:
xenpaging: update xenpaging_init
Move comment about xc_handle to the right place.
Allocate paging early and use calloc.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -169,18 +169,21 @@ static xenpaging_t *xenpaging_init(domid
char *p;
int rc;
+ /* Allocate memory */
+ paging = calloc(1, sizeof(xenpaging_t));
+ if ( !paging )
+ goto err;
+
if ( getenv("XENPAGING_DEBUG") )
dbg = (xentoollog_logger *)xtl_createlogger_stdiostream(stderr,
XTL_DEBUG, 0);
- xch = xc_interface_open(dbg, NULL, 0);
+
+ /* Open connection to xen */
+ paging->xc_handle = xch = xc_interface_open(dbg, NULL, 0);
if ( !xch )
- goto err_iface;
+ goto err;
DPRINTF("xenpaging init\n");
- /* Allocate memory */
- paging = malloc(sizeof(xenpaging_t));
- memset(paging, 0, sizeof(xenpaging_t));
-
/* Open connection to xenstore */
paging->xs_handle = xs_open(0);
if ( paging->xs_handle == NULL )
@@ -204,9 +207,6 @@ static xenpaging_t *xenpaging_init(domid
DPRINTF("Setting policy mru_size to %d\n", paging->policy_mru_size);
}
- /* Open connection to xen */
- paging->xc_handle = xch;
-
/* Set domain id */
paging->mem_event.domain_id = domain_id;
@@ -322,7 +322,8 @@ static xenpaging_t *xenpaging_init(domid
{
if ( paging->xs_handle )
xs_close(paging->xs_handle);
- xc_interface_close(xch);
+ if ( xch )
+ xc_interface_close(xch);
if ( paging->mem_event.shared_page )
{
munlock(paging->mem_event.shared_page, PAGE_SIZE);
@@ -340,7 +341,6 @@ static xenpaging_t *xenpaging_init(domid
free(paging);
}
- err_iface:
return NULL;
}
++++++ 24214-xenpaging_remove_xc_dominfo_t_from_paging_t.patch ++++++
changeset: 24214:f06595abfa88
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:02:36 2011 +0100
files: tools/xenpaging/policy_default.c tools/xenpaging/xenpaging.c
tools/xenpaging/xenpaging.h
description:
xenpaging: remove xc_dominfo_t from paging_t
Remove xc_dominfo_t from paging_t, record only max_pages.
This value is used to setup internal data structures.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/policy_default.c | 8 ++++----
tools/xenpaging/xenpaging.c | 27 +++++++++++----------------
tools/xenpaging/xenpaging.h | 4 ++--
3 files changed, 17 insertions(+), 22 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/policy_default.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/policy_default.c
+++ xen-4.1.2-testing/tools/xenpaging/policy_default.c
@@ -41,17 +41,17 @@ int policy_init(xenpaging_t *paging)
int i;
int rc = -ENOMEM;
+ max_pages = paging->max_pages;
+
/* Allocate bitmap for pages not to page out */
- bitmap = bitmap_alloc(paging->domain_info->max_pages);
+ bitmap = bitmap_alloc(max_pages);
if ( !bitmap )
goto out;
/* Allocate bitmap to track unusable pages */
- unconsumed = bitmap_alloc(paging->domain_info->max_pages);
+ unconsumed = bitmap_alloc(max_pages);
if ( !unconsumed )
goto out;
- max_pages = paging->domain_info->max_pages;
-
/* Initialise MRU list of paged in pages */
if ( paging->policy_mru_size > 0 )
mru_size = paging->policy_mru_size;
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -164,6 +164,7 @@ static void *init_page(void)
static xenpaging_t *xenpaging_init(domid_t domain_id, int num_pages)
{
xenpaging_t *paging;
+ xc_domaininfo_t domain_info;
xc_interface *xch;
xentoollog_logger *dbg = NULL;
char *p;
@@ -275,34 +276,29 @@ static xenpaging_t *xenpaging_init(domid
paging->mem_event.port = rc;
- /* Get domaininfo */
- paging->domain_info = malloc(sizeof(xc_domaininfo_t));
- if ( paging->domain_info == NULL )
- {
- PERROR("Error allocating memory for domain info");
- goto err;
- }
-
rc = xc_domain_getinfolist(xch, paging->mem_event.domain_id, 1,
- paging->domain_info);
+ &domain_info);
if ( rc != 1 )
{
PERROR("Error getting domain info");
goto err;
}
+ /* Record number of max_pages */
+ paging->max_pages = domain_info.max_pages;
+
/* Allocate bitmap for tracking pages that have been paged out */
- paging->bitmap = bitmap_alloc(paging->domain_info->max_pages);
+ paging->bitmap = bitmap_alloc(paging->max_pages);
if ( !paging->bitmap )
{
PERROR("Error allocating bitmap");
goto err;
}
- DPRINTF("max_pages = %"PRIx64"\n", paging->domain_info->max_pages);
+ DPRINTF("max_pages = %d\n", paging->max_pages);
- if ( num_pages < 0 || num_pages > paging->domain_info->max_pages )
+ if ( num_pages < 0 || num_pages > paging->max_pages )
{
- num_pages = paging->domain_info->max_pages;
+ num_pages = paging->max_pages;
DPRINTF("setting num_pages to %d\n", num_pages);
}
paging->num_pages = num_pages;
@@ -337,7 +333,6 @@ static xenpaging_t *xenpaging_init(domid
}
free(paging->bitmap);
- free(paging->domain_info);
free(paging);
}
@@ -765,7 +760,7 @@ int main(int argc, char *argv[])
if ( interrupted == SIGTERM || interrupted == SIGINT )
{
int num = 0;
- for ( i = 0; i < paging->domain_info->max_pages; i++ )
+ for ( i = 0; i < paging->max_pages; i++ )
{
if ( test_bit(i, paging->bitmap) )
{
@@ -781,7 +776,7 @@ int main(int argc, char *argv[])
*/
if ( num )
page_in_trigger();
- else if ( i == paging->domain_info->max_pages )
+ else if ( i == paging->max_pages )
break;
}
else
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.h
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.h
@@ -44,11 +44,11 @@ typedef struct xenpaging {
xc_interface *xc_handle;
struct xs_handle *xs_handle;
- xc_domaininfo_t *domain_info;
-
unsigned long *bitmap;
mem_event_t mem_event;
+ /* number of pages for which data structures were allocated */
+ int max_pages;
int num_pages;
int policy_mru_size;
unsigned long pagein_queue[XENPAGING_PAGEIN_QUEUE_SIZE];
++++++ 24215-xenpaging_track_the_number_of_paged-out_pages.patch ++++++
changeset: 24215:dc7dabe2fe99
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:02:38 2011 +0100
files: tools/xenpaging/xenpaging.c tools/xenpaging/xenpaging.h
description:
xenpaging: track the number of paged-out pages
This change is required by subsequent changes.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 8 ++++++++
tools/xenpaging/xenpaging.h | 1 +
2 files changed, 9 insertions(+)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -467,6 +467,9 @@ static int xenpaging_evict_page(xenpagin
/* Notify policy of page being paged out */
policy_notify_paged_out(victim->gfn);
+ /* Record number of evicted pages */
+ paging->num_paged_out++;
+
out:
return ret;
}
@@ -480,8 +483,13 @@ static int xenpaging_resume_page(xenpagi
/* Notify policy of page being paged in */
if ( notify_policy )
+ {
policy_notify_paged_in(rsp->gfn);
+ /* Record number of resumed pages */
+ paging->num_paged_out--;
+ }
+
/* Tell Xen page is ready */
ret = xc_mem_paging_resume(paging->xc_handle, paging->mem_event.domain_id,
rsp->gfn);
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.h
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.h
@@ -49,6 +49,7 @@ typedef struct xenpaging {
mem_event_t mem_event;
/* number of pages for which data structures were allocated */
int max_pages;
+ int num_paged_out;
int num_pages;
int policy_mru_size;
unsigned long pagein_queue[XENPAGING_PAGEIN_QUEUE_SIZE];
++++++ 24216-xenpaging_move_page_add-resume_loops_into_its_own_function..patch
++++++
changeset: 24216:4fe585c2a3e5
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:02:39 2011 +0100
files: tools/xenpaging/xenpaging.c
description:
xenpaging: move page add/resume loops into its own function.
Move page resume loop into its own function.
Move page eviction loop into its own function.
Allocate all possible slots in a paging file to allow growing and
shrinking of the number of paged-out pages. Adjust other places to
iterate over all slots.
This change is required by subsequent patches.
v2:
- check if victims allocation succeeded
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 94 ++++++++++++++++++++++++++++----------------
1 file changed, 61 insertions(+), 33 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -553,6 +553,27 @@ static int xenpaging_populate_page(xenpa
return ret;
}
+/* Trigger a page-in for a batch of pages */
+static void resume_pages(xenpaging_t *paging, int num_pages)
+{
+ xc_interface *xch = paging->xc_handle;
+ int i, num = 0;
+
+ for ( i = 0; i < paging->max_pages && num < num_pages; i++ )
+ {
+ if ( test_bit(i, paging->bitmap) )
+ {
+ paging->pagein_queue[num] = i;
+ num++;
+ if ( num == XENPAGING_PAGEIN_QUEUE_SIZE )
+ break;
+ }
+ }
+ /* num may be less than num_pages, caller has to try again */
+ if ( num )
+ page_in_trigger();
+}
+
static int evict_victim(xenpaging_t *paging,
xenpaging_victim_t *victim, int fd, int i)
{
@@ -596,6 +617,30 @@ static int evict_victim(xenpaging_t *pag
return ret;
}
+/* Evict a batch of pages and write them to a free slot in the paging file */
+static int evict_pages(xenpaging_t *paging, int fd, xenpaging_victim_t
*victims, int num_pages)
+{
+ xc_interface *xch = paging->xc_handle;
+ int rc, slot, num = 0;
+
+ for ( slot = 0; slot < paging->max_pages && num < num_pages; slot++ )
+ {
+ /* Slot is allocated */
+ if ( victims[slot].gfn != INVALID_MFN )
+ continue;
+
+ rc = evict_victim(paging, &victims[slot], fd, slot);
+ if ( rc == -ENOSPC )
+ break;
+ if ( rc == -EINTR )
+ break;
+ if ( num && num % 100 == 0 )
+ DPRINTF("%d pages evicted\n", num);
+ num++;
+ }
+ return num;
+}
+
int main(int argc, char *argv[])
{
struct sigaction act;
@@ -638,7 +683,14 @@ int main(int argc, char *argv[])
return 2;
}
- victims = calloc(paging->num_pages, sizeof(xenpaging_victim_t));
+ /* Allocate upper limit of pages to allow growing and shrinking */
+ victims = calloc(paging->max_pages, sizeof(xenpaging_victim_t));
+ if ( !victims )
+ goto out;
+
+ /* Mark all slots as unallocated */
+ for ( i = 0; i < paging->max_pages; i++ )
+ victims[i].gfn = INVALID_MFN;
/* ensure that if we get a signal, we'll do cleanup, then exit */
act.sa_handler = close_handler;
@@ -652,18 +704,7 @@ int main(int argc, char *argv[])
/* listen for page-in events to stop pager */
create_page_in_thread(paging);
- /* Evict pages */
- for ( i = 0; i < paging->num_pages; i++ )
- {
- rc = evict_victim(paging, &victims[i], fd, i);
- if ( rc == -ENOSPC )
- break;
- if ( rc == -EINTR )
- break;
- if ( i % 100 == 0 )
- DPRINTF("%d pages evicted\n", i);
- }
-
+ i = evict_pages(paging, fd, victims, paging->num_pages);
DPRINTF("%d pages evicted. Done.\n", i);
/* Swap pages in and out */
@@ -689,13 +730,13 @@ int main(int argc, char *argv[])
if ( test_and_clear_bit(req.gfn, paging->bitmap) )
{
/* Find where in the paging file to read from */
- for ( i = 0; i < paging->num_pages; i++ )
+ for ( i = 0; i < paging->max_pages; i++ )
{
if ( victims[i].gfn == req.gfn )
break;
}
- if ( i >= paging->num_pages )
+ if ( i >= paging->max_pages )
{
DPRINTF("Couldn't find page %"PRIx64"\n", req.gfn);
goto out;
@@ -767,25 +808,12 @@ int main(int argc, char *argv[])
/* Write all pages back into the guest */
if ( interrupted == SIGTERM || interrupted == SIGINT )
{
- int num = 0;
- for ( i = 0; i < paging->max_pages; i++ )
- {
- if ( test_bit(i, paging->bitmap) )
- {
- paging->pagein_queue[num] = i;
- num++;
- if ( num == XENPAGING_PAGEIN_QUEUE_SIZE )
- break;
- }
- }
- /*
- * One more round if there are still pages to process.
- * If no more pages to process, exit loop.
- */
- if ( num )
- page_in_trigger();
- else if ( i == paging->max_pages )
+ /* If no more pages to process, exit loop. */
+ if ( !paging->num_paged_out )
break;
+
+ /* One more round if there are still pages to process. */
+ resume_pages(paging, paging->num_paged_out);
}
else
{
++++++ 24217-xenpaging_improve_mainloop_exit_handling.patch ++++++
changeset: 24217:b531f5ceddf0
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:02:39 2011 +0100
files: tools/xenpaging/xenpaging.c
description:
xenpaging: improve mainloop exit handling
Remove the if/else logic to exit from the in case a signal arrives.
Update comments.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -805,7 +805,7 @@ int main(int argc, char *argv[])
}
}
- /* Write all pages back into the guest */
+ /* If interrupted, write all pages back into the guest */
if ( interrupted == SIGTERM || interrupted == SIGINT )
{
/* If no more pages to process, exit loop. */
@@ -814,13 +814,15 @@ int main(int argc, char *argv[])
/* One more round if there are still pages to process. */
resume_pages(paging, paging->num_paged_out);
+
+ /* Resume main loop */
+ continue;
}
- else
- {
- /* Exit on any other signal */
- if ( interrupted )
- break;
- }
+
+ /* Exit main loop on any other signal */
+ if ( interrupted )
+ break;
+
}
DPRINTF("xenpaging got signal %d\n", interrupted);
++++++ 24218-libxc_add_bitmap_clear_function.patch ++++++
changeset: 24218:a2b4ae2becdf
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:02:40 2011 +0100
files: tools/libxc/xc_bitops.h
description:
libxc: add bitmap_clear function
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/libxc/xc_bitops.h | 6 ++++++
1 file changed, 6 insertions(+)
Index: xen-4.1.2-testing/tools/libxc/xc_bitops.h
===================================================================
--- xen-4.1.2-testing.orig/tools/libxc/xc_bitops.h
+++ xen-4.1.2-testing/tools/libxc/xc_bitops.h
@@ -4,6 +4,7 @@
/* bitmap operations for single threaded access */
#include <stdlib.h>
+#include <string.h>
#define BITS_PER_LONG (sizeof(unsigned long) * 8)
#define ORDER_LONG (sizeof(unsigned long) == 4 ? 5 : 6)
@@ -25,6 +26,11 @@ static inline unsigned long *bitmap_allo
return calloc(1, bitmap_size(nr_bits));
}
+static inline void bitmap_clear(unsigned long *addr, int nr_bits)
+{
+ memset(addr, 0, bitmap_size(nr_bits));
+}
+
static inline int test_bit(int nr, volatile unsigned long *addr)
{
return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1;
++++++ 24219-xenpaging_retry_unpageable_gfns.patch ++++++
changeset: 24219:8eba32b9598c
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:02:41 2011 +0100
files: tools/xenpaging/policy_default.c
description:
xenpaging: retry unpageable gfns
Nomination of gfns can fail, but may succeed later.
Thats the case for a guest that starts ballooned.
v2:
- print debug when clearing uncosumed happens
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/policy_default.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
Index: xen-4.1.2-testing/tools/xenpaging/policy_default.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/policy_default.c
+++ xen-4.1.2-testing/tools/xenpaging/policy_default.c
@@ -32,6 +32,7 @@ static unsigned int i_mru;
static unsigned int mru_size;
static unsigned long *bitmap;
static unsigned long *unconsumed;
+static unsigned int unconsumed_cleared;
static unsigned long current_gfn;
static unsigned long max_pages;
@@ -87,8 +88,21 @@ int policy_choose_victim(xenpaging_t *pa
current_gfn++;
if ( current_gfn >= max_pages )
current_gfn = 0;
+ /* Could not nominate any gfn */
if ( wrap == current_gfn )
{
+ /* Count wrap arounds */
+ unconsumed_cleared++;
+ /* Force retry every few seconds (depends on poll() timeout) */
+ if ( unconsumed_cleared > 123)
+ {
+ /* Force retry of unconsumed gfns */
+ bitmap_clear(unconsumed, max_pages);
+ unconsumed_cleared = 0;
+ DPRINTF("clearing unconsumed, wrap %lx", wrap);
+ /* One more round before returning ENOSPC */
+ continue;
+ }
victim->gfn = INVALID_MFN;
return -ENOSPC;
}
++++++ 24220-xenpaging_install_into_LIBEXEC_dir.patch ++++++
changeset: 24220:2087f21befc2
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:02:42 2011 +0100
files: tools/xenpaging/Makefile
description:
xenpaging: install into LIBEXEC dir
In preparation of upcoming libxl integration,
move xenpaging binary from /usr/sbin/ to /usr/lib/xen/bin/
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/Makefile | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/Makefile
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/Makefile
+++ xen-4.1.2-testing/tools/xenpaging/Makefile
@@ -29,8 +29,8 @@ xenpaging: $(OBJS)
install: all
$(INSTALL_DIR) $(DESTDIR)/var/lib/xen/xenpaging
- $(INSTALL_DIR) $(DESTDIR)$(SBINDIR)
- $(INSTALL_PROG) $(IBINS) $(DESTDIR)$(SBINDIR)
+ $(INSTALL_DIR) $(DESTDIR)$(LIBEXEC)
+ $(INSTALL_PROG) $(IBINS) $(DESTDIR)$(LIBEXEC)
clean:
rm -f *.o *~ $(DEPS) xen TAGS $(IBINS) $(LIB)
++++++ 24221-xenpaging_add_XEN_PAGING_DIR_-_libxl_xenpaging_dir_path.patch
++++++
changeset: 24221:cd5948592b10
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:02:43 2011 +0100
files: Config.mk config/StdGNU.mk tools/libxl/libxl.h
tools/libxl/libxl_paths.c tools/xenpaging/Makefile
description:
xenpaging: add XEN_PAGING_DIR / libxl_xenpaging_dir_path()
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
Config.mk | 1 +
config/StdGNU.mk | 2 ++
tools/xenpaging/Makefile | 2 +-
3 files changed, 4 insertions(+), 1 deletion(-)
Index: xen-4.1.2-testing/Config.mk
===================================================================
--- xen-4.1.2-testing.orig/Config.mk
+++ xen-4.1.2-testing/Config.mk
@@ -124,6 +124,7 @@ define buildmakevars2file-closure
echo "XEN_CONFIG_DIR=\"$(XEN_CONFIG_DIR)\"" >> $(1).tmp; \
echo "XEN_SCRIPT_DIR=\"$(XEN_SCRIPT_DIR)\"" >> $(1).tmp; \
echo "XEN_LOCK_DIR=\"$(XEN_LOCK_DIR)\"" >> $(1).tmp; \
+ echo "XEN_PAGING_DIR=\"$(XEN_PAGING_DIR)\"" >> $(1).tmp; \
if ! cmp $(1).tmp $(1); then mv -f $(1).tmp $(1); fi
endef
Index: xen-4.1.2-testing/config/StdGNU.mk
===================================================================
--- xen-4.1.2-testing.orig/config/StdGNU.mk
+++ xen-4.1.2-testing/config/StdGNU.mk
@@ -46,9 +46,11 @@ PRIVATE_BINDIR = $(PRIVATE_PREFIX)/bin
ifeq ($(PREFIX),/usr)
CONFIG_DIR = /etc
XEN_LOCK_DIR = /var/lock
+XEN_PAGING_DIR = /var/lib/xen/xenpaging
else
CONFIG_DIR = $(PREFIX)/etc
XEN_LOCK_DIR = $(PREFIX)/var/lock
+XEN_PAGING_DIR = $(PREFIX)/var/lib/xen/xenpaging
endif
SYSCONFIG_DIR = $(CONFIG_DIR)/$(CONFIG_LEAF_DIR)
Index: xen-4.1.2-testing/tools/xenpaging/Makefile
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/Makefile
+++ xen-4.1.2-testing/tools/xenpaging/Makefile
@@ -28,7 +28,7 @@ xenpaging: $(OBJS)
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ $(LDLIBS)
install: all
- $(INSTALL_DIR) $(DESTDIR)/var/lib/xen/xenpaging
+ $(INSTALL_DIR) $(DESTDIR)$(XEN_PAGING_DIR)
$(INSTALL_DIR) $(DESTDIR)$(LIBEXEC)
$(INSTALL_PROG) $(IBINS) $(DESTDIR)$(LIBEXEC)
++++++ 24222-xenpaging_use_guests_tot_pages_as_working_target.patch ++++++
changeset: 24222:286a741b4d86
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:02:45 2011 +0100
files: tools/xenpaging/policy_default.c tools/xenpaging/xenpaging.c
tools/xenpaging/xenpaging.h
description:
xenpaging: use guests tot_pages as working target
This change reverses the task of xenpaging. Before this change a fixed number
of pages was paged out. With this change the guest will not have access to
more than the given number of pages at the same time.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/policy_default.c | 1
tools/xenpaging/xenpaging.c | 78 ++++++++++++++++++++++++++++++---------
tools/xenpaging/xenpaging.h | 2 -
3 files changed, 61 insertions(+), 20 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/policy_default.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/policy_default.c
+++ xen-4.1.2-testing/tools/xenpaging/policy_default.c
@@ -71,7 +71,6 @@ int policy_init(xenpaging_t *paging)
/* Start in the middle to avoid paging during BIOS startup */
current_gfn = max_pages / 2;
- current_gfn -= paging->num_pages / 2;
rc = 0;
out:
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -136,6 +136,21 @@ err:
return rc;
}
+static int xenpaging_get_tot_pages(xenpaging_t *paging)
+{
+ xc_interface *xch = paging->xc_handle;
+ xc_domaininfo_t domain_info;
+ int rc;
+
+ rc = xc_domain_getinfolist(xch, paging->mem_event.domain_id, 1,
&domain_info);
+ if ( rc != 1 )
+ {
+ PERROR("Error getting domain info");
+ return -1;
+ }
+ return domain_info.tot_pages;
+}
+
static void *init_page(void)
{
void *buffer;
@@ -161,7 +176,7 @@ static void *init_page(void)
return NULL;
}
-static xenpaging_t *xenpaging_init(domid_t domain_id, int num_pages)
+static xenpaging_t *xenpaging_init(domid_t domain_id, int target_tot_pages)
{
xenpaging_t *paging;
xc_domaininfo_t domain_info;
@@ -296,12 +311,7 @@ static xenpaging_t *xenpaging_init(domid
}
DPRINTF("max_pages = %d\n", paging->max_pages);
- if ( num_pages < 0 || num_pages > paging->max_pages )
- {
- num_pages = paging->max_pages;
- DPRINTF("setting num_pages to %d\n", num_pages);
- }
- paging->num_pages = num_pages;
+ paging->target_tot_pages = target_tot_pages;
/* Initialise policy */
rc = policy_init(paging);
@@ -648,7 +658,9 @@ int main(int argc, char *argv[])
xenpaging_victim_t *victims;
mem_event_request_t req;
mem_event_response_t rsp;
+ int num, prev_num = 0;
int i;
+ int tot_pages;
int rc = -1;
int rc1;
xc_interface *xch;
@@ -659,7 +671,7 @@ int main(int argc, char *argv[])
if ( argc != 3 )
{
- fprintf(stderr, "Usage: %s <domain_id> <num_pages>\n", argv[0]);
+ fprintf(stderr, "Usage: %s <domain_id> <tot_pages>\n", argv[0]);
return -1;
}
@@ -672,7 +684,7 @@ int main(int argc, char *argv[])
}
xch = paging->xc_handle;
- DPRINTF("starting %s %u %d\n", argv[0], paging->mem_event.domain_id,
paging->num_pages);
+ DPRINTF("starting %s %u %d\n", argv[0], paging->mem_event.domain_id,
paging->target_tot_pages);
/* Open file */
sprintf(filename, "page_cache_%u", paging->mem_event.domain_id);
@@ -704,9 +716,6 @@ int main(int argc, char *argv[])
/* listen for page-in events to stop pager */
create_page_in_thread(paging);
- i = evict_pages(paging, fd, victims, paging->num_pages);
- DPRINTF("%d pages evicted. Done.\n", i);
-
/* Swap pages in and out */
while ( 1 )
{
@@ -771,12 +780,8 @@ int main(int argc, char *argv[])
goto out;
}
- /* Evict a new page to replace the one we just paged in,
- * or clear this pagefile slot on exit */
- if ( interrupted )
- victims[i].gfn = INVALID_MFN;
- else
- evict_victim(paging, &victims[i], fd, i);
+ /* Clear this pagefile slot */
+ victims[i].gfn = INVALID_MFN;
}
else
{
@@ -823,6 +828,43 @@ int main(int argc, char *argv[])
if ( interrupted )
break;
+ /* Check if the target has been reached already */
+ tot_pages = xenpaging_get_tot_pages(paging);
+ if ( tot_pages < 0 )
+ goto out;
+
+ /* Resume all pages if paging is disabled or no target was set */
+ if ( paging->target_tot_pages == 0 )
+ {
+ if ( paging->num_paged_out )
+ resume_pages(paging, paging->num_paged_out);
+ }
+ /* Evict more pages if target not reached */
+ else if ( tot_pages > paging->target_tot_pages )
+ {
+ num = tot_pages - paging->target_tot_pages;
+ if ( num != prev_num )
+ {
+ DPRINTF("Need to evict %d pages to reach %d
target_tot_pages\n", num, paging->target_tot_pages);
+ prev_num = num;
+ }
+ /* Limit the number of evicts to be able to process page-in
requests */
+ if ( num > 42 )
+ num = 42;
+ evict_pages(paging, fd, victims, num);
+ }
+ /* Resume some pages if target not reached */
+ else if ( tot_pages < paging->target_tot_pages &&
paging->num_paged_out )
+ {
+ num = paging->target_tot_pages - tot_pages;
+ if ( num != prev_num )
+ {
+ DPRINTF("Need to resume %d pages to reach %d
target_tot_pages\n", num, paging->target_tot_pages);
+ prev_num = num;
+ }
+ resume_pages(paging, num);
+ }
+
}
DPRINTF("xenpaging got signal %d\n", interrupted);
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.h
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.h
@@ -50,7 +50,7 @@ typedef struct xenpaging {
/* number of pages for which data structures were allocated */
int max_pages;
int num_paged_out;
- int num_pages;
+ int target_tot_pages;
int policy_mru_size;
unsigned long pagein_queue[XENPAGING_PAGEIN_QUEUE_SIZE];
} xenpaging_t;
++++++
24223-xenpaging_watch_the_guests_memory-target-tot_pages_xenstore_value.patch
++++++
changeset: 24223:9e3c2ef70c8a
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:02:47 2011 +0100
files: tools/xenpaging/xenpaging.c
description:
xenpaging: watch the guests memory/target-tot_pages xenstore value
Subsequent patches will use xenstored to store the numbers of pages
xenpaging is suppose to page-out.
Remove num_pages and use target_pages instead.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 51 +++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 50 insertions(+), 1 deletion(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -19,8 +19,10 @@
*/
#define _XOPEN_SOURCE 600
+#define _GNU_SOURCE
#include <inttypes.h>
+#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <time.h>
@@ -35,6 +37,10 @@
#include "policy.h"
#include "xenpaging.h"
+/* Defines number of mfns a guest should use at a time, in KiB */
+#define WATCH_TARGETPAGES "memory/target-tot_pages"
+static char *watch_target_tot_pages;
+static char *dom_path;
static char watch_token[16];
static char filename[80];
static int interrupted;
@@ -72,7 +78,7 @@ static int xenpaging_wait_for_event_or_t
{
xc_interface *xch = paging->xc_handle;
xc_evtchn *xce = paging->mem_event.xce_handle;
- char **vec;
+ char **vec, *val;
unsigned int num;
struct pollfd fd[2];
int port;
@@ -111,6 +117,25 @@ static int xenpaging_wait_for_event_or_t
rc = 0;
}
}
+ else if ( strcmp(vec[XS_WATCH_PATH], watch_target_tot_pages) == 0 )
+ {
+ int ret, target_tot_pages;
+ val = xs_read(paging->xs_handle, XBT_NULL, vec[XS_WATCH_PATH],
NULL);
+ if ( val )
+ {
+ ret = sscanf(val, "%d", &target_tot_pages);
+ if ( ret > 0 )
+ {
+ /* KiB to pages */
+ target_tot_pages >>= 2;
+ if ( target_tot_pages < 0 || target_tot_pages >
paging->max_pages )
+ target_tot_pages = paging->max_pages;
+ paging->target_tot_pages = target_tot_pages;
+ DPRINTF("new target_tot_pages %d\n", target_tot_pages);
+ }
+ free(val);
+ }
+ }
free(vec);
}
}
@@ -216,6 +241,25 @@ static xenpaging_t *xenpaging_init(domid
goto err;
}
+ /* Watch xenpagings working target */
+ dom_path = xs_get_domain_path(paging->xs_handle, domain_id);
+ if ( !dom_path )
+ {
+ PERROR("Could not find domain path\n");
+ goto err;
+ }
+ if ( asprintf(&watch_target_tot_pages, "%s/%s", dom_path,
WATCH_TARGETPAGES) < 0 )
+ {
+ PERROR("Could not alloc watch path\n");
+ goto err;
+ }
+ DPRINTF("watching '%s'\n", watch_target_tot_pages);
+ if ( xs_watch(paging->xs_handle, watch_target_tot_pages, "") == false )
+ {
+ PERROR("Could not bind to xenpaging watch\n");
+ goto err;
+ }
+
p = getenv("XENPAGING_POLICY_MRU_SIZE");
if ( p && *p )
{
@@ -342,6 +386,8 @@ static xenpaging_t *xenpaging_init(domid
free(paging->mem_event.ring_page);
}
+ free(dom_path);
+ free(watch_target_tot_pages);
free(paging->bitmap);
free(paging);
}
@@ -357,6 +403,9 @@ static int xenpaging_teardown(xenpaging_
if ( paging == NULL )
return 0;
+ xs_unwatch(paging->xs_handle, watch_target_tot_pages, "");
+ xs_unwatch(paging->xs_handle, "@releaseDomain", watch_token);
+
xch = paging->xc_handle;
paging->xc_handle = NULL;
/* Tear down domain paging in Xen */
++++++ 24224-xenpaging_add_cmdline_interface_for_pager.patch ++++++
changeset: 24224:7243fd87410e
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:02:48 2011 +0100
files: tools/xenpaging/xenpaging.c tools/xenpaging/xenpaging.h
description:
xenpaging: add cmdline interface for pager
Introduce a cmdline handling for the pager. This simplifies libxl support,
debug and mru_size are not passed via the environment anymore.
The new interface looks like this:
xenpaging [options] -f <pagefile> -d <domain_id>
options:
-d <domid> --domain=<domid> numerical domain_id of guest. This
option is required.
-f <file> --pagefile=<file> pagefile to use. This option is
required.
-m <max_memkb> --max_memkb=<max_memkb> maximum amount of memory to handle.
-r <num> --mru_size=<num> number of paged-in pages to keep in
memory.
-d --debug enable debug output.
-h --help this output.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 139 ++++++++++++++++++++++++++++++++------------
tools/xenpaging/xenpaging.h | 1
2 files changed, 103 insertions(+), 37 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -31,6 +31,7 @@
#include <poll.h>
#include <xc_private.h>
#include <xs.h>
+#include <getopt.h>
#include "xc_bitops.h"
#include "file_ops.h"
@@ -42,12 +43,12 @@
static char *watch_target_tot_pages;
static char *dom_path;
static char watch_token[16];
-static char filename[80];
+static char *filename;
static int interrupted;
static void unlink_pagefile(void)
{
- if ( filename[0] )
+ if ( filename && filename[0] )
{
unlink(filename);
filename[0] = '\0';
@@ -201,11 +202,85 @@ static void *init_page(void)
return NULL;
}
-static xenpaging_t *xenpaging_init(domid_t domain_id, int target_tot_pages)
+static void usage(void)
+{
+ printf("usage:\n\n");
+
+ printf(" xenpaging [options] -f <pagefile> -d <domain_id>\n\n");
+
+ printf("options:\n");
+ printf(" -d <domid> --domain=<domid> numerical domain_id of
guest. This option is required.\n");
+ printf(" -f <file> --pagefile=<file> pagefile to use. This
option is required.\n");
+ printf(" -m <max_memkb> --max_memkb=<max_memkb> maximum amount of memory
to handle.\n");
+ printf(" -r <num> --mru_size=<num> number of paged-in pages
to keep in memory.\n");
+ printf(" -v --verbose enable debug output.\n");
+ printf(" -h --help this output.\n");
+}
+
+static int xenpaging_getopts(xenpaging_t *paging, int argc, char *argv[])
+{
+ int ch;
+ static const char sopts[] = "hvd:f:m:r:";
+ static const struct option lopts[] = {
+ {"help", 0, NULL, 'h'},
+ {"verbose", 0, NULL, 'v'},
+ {"domain", 1, NULL, 'd'},
+ {"pagefile", 1, NULL, 'f'},
+ {"mru_size", 1, NULL, 'm'},
+ { }
+ };
+
+ while ((ch = getopt_long(argc, argv, sopts, lopts, NULL)) != -1)
+ {
+ switch(ch) {
+ case 'd':
+ paging->mem_event.domain_id = atoi(optarg);
+ break;
+ case 'f':
+ filename = strdup(optarg);
+ break;
+ case 'm':
+ /* KiB to pages */
+ paging->max_pages = atoi(optarg) >> 2;
+ break;
+ case 'r':
+ paging->policy_mru_size = atoi(optarg);
+ break;
+ case 'v':
+ paging->debug = 1;
+ break;
+ case 'h':
+ case '?':
+ usage();
+ return 1;
+ }
+ }
+
+ argv += optind; argc -= optind;
+
+ /* Path to pagefile is required */
+ if ( !filename )
+ {
+ printf("Filename for pagefile missing!\n");
+ usage();
+ return 1;
+ }
+
+ /* Set domain id */
+ if ( !paging->mem_event.domain_id )
+ {
+ printf("Numerical <domain_id> missing!\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+static xenpaging_t *xenpaging_init(int argc, char *argv[])
{
xenpaging_t *paging;
xc_domaininfo_t domain_info;
- xc_interface *xch;
+ xc_interface *xch = NULL;
xentoollog_logger *dbg = NULL;
char *p;
int rc;
@@ -215,7 +290,12 @@ static xenpaging_t *xenpaging_init(domid
if ( !paging )
goto err;
- if ( getenv("XENPAGING_DEBUG") )
+ /* Get cmdline options and domain_id */
+ if ( xenpaging_getopts(paging, argc, argv) )
+ goto err;
+
+ /* Enable debug output */
+ if ( paging->debug )
dbg = (xentoollog_logger *)xtl_createlogger_stdiostream(stderr,
XTL_DEBUG, 0);
/* Open connection to xen */
@@ -234,7 +314,7 @@ static xenpaging_t *xenpaging_init(domid
}
/* write domain ID to watch so we can ignore other domain shutdowns */
- snprintf(watch_token, sizeof(watch_token), "%u", domain_id);
+ snprintf(watch_token, sizeof(watch_token), "%u",
paging->mem_event.domain_id);
if ( xs_watch(paging->xs_handle, "@releaseDomain", watch_token) == false )
{
PERROR("Could not bind to shutdown watch\n");
@@ -242,7 +322,7 @@ static xenpaging_t *xenpaging_init(domid
}
/* Watch xenpagings working target */
- dom_path = xs_get_domain_path(paging->xs_handle, domain_id);
+ dom_path = xs_get_domain_path(paging->xs_handle,
paging->mem_event.domain_id);
if ( !dom_path )
{
PERROR("Could not find domain path\n");
@@ -260,16 +340,6 @@ static xenpaging_t *xenpaging_init(domid
goto err;
}
- p = getenv("XENPAGING_POLICY_MRU_SIZE");
- if ( p && *p )
- {
- paging->policy_mru_size = atoi(p);
- DPRINTF("Setting policy mru_size to %d\n", paging->policy_mru_size);
- }
-
- /* Set domain id */
- paging->mem_event.domain_id = domain_id;
-
/* Initialise shared page */
paging->mem_event.shared_page = init_page();
if ( paging->mem_event.shared_page == NULL )
@@ -335,16 +405,20 @@ static xenpaging_t *xenpaging_init(domid
paging->mem_event.port = rc;
- rc = xc_domain_getinfolist(xch, paging->mem_event.domain_id, 1,
- &domain_info);
- if ( rc != 1 )
+ /* Get max_pages from guest if not provided via cmdline */
+ if ( !paging->max_pages )
{
- PERROR("Error getting domain info");
- goto err;
- }
+ rc = xc_domain_getinfolist(xch, paging->mem_event.domain_id, 1,
+ &domain_info);
+ if ( rc != 1 )
+ {
+ PERROR("Error getting domain info");
+ goto err;
+ }
- /* Record number of max_pages */
- paging->max_pages = domain_info.max_pages;
+ /* Record number of max_pages */
+ paging->max_pages = domain_info.max_pages;
+ }
/* Allocate bitmap for tracking pages that have been paged out */
paging->bitmap = bitmap_alloc(paging->max_pages);
@@ -355,8 +429,6 @@ static xenpaging_t *xenpaging_init(domid
}
DPRINTF("max_pages = %d\n", paging->max_pages);
- paging->target_tot_pages = target_tot_pages;
-
/* Initialise policy */
rc = policy_init(paging);
if ( rc != 0 )
@@ -718,25 +790,18 @@ int main(int argc, char *argv[])
mode_t open_mode = S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR | S_IWGRP |
S_IWOTH;
int fd;
- if ( argc != 3 )
- {
- fprintf(stderr, "Usage: %s <domain_id> <tot_pages>\n", argv[0]);
- return -1;
- }
-
/* Initialise domain paging */
- paging = xenpaging_init(atoi(argv[1]), atoi(argv[2]));
+ paging = xenpaging_init(argc, argv);
if ( paging == NULL )
{
- fprintf(stderr, "Error initialising paging");
+ fprintf(stderr, "Error initialising paging\n");
return 1;
}
xch = paging->xc_handle;
- DPRINTF("starting %s %u %d\n", argv[0], paging->mem_event.domain_id,
paging->target_tot_pages);
+ DPRINTF("starting %s for domain_id %u with pagefile %s\n", argv[0],
paging->mem_event.domain_id, filename);
/* Open file */
- sprintf(filename, "page_cache_%u", paging->mem_event.domain_id);
fd = open(filename, open_flags, open_mode);
if ( fd < 0 )
{
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.h
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.h
@@ -52,6 +52,7 @@ typedef struct xenpaging {
int num_paged_out;
int target_tot_pages;
int policy_mru_size;
+ int debug;
unsigned long pagein_queue[XENPAGING_PAGEIN_QUEUE_SIZE];
} xenpaging_t;
++++++ 24225-xenpaging_improve_policy_mru_list_handling.patch ++++++
changeset: 24225:d47d1ad56366
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:02:50 2011 +0100
files: tools/xenpaging/policy.h tools/xenpaging/policy_default.c
tools/xenpaging/xenpaging.c
description:
xenpaging: improve policy mru list handling
Without this change it is not possible to page-out all guest pages, then
trigger a page-in for all pages, and then page-out everything once
again. All pages in the mru list can not be paged out because they
remain active in the internal bitmap of paged pages.
Use the mru list only if the number of paged-out pages is larger than
the mru list. If the number is smaller, start to clear the mru list. In
case the number of paged-out pages drops to zero the mru list and the
internal bitmap will be empty as well.
Also add a new interface for dropped pages. If a gfn was dropped there
is no need to adjust the mru list because dropping a page is not usage
of a page.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/policy.h | 2 ++
tools/xenpaging/policy_default.c | 27 ++++++++++++++++++++++++---
tools/xenpaging/xenpaging.c | 11 +++++++++--
3 files changed, 35 insertions(+), 5 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/policy.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/policy.h
+++ xen-4.1.2-testing/tools/xenpaging/policy.h
@@ -32,6 +32,8 @@ int policy_init(xenpaging_t *paging);
int policy_choose_victim(xenpaging_t *paging, xenpaging_victim_t *victim);
void policy_notify_paged_out(unsigned long gfn);
void policy_notify_paged_in(unsigned long gfn);
+void policy_notify_paged_in_nomru(unsigned long gfn);
+void policy_notify_dropped(unsigned long gfn);
#endif // __XEN_PAGING_POLICY_H__
Index: xen-4.1.2-testing/tools/xenpaging/policy_default.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/policy_default.c
+++ xen-4.1.2-testing/tools/xenpaging/policy_default.c
@@ -57,7 +57,7 @@ int policy_init(xenpaging_t *paging)
if ( paging->policy_mru_size > 0 )
mru_size = paging->policy_mru_size;
else
- mru_size = DEFAULT_MRU_SIZE;
+ mru_size = paging->policy_mru_size = DEFAULT_MRU_SIZE;
mru = malloc(sizeof(*mru) * mru_size);
if ( mru == NULL )
@@ -120,17 +120,38 @@ void policy_notify_paged_out(unsigned lo
clear_bit(gfn, unconsumed);
}
-void policy_notify_paged_in(unsigned long gfn)
+static void policy_handle_paged_in(unsigned long gfn, int do_mru)
{
unsigned long old_gfn = mru[i_mru & (mru_size - 1)];
if ( old_gfn != INVALID_MFN )
clear_bit(old_gfn, bitmap);
- mru[i_mru & (mru_size - 1)] = gfn;
+ if (do_mru) {
+ mru[i_mru & (mru_size - 1)] = gfn;
+ } else {
+ clear_bit(gfn, bitmap);
+ mru[i_mru & (mru_size - 1)] = INVALID_MFN;
+ }
+
i_mru++;
}
+void policy_notify_paged_in(unsigned long gfn)
+{
+ policy_handle_paged_in(gfn, 1);
+}
+
+void policy_notify_paged_in_nomru(unsigned long gfn)
+{
+ policy_handle_paged_in(gfn, 0);
+}
+
+void policy_notify_dropped(unsigned long gfn)
+{
+ clear_bit(gfn, bitmap);
+}
+
/*
* Local variables:
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -615,7 +615,14 @@ static int xenpaging_resume_page(xenpagi
/* Notify policy of page being paged in */
if ( notify_policy )
{
- policy_notify_paged_in(rsp->gfn);
+ /*
+ * Do not add gfn to mru list if the target is lower than mru size.
+ * This allows page-out of these gfns if the target grows again.
+ */
+ if (paging->num_paged_out > paging->policy_mru_size)
+ policy_notify_paged_in(rsp->gfn);
+ else
+ policy_notify_paged_in_nomru(rsp->gfn);
/* Record number of resumed pages */
paging->num_paged_out--;
@@ -869,7 +876,7 @@ int main(int argc, char *argv[])
{
DPRINTF("drop_page ^ gfn %"PRIx64" pageslot %d\n",
req.gfn, i);
/* Notify policy of page being dropped */
- policy_notify_paged_in(req.gfn);
+ policy_notify_dropped(req.gfn);
}
else
{
++++++ 24226-xenpaging_add_debug_to_show_received_watch_event..patch ++++++
changeset: 24226:c9b75ccd3ebf
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 18:26:15 2011 +0100
files: tools/xenpaging/xenpaging.c
description:
xenpaging: add debug to show received watch event.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 1 +
1 file changed, 1 insertion(+)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -108,6 +108,7 @@ static int xenpaging_wait_for_event_or_t
vec = xs_read_watch(paging->xs_handle, &num);
if ( vec )
{
+ DPRINTF("path '%s' token '%s'\n", vec[XS_WATCH_PATH],
vec[XS_WATCH_TOKEN]);
if ( strcmp(vec[XS_WATCH_TOKEN], watch_token) == 0 )
{
/* If our guest disappeared, set interrupt flag and fall
through */
++++++ 24227-xenpaging_restrict_pagefile_permissions.patch ++++++
changeset: 24227:1027e7d13d02
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 18:26:16 2011 +0100
files: tools/xenpaging/xenpaging.c
description:
xenpaging: restrict pagefile permissions
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -795,7 +795,7 @@ int main(int argc, char *argv[])
xc_interface *xch;
int open_flags = O_CREAT | O_TRUNC | O_RDWR;
- mode_t open_mode = S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR | S_IWGRP |
S_IWOTH;
+ mode_t open_mode = S_IRUSR | S_IWUSR;
int fd;
/* Initialise domain paging */
++++++ 24231-waitqueue_Implement_wake_up_nroneall..patch ++++++
changeset: 24231:2a81ce2b2b93
user: Keir Fraser <keir@xxxxxxx>
date: Fri Nov 25 20:27:11 2011 +0000
files: xen/common/wait.c xen/include/xen/wait.h
description:
waitqueue: Implement wake_up_{nr,one,all}.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
---
xen/common/wait.c | 14 ++++++++++++--
xen/include/xen/wait.h | 6 ++++--
2 files changed, 16 insertions(+), 4 deletions(-)
Index: xen-4.1.2-testing/xen/common/wait.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/wait.c
+++ xen-4.1.2-testing/xen/common/wait.c
@@ -87,13 +87,13 @@ void init_waitqueue_head(struct waitqueu
INIT_LIST_HEAD(&wq->list);
}
-void wake_up(struct waitqueue_head *wq)
+void wake_up_nr(struct waitqueue_head *wq, unsigned int nr)
{
struct waitqueue_vcpu *wqv;
spin_lock(&wq->lock);
- while ( !list_empty(&wq->list) )
+ while ( !list_empty(&wq->list) && nr-- )
{
wqv = list_entry(wq->list.next, struct waitqueue_vcpu, list);
list_del_init(&wqv->list);
@@ -103,6 +103,16 @@ void wake_up(struct waitqueue_head *wq)
spin_unlock(&wq->lock);
}
+void wake_up_one(struct waitqueue_head *wq)
+{
+ wake_up_nr(wq, 1);
+}
+
+void wake_up_all(struct waitqueue_head *wq)
+{
+ wake_up_nr(wq, UINT_MAX);
+}
+
#ifdef CONFIG_X86
static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
Index: xen-4.1.2-testing/xen/include/xen/wait.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/xen/wait.h
+++ xen-4.1.2-testing/xen/include/xen/wait.h
@@ -28,8 +28,10 @@ struct waitqueue_head {
/* Dynamically initialise a waitqueue. */
void init_waitqueue_head(struct waitqueue_head *wq);
-/* Wake all VCPUs waiting on specified waitqueue. */
-void wake_up(struct waitqueue_head *wq);
+/* Wake VCPU(s) waiting on specified waitqueue. */
+void wake_up_nr(struct waitqueue_head *wq, unsigned int nr);
+void wake_up_one(struct waitqueue_head *wq);
+void wake_up_all(struct waitqueue_head *wq);
/* Wait on specified waitqueue until @condition is true. */
#define wait_event(wq, condition) \
++++++ 24232-waitqueue_Hold_a_reference_to_a_domain_on_a_waitqueue..patch ++++++
changeset: 24232:95d4e2e0bed3
user: Keir Fraser <keir@xxxxxxx>
date: Fri Nov 25 20:32:05 2011 +0000
files: xen/common/wait.c xen/include/xen/wait.h
description:
waitqueue: Hold a reference to a domain on a waitqueue.
Also allow waitqueues to be dynamically destroyed.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
---
xen/common/wait.c | 8 ++++++++
xen/include/xen/wait.h | 3 ++-
2 files changed, 10 insertions(+), 1 deletion(-)
Index: xen-4.1.2-testing/xen/common/wait.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/wait.c
+++ xen-4.1.2-testing/xen/common/wait.c
@@ -87,6 +87,11 @@ void init_waitqueue_head(struct waitqueu
INIT_LIST_HEAD(&wq->list);
}
+void destroy_waitqueue_head(struct waitqueue_head *wq)
+{
+ wake_up_all(wq);
+}
+
void wake_up_nr(struct waitqueue_head *wq, unsigned int nr)
{
struct waitqueue_vcpu *wqv;
@@ -98,6 +103,7 @@ void wake_up_nr(struct waitqueue_head *w
wqv = list_entry(wq->list.next, struct waitqueue_vcpu, list);
list_del_init(&wqv->list);
vcpu_unpause(wqv->vcpu);
+ put_domain(wqv->vcpu->domain);
}
spin_unlock(&wq->lock);
@@ -218,6 +224,7 @@ void prepare_to_wait(struct waitqueue_he
spin_lock(&wq->lock);
list_add_tail(&wqv->list, &wq->list);
vcpu_pause_nosync(curr);
+ get_knownalive_domain(curr->domain);
spin_unlock(&wq->lock);
}
@@ -236,6 +243,7 @@ void finish_wait(struct waitqueue_head *
{
list_del_init(&wqv->list);
vcpu_unpause(curr);
+ put_domain(curr->domain);
}
spin_unlock(&wq->lock);
}
Index: xen-4.1.2-testing/xen/include/xen/wait.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/xen/wait.h
+++ xen-4.1.2-testing/xen/include/xen/wait.h
@@ -25,8 +25,9 @@ struct waitqueue_head {
.list = LIST_HEAD_INIT((name).list) \
}
-/* Dynamically initialise a waitqueue. */
+/* Dynamically initialise/destroy a waitqueue. */
void init_waitqueue_head(struct waitqueue_head *wq);
+void destroy_waitqueue_head(struct waitqueue_head *wq);
/* Wake VCPU(s) waiting on specified waitqueue. */
void wake_up_nr(struct waitqueue_head *wq, unsigned int nr);
++++++ 24261-x86-cpuidle-Westmere-EX.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1322645021 -3600
# Node ID 64088ba60263d3a623851b43a872c93c71cc3cbf
# Parent df7cec2c6c03f07932555954948ce7c8d09e88f4
x86/cpuidle: add Westmere-EX support to hw residencies reading logic
This is in accordance with
http://software.intel.com/en-us/articles/intel-processor-identification-with-cpuid-model-and-family-numbers/
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Haitao Shan <maillists.shan@xxxxxxxxx>
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -113,6 +113,7 @@ static void do_get_hw_residencies(void *
/* Westmere */
case 0x25:
case 0x2C:
+ case 0x2F:
GET_PC3_RES(hw_res->pc3);
GET_PC6_RES(hw_res->pc6);
GET_PC7_RES(hw_res->pc7);
++++++ 24269-mem_event_move_mem_event_domain_out_of_struct_domain.patch ++++++
changeset: 24269:2cbc53a24683
user: Olaf Hering <olaf@xxxxxxxxx>
date: Wed Nov 30 07:08:53 2011 -0800
files: xen/arch/x86/hvm/hvm.c xen/arch/x86/mm/mem_event.c
xen/arch/x86/mm/mem_sharing.c xen/arch/x86/mm/p2m.c xen/common/domain.c
xen/include/xen/sched.h
description:
mem_event: move mem_event_domain out of struct domain
An upcoming change may increase the size of mem_event_domain. The result
is a build failure because struct domain gets larger than a page.
Allocate the room for the three mem_event_domain members at runtime.
v2:
- remove mem_ prefix from members of new struct
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>
---
xen/arch/x86/hvm/hvm.c | 4 ++--
xen/arch/x86/mm/mem_event.c | 6 +++---
xen/arch/x86/mm/mem_sharing.c | 6 +++---
xen/arch/x86/mm/p2m.c | 18 +++++++++---------
xen/common/domain.c | 5 +++++
xen/include/xen/sched.h | 18 ++++++++++++------
6 files changed, 34 insertions(+), 23 deletions(-)
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3909,7 +3909,7 @@ static int hvm_memory_event_traps(long p
if ( (p & HVMPME_onchangeonly) && (value == old) )
return 1;
- rc = mem_event_check_ring(d, &d->mem_access);
+ rc = mem_event_check_ring(d, &d->mem_event->access);
if ( rc )
return rc;
@@ -3932,7 +3932,7 @@ static int hvm_memory_event_traps(long p
req.gla_valid = 1;
}
- mem_event_put_request(d, &d->mem_access, &req);
+ mem_event_put_request(d, &d->mem_event->access, &req);
return 1;
}
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -252,7 +252,7 @@ int mem_event_domctl(struct domain *d, x
{
case XEN_DOMCTL_MEM_EVENT_OP_PAGING:
{
- struct mem_event_domain *med = &d->mem_paging;
+ struct mem_event_domain *med = &d->mem_event->paging;
rc = -EINVAL;
switch( mec->op )
@@ -297,7 +297,7 @@ int mem_event_domctl(struct domain *d, x
case XEN_DOMCTL_MEM_EVENT_OP_ACCESS:
{
- struct mem_event_domain *med = &d->mem_access;
+ struct mem_event_domain *med = &d->mem_event->access;
rc = -EINVAL;
switch( mec->op )
@@ -320,7 +320,7 @@ int mem_event_domctl(struct domain *d, x
case XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE:
{
if ( med->ring_page )
- rc = mem_event_disable(&d->mem_access);
+ rc = mem_event_disable(med);
}
break;
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -322,12 +322,12 @@ static struct page_info* mem_sharing_all
req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
}
- if(mem_event_check_ring(d, &d->mem_share)) return page;
+ if(mem_event_check_ring(d, &d->mem_event->share)) return page;
req.gfn = gfn;
req.p2mt = p2m_ram_shared;
req.vcpu_id = v->vcpu_id;
- mem_event_put_request(d, &d->mem_share, &req);
+ mem_event_put_request(d, &d->mem_event->share, &req);
return page;
}
@@ -342,7 +342,7 @@ int mem_sharing_sharing_resume(struct do
mem_event_response_t rsp;
/* Get request off the ring */
- mem_event_get_response(&d->mem_share, &rsp);
+ mem_event_get_response(&d->mem_event->share, &rsp);
/* Unpause domain/vcpu */
if( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -2996,7 +2996,7 @@ void p2m_mem_paging_drop_page(struct p2m
struct domain *d = p2m->domain;
/* Check that there's space on the ring for this request */
- if ( mem_event_check_ring(d, &d->mem_paging) == 0)
+ if ( mem_event_check_ring(d, &d->mem_event->paging) == 0)
{
/* Send release notification to pager */
memset(&req, 0, sizeof(req));
@@ -3004,7 +3004,7 @@ void p2m_mem_paging_drop_page(struct p2m
req.gfn = gfn;
req.vcpu_id = v->vcpu_id;
- mem_event_put_request(d, &d->mem_paging, &req);
+ mem_event_put_request(d, &d->mem_event->paging, &req);
}
}
@@ -3039,7 +3039,7 @@ void p2m_mem_paging_populate(struct p2m_
struct domain *d = p2m->domain;
/* Check that there's space on the ring for this request */
- if ( mem_event_check_ring(d, &d->mem_paging) )
+ if ( mem_event_check_ring(d, &d->mem_event->paging) )
return;
memset(&req, 0, sizeof(req));
@@ -3070,7 +3070,7 @@ void p2m_mem_paging_populate(struct p2m_
else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
{
/* gfn is already on its way back and vcpu is not paused */
- mem_event_put_req_producers(&d->mem_paging);
+ mem_event_put_req_producers(&d->mem_event->paging);
return;
}
@@ -3079,7 +3079,7 @@ void p2m_mem_paging_populate(struct p2m_
req.p2mt = p2mt;
req.vcpu_id = v->vcpu_id;
- mem_event_put_request(d, &d->mem_paging, &req);
+ mem_event_put_request(d, &d->mem_event->paging, &req);
}
/**
@@ -3157,7 +3157,7 @@ void p2m_mem_paging_resume(struct p2m_do
mfn_t mfn;
/* Pull the response off the ring */
- mem_event_get_response(&d->mem_paging, &rsp);
+ mem_event_get_response(&d->mem_event->paging, &rsp);
/* Fix p2m entry if the page was not dropped */
if ( !(rsp.flags & MEM_EVENT_FLAG_DROP_PAGE) )
@@ -3210,7 +3210,7 @@ void p2m_mem_access_check(unsigned long
p2m_unlock(p2m);
/* Otherwise, check if there is a memory event listener, and send the
message along */
- res = mem_event_check_ring(d, &d->mem_access);
+ res = mem_event_check_ring(d, &d->mem_event->access);
if ( res < 0 )
{
/* No listener */
@@ -3254,7 +3254,7 @@ void p2m_mem_access_check(unsigned long
req.vcpu_id = v->vcpu_id;
- mem_event_put_request(d, &d->mem_access, &req);
+ mem_event_put_request(d, &d->mem_event->access, &req);
/* VCPU paused, mem event request sent */
}
@@ -3264,7 +3264,7 @@ void p2m_mem_access_resume(struct p2m_do
struct domain *d = p2m->domain;
mem_event_response_t rsp;
- mem_event_get_response(&d->mem_access, &rsp);
+ mem_event_get_response(&d->mem_event->access, &rsp);
/* Unpause domain */
if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -298,6 +298,10 @@ struct domain *domain_create(
init_status |= INIT_gnttab;
poolid = 0;
+
+ d->mem_event = xzalloc(struct mem_event_per_domain);
+ if ( !d->mem_event )
+ goto fail;
}
if ( arch_domain_create(d, domcr_flags) != 0 )
@@ -329,6 +333,7 @@ struct domain *domain_create(
fail:
d->is_dying = DOMDYING_dead;
atomic_set(&d->refcnt, DOMAIN_DESTROYED);
+ xfree(d->mem_event);
if ( init_status & INIT_arch )
arch_domain_destroy(d);
if ( init_status & INIT_gnttab )
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -201,6 +201,16 @@ struct mem_event_domain
int xen_port;
};
+struct mem_event_per_domain
+{
+ /* Memory sharing support */
+ struct mem_event_domain share;
+ /* Memory paging support */
+ struct mem_event_domain paging;
+ /* Memory access support */
+ struct mem_event_domain access;
+};
+
struct domain
{
domid_t domain_id;
@@ -327,12 +337,8 @@ struct domain
/* Non-migratable and non-restoreable? */
bool_t disable_migrate;
- /* Memory sharing support */
- struct mem_event_domain mem_share;
- /* Memory paging support */
- struct mem_event_domain mem_paging;
- /* Memory access support */
- struct mem_event_domain mem_access;
+ /* Various mem_events */
+ struct mem_event_per_domain *mem_event;
/* Currently computed from union of all vcpu cpu-affinity masks. */
nodemask_t node_affinity;
++++++ 24270-Free_d-mem_event_on_domain_destruction..patch ++++++
changeset: 24270:08716a7f1b74
user: Keir Fraser <keir@xxxxxxx>
date: Wed Nov 30 07:12:41 2011 -0800
files: xen/common/domain.c
description:
Free d->mem_event on domain destruction.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
---
xen/common/domain.c | 2 ++
1 file changed, 2 insertions(+)
Index: xen-4.1.2-testing/xen/common/domain.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/domain.c
+++ xen-4.1.2-testing/xen/common/domain.c
@@ -661,6 +661,8 @@ static void complete_domain_destroy(stru
/* Free page used by xen oprofile buffer. */
free_xenoprof_pages(d);
+ xfree(d->mem_event);
+
for ( i = d->max_vcpus - 1; i >= 0; i-- )
if ( (v = d->vcpu[i]) != NULL )
free_vcpu_struct(v);
++++++
24272-xenpaging_Fix_c-s_235070a29c8c3ddf7_update_machine_to_phys_mapping_during_page_deallocation.patch
++++++
changeset: 24272:62ff6a318c5d
user: Keir Fraser <keir@xxxxxxx>
date: Wed Nov 30 16:59:58 2011 -0800
files: xen/common/page_alloc.c
description:
xenpaging: Fix c/s 23507:0a29c8c3ddf7 ("update machine_to_phys_mapping[] during
page deallocation")
This patch clobbers page owner in free_heap_pages() before we are
finished using it. This means that a subsequent test to determine
whether it is safe to avoid safety TLB flushes incorrectly always
determines that it is safe to do so.
The fix is simple: we can defer the original patch's work until after
we are done with the page-owner field.
Thanks to Christian Limpach for spotting this one.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
---
xen/common/page_alloc.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
Index: xen-4.1.2-testing/xen/common/page_alloc.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/page_alloc.c
+++ xen-4.1.2-testing/xen/common/page_alloc.c
@@ -540,10 +540,6 @@ static void free_heap_pages(
for ( i = 0; i < (1 << order); i++ )
{
- /* This page is not a guest frame any more. */
- page_set_owner(&pg[i], NULL); /* set_gpfn_from_mfn snoops pg owner */
- set_gpfn_from_mfn(mfn + i, INVALID_M2P_ENTRY);
-
/*
* Cannot assume that count_info == 0, as there are some corner cases
* where it isn't the case and yet it isn't a bug:
@@ -567,6 +563,10 @@ static void free_heap_pages(
pg[i].u.free.need_tlbflush = (page_get_owner(&pg[i]) != NULL);
if ( pg[i].u.free.need_tlbflush )
pg[i].tlbflush_timestamp = tlbflush_current_time();
+
+ /* This page is not a guest frame any more. */
+ page_set_owner(&pg[i], NULL); /* set_gpfn_from_mfn snoops pg owner */
+ set_gpfn_from_mfn(mfn + i, INVALID_M2P_ENTRY);
}
avail[node][zone] += 1 << order;
++++++ 24275-x86-emul-lzcnt.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1322725849 -3600
# Node ID 76ea126f21724b72c120aff59460f7bbe9e6960d
# Parent 07cf778d517fdf661a34027af653a489489bf222
x86/emulator: properly handle lzcnt and tzcnt
These instructions are prefix selected flavors of bsf and bsr
respectively, and hence the presences of the F3 prefix must be handled
in the emulation code in order to avoid running into problems on newer
CPUs.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -990,6 +990,9 @@ static bool_t vcpu_has(
return rc == X86EMUL_OKAY;
}
+#define vcpu_has_lzcnt() vcpu_has(0x80000001, ECX, 5, ctxt, ops)
+#define vcpu_has_bmi1() vcpu_has(0x00000007, EBX, 3, ctxt, ops)
+
#define vcpu_must_have(leaf, reg, bit) \
generate_exception_if(!vcpu_has(leaf, reg, bit, ctxt, ops), EXC_UD, -1)
#define vcpu_must_have_sse2() vcpu_must_have(0x00000001, EDX, 26)
@@ -4114,13 +4117,24 @@ x86_emulate(
dst.val = (uint8_t)src.val;
break;
- case 0xbc: /* bsf */ {
- int zf;
+ case 0xbc: /* bsf or tzcnt */ {
+ bool_t zf;
asm ( "bsf %2,%0; setz %b1"
: "=r" (dst.val), "=q" (zf)
- : "r" (src.val), "1" (0) );
+ : "r" (src.val) );
_regs.eflags &= ~EFLG_ZF;
- if ( zf )
+ if ( (rep_prefix == REPE_PREFIX) && vcpu_has_bmi1() )
+ {
+ _regs.eflags &= ~EFLG_CF;
+ if ( zf )
+ {
+ _regs.eflags |= EFLG_CF;
+ dst.val = op_bytes * 8;
+ }
+ else if ( !dst.val )
+ _regs.eflags |= EFLG_ZF;
+ }
+ else if ( zf )
{
_regs.eflags |= EFLG_ZF;
dst.type = OP_NONE;
@@ -4128,13 +4142,28 @@ x86_emulate(
break;
}
- case 0xbd: /* bsr */ {
- int zf;
+ case 0xbd: /* bsr or lzcnt */ {
+ bool_t zf;
asm ( "bsr %2,%0; setz %b1"
: "=r" (dst.val), "=q" (zf)
- : "r" (src.val), "1" (0) );
+ : "r" (src.val) );
_regs.eflags &= ~EFLG_ZF;
- if ( zf )
+ if ( (rep_prefix == REPE_PREFIX) && vcpu_has_lzcnt() )
+ {
+ _regs.eflags &= ~EFLG_CF;
+ if ( zf )
+ {
+ _regs.eflags |= EFLG_CF;
+ dst.val = op_bytes * 8;
+ }
+ else
+ {
+ dst.val = op_bytes * 8 - 1 - dst.val;
+ if ( !dst.val )
+ _regs.eflags |= EFLG_ZF;
+ }
+ }
+ else if ( zf )
{
_regs.eflags |= EFLG_ZF;
dst.type = OP_NONE;
++++++ 24277-x86-dom0-features.patch ++++++
# HG changeset patch
# User Liu, Jinsong <jinsong.liu@xxxxxxxxx>
# Date 1322738484 -3600
# Node ID 1f6b58c8e1ba8d27dfb97f0da96d18d3ad163317
# Parent 89f7273681696022cc44db4f2ec5b22560482869
X86: expose Intel new features to dom0
This patch expose Intel new features to dom0, including
FMA/AVX2/BMI1/BMI2/LZCNT/MOVBE.
Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
Committed-by: Jan Beulich <jbeulich@xxxxxxxx>
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -848,8 +848,11 @@ static void pv_cpuid(struct cpu_user_reg
break;
case 7:
if ( regs->ecx == 0 )
- b &= (cpufeat_mask(X86_FEATURE_FSGSBASE) |
- cpufeat_mask(X86_FEATURE_ERMS));
+ b &= (cpufeat_mask(X86_FEATURE_BMI1) |
+ cpufeat_mask(X86_FEATURE_AVX2) |
+ cpufeat_mask(X86_FEATURE_BMI2) |
+ cpufeat_mask(X86_FEATURE_ERMS) |
+ cpufeat_mask(X86_FEATURE_FSGSBASE));
else
b = 0;
a = c = d = 0;
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -93,6 +93,7 @@
#define X86_FEATURE_TM2 (4*32+ 8) /* Thermal Monitor 2 */
#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental Streaming SIMD
Extensions-3 */
#define X86_FEATURE_CID (4*32+10) /* Context ID */
+#define X86_FEATURE_FMA (4*32+12) /* Fused Multiply Add */
#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */
#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */
#define X86_FEATURE_PDCM (4*32+15) /* Perf/Debug Capability MSR */
@@ -100,6 +101,7 @@
#define X86_FEATURE_SSE4_1 (4*32+19) /* Streaming SIMD Extensions 4.1 */
#define X86_FEATURE_SSE4_2 (4*32+20) /* Streaming SIMD Extensions 4.2 */
#define X86_FEATURE_X2APIC (4*32+21) /* Extended xAPIC */
+#define X86_FEATURE_MOVBE (4*32+22) /* movbe instruction */
#define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */
#define X86_FEATURE_TSC_DEADLINE (4*32+24) /* "tdt" TSC Deadline Timer */
#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
@@ -144,7 +146,10 @@
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 7 */
#define X86_FEATURE_FSGSBASE (7*32+ 0) /* {RD,WR}{FS,GS}BASE instructions */
+#define X86_FEATURE_BMI1 (7*32+ 3) /* 1st bit manipulation extensions */
+#define X86_FEATURE_AVX2 (7*32+ 5) /* AVX2 instructions */
#define X86_FEATURE_SMEP (7*32+ 7) /* Supervisor Mode Execution
Protection */
+#define X86_FEATURE_BMI2 (7*32+ 8) /* 2nd bit manipulation extensions */
#define X86_FEATURE_ERMS (7*32+ 9) /* Enhanced REP MOVSB/STOSB */
#define cpu_has(c, bit) test_bit(bit, (c)->x86_capability)
++++++ 24278-x86-dom0-no-PCID.patch ++++++
# HG changeset patch
# User Liu, Jinsong <jinsong.liu@xxxxxxxxx>
# Date 1322738563 -3600
# Node ID d9cb04ed55398ea4043c85573460afaf023aa1e9
# Parent 1f6b58c8e1ba8d27dfb97f0da96d18d3ad163317
X86: Disable PCID/INVPCID for dom0
PCID (Process-context identifier) is a facility by which a logical
processor may cache information for multiple linear-address spaces.
INVPCID is an new instruction to invalidate TLB. Refer latest Intel SDM
http://www.intel.com/content/www/us/en/processors/architectures-software-developer-manuals.html
We disable PCID/INVPCID for dom0 and pv. Exposing them into dom0 and pv
may result in performance regression, and it would trigger GP or UD
depending on whether platform suppport INVPCID or not.
This patch disables PCID/INVPCID for dom0.
Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
Committed-by: Jan Beulich <jbeulich@xxxxxxxx>
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -836,6 +836,7 @@ static void pv_cpuid(struct cpu_user_reg
__clear_bit(X86_FEATURE_CX16 % 32, &c);
__clear_bit(X86_FEATURE_XTPR % 32, &c);
__clear_bit(X86_FEATURE_PDCM % 32, &c);
+ __clear_bit(X86_FEATURE_PCID % 32, &c);
__clear_bit(X86_FEATURE_DCA % 32, &c);
if ( !xsave_enabled(current) )
{
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -97,6 +97,7 @@
#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */
#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */
#define X86_FEATURE_PDCM (4*32+15) /* Perf/Debug Capability MSR */
+#define X86_FEATURE_PCID (4*32+17) /* Process Context ID */
#define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */
#define X86_FEATURE_SSE4_1 (4*32+19) /* Streaming SIMD Extensions 4.1 */
#define X86_FEATURE_SSE4_2 (4*32+20) /* Streaming SIMD Extensions 4.2 */
@@ -151,6 +152,7 @@
#define X86_FEATURE_SMEP (7*32+ 7) /* Supervisor Mode Execution
Protection */
#define X86_FEATURE_BMI2 (7*32+ 8) /* 2nd bit manipulation extensions */
#define X86_FEATURE_ERMS (7*32+ 9) /* Enhanced REP MOVSB/STOSB */
+#define X86_FEATURE_INVPCID (7*32+10) /* Invalidate Process Context ID */
#define cpu_has(c, bit) test_bit(bit, (c)->x86_capability)
#define boot_cpu_has(bit) test_bit(bit, boot_cpu_data.x86_capability)
++++++ 24282-x86-log-dirty-bitmap-leak.patch ++++++
# HG changeset patch
# User Tim Deegan <tim@xxxxxxx>
# Date 1322749036 0
# Node ID a06cda9fb25f2d7b7b5c7da170813e4a8bb0cd67
# Parent 75f4e4d9f039ea656051e6dfd73e40d4cb32896b
x86/mm: Don't lose track of the log dirty bitmap
hap_log_dirty_init unconditionally sets the top of the log dirty
bitmap to INVALID_MFN. If there had been a bitmap allocated, it is
then leaked, and the host crashes on an ASSERT when the domain is
cleaned up.
Signed-off-by: Tim Deegan <tim@xxxxxxx>
Acked-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -665,7 +665,6 @@ void paging_log_dirty_init(struct domain
d->arch.paging.log_dirty.enable_log_dirty = enable_log_dirty;
d->arch.paging.log_dirty.disable_log_dirty = disable_log_dirty;
d->arch.paging.log_dirty.clean_dirty_bitmap = clean_dirty_bitmap;
- d->arch.paging.log_dirty.top = _mfn(INVALID_MFN);
}
/* This function fress log dirty bitmap resources. */
@@ -686,6 +685,11 @@ int paging_domain_init(struct domain *d,
if ( (rc = p2m_init(d)) != 0 )
return rc;
+ /* This must be initialized separately from the rest of the
+ * log-dirty init code as that can be called more than once and we
+ * don't want to leak any active log-dirty bitmaps */
+ d->arch.paging.log_dirty.top = _mfn(INVALID_MFN);
+
/* The order of the *_init calls below is important, as the later
* ones may rewrite some common fields. Shadow pagetables are the
* default... */
++++++ 24318-x86-mm_Fix_checks_during_foreign_mapping_of_paged_pages.patch
++++++
changeset: 24318:f25a004a6de8
user: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
date: Thu Dec 01 17:21:24 2011 +0000
files: xen/arch/x86/mm.c
description:
x86/mm: Fix checks during foreign mapping of paged pages
Check that the valid mfn is the one we are mapping, not the
mfn of the page table of the foreign domain.
Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
---
xen/arch/x86/mm.c | 24 ++++++++++++++++--------
1 file changed, 16 insertions(+), 8 deletions(-)
Index: xen-4.1.2-testing/xen/arch/x86/mm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm.c
+++ xen-4.1.2-testing/xen/arch/x86/mm.c
@@ -3492,8 +3492,9 @@ int do_mmu_update(
{
l1_pgentry_t l1e = l1e_from_intpte(req.val);
p2m_type_t l1e_p2mt;
+ unsigned long l1emfn = mfn_x(
gfn_to_mfn(p2m_get_hostp2m(pg_owner),
- l1e_get_pfn(l1e), &l1e_p2mt);
+ l1e_get_pfn(l1e), &l1e_p2mt));
if ( p2m_is_paged(l1e_p2mt) )
{
@@ -3502,7 +3503,8 @@ int do_mmu_update(
rc = -ENOENT;
break;
}
- else if ( p2m_ram_paging_in_start == l1e_p2mt &&
!mfn_valid(mfn) )
+ else if ( p2m_ram_paging_in_start == l1e_p2mt &&
+ !mfn_valid(l1emfn) )
{
rc = -ENOENT;
break;
@@ -3534,7 +3536,8 @@ int do_mmu_update(
{
l2_pgentry_t l2e = l2e_from_intpte(req.val);
p2m_type_t l2e_p2mt;
- gfn_to_mfn(p2m_get_hostp2m(pg_owner), l2e_get_pfn(l2e),
&l2e_p2mt);
+ unsigned long l2emfn = mfn_x(
+ gfn_to_mfn(p2m_get_hostp2m(pg_owner), l2e_get_pfn(l2e),
&l2e_p2mt));
if ( p2m_is_paged(l2e_p2mt) )
{
@@ -3543,7 +3546,8 @@ int do_mmu_update(
rc = -ENOENT;
break;
}
- else if ( p2m_ram_paging_in_start == l2e_p2mt &&
!mfn_valid(mfn) )
+ else if ( p2m_ram_paging_in_start == l2e_p2mt &&
+ !mfn_valid(l2emfn) )
{
rc = -ENOENT;
break;
@@ -3563,7 +3567,8 @@ int do_mmu_update(
{
l3_pgentry_t l3e = l3e_from_intpte(req.val);
p2m_type_t l3e_p2mt;
- gfn_to_mfn(p2m_get_hostp2m(pg_owner), l3e_get_pfn(l3e),
&l3e_p2mt);
+ unsigned long l3emfn = mfn_x(
+ gfn_to_mfn(p2m_get_hostp2m(pg_owner), l3e_get_pfn(l3e),
&l3e_p2mt));
if ( p2m_is_paged(l3e_p2mt) )
{
@@ -3572,7 +3577,8 @@ int do_mmu_update(
rc = -ENOENT;
break;
}
- else if ( p2m_ram_paging_in_start == l3e_p2mt &&
!mfn_valid(mfn) )
+ else if ( p2m_ram_paging_in_start == l3e_p2mt &&
+ !mfn_valid(l3emfn) )
{
rc = -ENOENT;
break;
@@ -3592,8 +3598,9 @@ int do_mmu_update(
{
l4_pgentry_t l4e = l4e_from_intpte(req.val);
p2m_type_t l4e_p2mt;
+ unsigned long l4emfn = mfn_x(
gfn_to_mfn(p2m_get_hostp2m(pg_owner),
- l4e_get_pfn(l4e), &l4e_p2mt);
+ l4e_get_pfn(l4e), &l4e_p2mt));
if ( p2m_is_paged(l4e_p2mt) )
{
@@ -3602,7 +3609,8 @@ int do_mmu_update(
rc = -ENOENT;
break;
}
- else if ( p2m_ram_paging_in_start == l4e_p2mt &&
!mfn_valid(mfn) )
+ else if ( p2m_ram_paging_in_start == l4e_p2mt &&
+ !mfn_valid(l4emfn) )
{
rc = -ENOENT;
break;
++++++
24327-After_preparing_a_page_for_page-in_allow_immediate_fill-in_of_the_page_contents.patch
++++++
changeset: 24327:8529bca7a3f0
parent: 24322:6bac46816504
user: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
date: Thu Dec 01 18:14:24 2011 +0000
files: xen/arch/x86/mm/mem_event.c xen/arch/x86/mm/mem_paging.c
xen/arch/x86/mm/p2m.c xen/include/asm-x86/p2m.h xen/include/public/domctl.h
description:
After preparing a page for page-in, allow immediate fill-in of the page contents
p2m_mem_paging_prep ensures that an mfn is backing the paged-out gfn, and
transitions to the next state in the paging state machine for that page.
Foreign mappings of the gfn will now succeed. This is the key idea, as
it allows the pager to now map the gfn and fill in its contents.
Unfortunately, it also allows any other foreign mapper to map the gfn and read
its contents. This is particularly dangerous when the populate is launched
by a foreign mapper in the first place, which will be actively retrying the
map operation and might race with the pager. Qemu-dm being a prime example.
Fix the race by allowing a buffer to be optionally passed in the prep
operation, and having the hypervisor memcpy from that buffer into the newly
prepped page before promoting the gfn type.
Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
---
xen/arch/x86/mm/mem_event.c | 2 +-
xen/arch/x86/mm/mem_paging.c | 2 +-
xen/arch/x86/mm/p2m.c | 32 ++++++++++++++++++++++++++++++--
xen/include/asm-x86/p2m.h | 2 +-
xen/include/public/domctl.h | 8 ++++++--
5 files changed, 39 insertions(+), 7 deletions(-)
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -45,7 +45,7 @@ static int mem_event_enable(struct domai
struct domain *dom_mem_event = current->domain;
struct vcpu *v = current;
unsigned long ring_addr = mec->ring_addr;
- unsigned long shared_addr = mec->shared_addr;
+ unsigned long shared_addr = mec->u.shared_addr;
l1_pgentry_t l1e;
unsigned long gfn;
p2m_type_t p2mt;
--- a/xen/arch/x86/mm/mem_paging.c
+++ b/xen/arch/x86/mm/mem_paging.c
@@ -50,7 +50,7 @@ int mem_paging_domctl(struct domain *d,
case XEN_DOMCTL_MEM_EVENT_OP_PAGING_PREP:
{
unsigned long gfn = mec->gfn;
- rc = p2m_mem_paging_prep(p2m, gfn);
+ rc = p2m_mem_paging_prep(p2m, gfn, mec->u.buffer);
}
break;
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -3093,13 +3093,20 @@ void p2m_mem_paging_populate(struct p2m_
* mfn if populate was called for gfn which was nominated but not evicted. In
* this case only the p2mt needs to be forwarded.
*/
-int p2m_mem_paging_prep(struct p2m_domain *p2m, unsigned long gfn)
+int p2m_mem_paging_prep(struct p2m_domain *p2m, unsigned long gfn, uint64_t
buffer)
{
struct page_info *page;
p2m_type_t p2mt;
p2m_access_t a;
mfn_t mfn;
- int ret;
+ int ret, page_extant = 1;
+ const void *user_ptr = (const void *) buffer;
+
+ if ( user_ptr )
+ /* Sanity check the buffer and bail out early if trouble */
+ if ( (buffer & (PAGE_SIZE - 1)) ||
+ (!access_ok(user_ptr, PAGE_SIZE)) )
+ return -EINVAL;
p2m_lock(p2m);
@@ -3119,6 +3126,28 @@ int p2m_mem_paging_prep(struct p2m_domai
if ( unlikely(page == NULL) )
goto out;
mfn = page_to_mfn(page);
+ page_extant = 0;
+ }
+
+ /* If we were given a buffer, now is the time to use it */
+ if ( !page_extant && user_ptr )
+ {
+ void *guest_map;
+ int rc;
+
+ ASSERT( mfn_valid(mfn) );
+ guest_map = map_domain_page(mfn_x(mfn));
+ rc = copy_from_user(guest_map, user_ptr, PAGE_SIZE);
+ unmap_domain_page(guest_map);
+ if ( rc )
+ {
+ gdprintk(XENLOG_ERR, "Failed to load paging-in gfn %lx domain %u "
+ "bytes left %d\n",
+ gfn, p2m->domain->domain_id, rc);
+ ret = -EFAULT;
+ put_page(page); /* Don't leak pages */
+ goto out;
+ }
}
/* Fix p2m mapping */
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -524,7 +524,7 @@ void p2m_mem_paging_drop_page(struct p2m
/* Start populating a paged out frame */
void p2m_mem_paging_populate(struct p2m_domain *p2m, unsigned long gfn);
/* Prepare the p2m for paging a frame in */
-int p2m_mem_paging_prep(struct p2m_domain *p2m, unsigned long gfn);
+int p2m_mem_paging_prep(struct p2m_domain *p2m, unsigned long gfn, uint64_t
buffer);
/* Resume normal operation (in case a domain was paused) */
void p2m_mem_paging_resume(struct p2m_domain *p2m);
#else
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -741,8 +741,12 @@ struct xen_domctl_mem_event_op {
uint32_t op; /* XEN_DOMCTL_MEM_EVENT_OP_*_* */
uint32_t mode; /* XEN_DOMCTL_MEM_EVENT_OP_* */
- /* OP_ENABLE */
- uint64_aligned_t shared_addr; /* IN: Virtual address of shared page */
+ union {
+ /* OP_ENABLE IN: Virtual address of shared page */
+ uint64_aligned_t shared_addr;
+ /* PAGING_PREP IN: buffer to immediately fill page in */
+ uint64_aligned_t buffer;
+ } u;
uint64_aligned_t ring_addr; /* IN: Virtual address of ring page */
/* Other OPs */
++++++
24328-Tools_Libxc_wrappers_to_automatically_fill_in_page_oud_page_contents_on_prepare.patch
++++++
changeset: 24328:8ad47b48047d
user: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
date: Thu Dec 01 18:14:24 2011 +0000
files: tools/libxc/xc_mem_event.c tools/libxc/xc_mem_paging.c
tools/libxc/xenctrl.h
description:
Tools: Libxc wrappers to automatically fill in page oud page contents on prepare
Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
Acked-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
---
tools/libxc/xc_mem_event.c | 4 ++--
tools/libxc/xc_mem_paging.c | 23 +++++++++++++++++++++++
tools/libxc/xenctrl.h | 2 ++
3 files changed, 27 insertions(+), 2 deletions(-)
Index: xen-4.1.2-testing/tools/libxc/xc_mem_event.c
===================================================================
--- xen-4.1.2-testing.orig/tools/libxc/xc_mem_event.c
+++ xen-4.1.2-testing/tools/libxc/xc_mem_event.c
@@ -24,7 +24,7 @@
#include "xc_private.h"
int xc_mem_event_control(xc_interface *xch, domid_t domain_id, unsigned int op,
- unsigned int mode, void *shared_page,
+ unsigned int mode, void *page,
void *ring_page, unsigned long gfn)
{
DECLARE_DOMCTL;
@@ -34,7 +34,7 @@ int xc_mem_event_control(xc_interface *x
domctl.u.mem_event_op.op = op;
domctl.u.mem_event_op.mode = mode;
- domctl.u.mem_event_op.shared_addr = (unsigned long)shared_page;
+ domctl.u.mem_event_op.u.shared_addr = (unsigned long)page;
domctl.u.mem_event_op.ring_addr = (unsigned long)ring_page;
domctl.u.mem_event_op.gfn = gfn;
Index: xen-4.1.2-testing/tools/libxc/xc_mem_paging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/libxc/xc_mem_paging.c
+++ xen-4.1.2-testing/tools/libxc/xc_mem_paging.c
@@ -65,6 +65,29 @@ int xc_mem_paging_prep(xc_interface *xch
NULL, NULL, gfn);
}
+int xc_mem_paging_load(xc_interface *xch, domid_t domain_id,
+ unsigned long gfn, void *buffer)
+{
+ int rc;
+
+ if ( !buffer )
+ return -EINVAL;
+
+ if ( ((unsigned long) buffer) & (XC_PAGE_SIZE - 1) )
+ return -EINVAL;
+
+ if ( mlock(buffer, XC_PAGE_SIZE) )
+ return -errno;
+
+ rc = xc_mem_event_control(xch, domain_id,
+ XEN_DOMCTL_MEM_EVENT_OP_PAGING_PREP,
+ XEN_DOMCTL_MEM_EVENT_OP_PAGING,
+ buffer, NULL, gfn);
+
+ (void)munlock(buffer, XC_PAGE_SIZE);
+ return rc;
+}
+
int xc_mem_paging_resume(xc_interface *xch, domid_t domain_id, unsigned long
gfn)
{
return xc_mem_event_control(xch, domain_id,
Index: xen-4.1.2-testing/tools/libxc/xenctrl.h
===================================================================
--- xen-4.1.2-testing.orig/tools/libxc/xenctrl.h
+++ xen-4.1.2-testing/tools/libxc/xenctrl.h
@@ -1742,6 +1742,8 @@ int xc_mem_paging_nominate(xc_interface
unsigned long gfn);
int xc_mem_paging_evict(xc_interface *xch, domid_t domain_id, unsigned long
gfn);
int xc_mem_paging_prep(xc_interface *xch, domid_t domain_id, unsigned long
gfn);
+int xc_mem_paging_load(xc_interface *xch, domid_t domain_id,
+ unsigned long gfn, void *buffer);
int xc_mem_paging_resume(xc_interface *xch, domid_t domain_id,
unsigned long gfn);
++++++
24329-Teach_xenpaging_to_use_the_new_and_non-racy_xc_mem_paging_load_interface.patch
++++++
changeset: 24329:a8f5faa127c4
user: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
date: Thu Dec 01 18:14:24 2011 +0000
files: tools/xenpaging/xenpaging.c
description:
Teach xenpaging to use the new and non-racy xc_mem_paging_load interface
Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
Acked-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
---
tools/xenpaging/xenpaging.c | 43 +++++++++++++++++++++----------------------
1 file changed, 21 insertions(+), 22 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -45,6 +45,7 @@ static char *dom_path;
static char watch_token[16];
static char *filename;
static int interrupted;
+static void *paging_buffer = NULL;
static void unlink_pagefile(void)
{
@@ -438,6 +439,13 @@ static xenpaging_t *xenpaging_init(int a
goto err;
}
+ paging_buffer = init_page();
+ if ( !paging_buffer )
+ {
+ ERROR("Creating page aligned load buffer");
+ goto err;
+ }
+
return paging;
err:
@@ -649,10 +657,20 @@ static int xenpaging_populate_page(xenpa
unsigned char oom = 0;
DPRINTF("populate_page < gfn %"PRI_xen_pfn" pageslot %d\n", gfn, i);
+
+ /* Read page */
+ ret = read_page(fd, paging_buffer, i);
+ if ( ret != 0 )
+ {
+ ERROR("Error reading page");
+ goto out;
+ }
+
do
{
/* Tell Xen to allocate a page for the domain */
- ret = xc_mem_paging_prep(xch, paging->mem_event.domain_id, gfn);
+ ret = xc_mem_paging_load(xch, paging->mem_event.domain_id, gfn,
+ paging_buffer);
if ( ret != 0 )
{
if ( errno == ENOMEM )
@@ -662,33 +680,14 @@ static int xenpaging_populate_page(xenpa
sleep(1);
continue;
}
- PERROR("Error preparing %"PRI_xen_pfn" for page-in", gfn);
- goto out_map;
+ PERROR("Error loading %"PRI_xen_pfn" during page-in", gfn);
+ goto out;
}
}
while ( ret && !interrupted );
- /* Map page */
- ret = -EFAULT;
- page = xc_map_foreign_pages(xch, paging->mem_event.domain_id,
- PROT_READ | PROT_WRITE, &gfn, 1);
- if ( page == NULL )
- {
- PERROR("Error mapping page %"PRI_xen_pfn": page is null", gfn);
- goto out_map;
- }
-
- /* Read page */
- ret = read_page(fd, page, i);
- if ( ret != 0 )
- {
- PERROR("Error reading page %"PRI_xen_pfn"", gfn);
- goto out;
- }
out:
- munmap(page, PAGE_SIZE);
- out_map:
return ret;
}
++++++
24344-tools-x86_64_Fix_cpuid_inline_asm_to_not_clobber_stacks_red_zone.patch
++++++
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1322836274 28800
# Node ID 72f4e4cb7440c6ab64d4c08dfdc3158112cc95ac
# Parent 109b99239b21275ee2249873dcdb9a413741142d
tools/x86_64: Fix cpuid() inline asm to not clobber stack's red zone
Pushing stuff onto the stack on x86-64 when we do not specify
-mno-red-zone is unsafe. Since the complicated asm is due to register
pressure on i386, we simply implement an all-new simpler alternative
for x86-64.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
Acked-by: Jan Beulich <jbeulich@xxxxxxxxxx>
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1322844002 28800
# Node ID 491c3ebf1d371d03fdd0aabe82b0f422037c67ba
# Parent 72f4e4cb7440c6ab64d4c08dfdc3158112cc95ac
tools/libxc: Fix x86_32 build breakage in previous changeset.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
--- a/tools/libxc/xc_cpuid_x86.c
+++ b/tools/libxc/xc_cpuid_x86.c
@@ -42,23 +42,23 @@ static int hypervisor_is_64bit(xc_interf
static void cpuid(const unsigned int *input, unsigned int *regs)
{
unsigned int count = (input[1] == XEN_CPUID_INPUT_UNUSED) ? 0 : input[1];
- asm (
#ifdef __i386__
+ /* Use the stack to avoid reg constraint failures with some gcc flags */
+ asm (
"push %%ebx; push %%edx\n\t"
-#else
- "push %%rbx; push %%rdx\n\t"
-#endif
"cpuid\n\t"
"mov %%ebx,4(%4)\n\t"
"mov %%edx,12(%4)\n\t"
-#ifdef __i386__
"pop %%edx; pop %%ebx\n\t"
-#else
- "pop %%rdx; pop %%rbx\n\t"
-#endif
: "=a" (regs[0]), "=c" (regs[2])
: "0" (input[0]), "1" (count), "S" (regs)
: "memory" );
+#else
+ asm (
+ "cpuid"
+ : "=a" (regs[0]), "=b" (regs[1]), "=c" (regs[2]), "=d" (regs[3])
+ : "0" (input[0]), "2" (count) );
+#endif
}
/* Get the manufacturer brand name of the host processor. */
--- a/tools/misc/xen-detect.c
+++ b/tools/misc/xen-detect.c
@@ -35,18 +35,21 @@
static void cpuid(uint32_t idx, uint32_t *regs, int pv_context)
{
- asm volatile (
#ifdef __i386__
-#define R(x) "%%e"#x"x"
-#else
-#define R(x) "%%r"#x"x"
-#endif
- "push "R(a)"; push "R(b)"; push "R(c)"; push "R(d)"\n\t"
+ /* Use the stack to avoid reg constraint failures with some gcc flags */
+ asm volatile (
+ "push %%eax; push %%ebx; push %%ecx; push %%edx\n\t"
"test %1,%1 ; jz 1f ; ud2a ; .ascii \"xen\" ; 1: cpuid\n\t"
"mov %%eax,(%2); mov %%ebx,4(%2)\n\t"
"mov %%ecx,8(%2); mov %%edx,12(%2)\n\t"
- "pop "R(d)"; pop "R(c)"; pop "R(b)"; pop "R(a)"\n\t"
+ "pop %%edx; pop %%ecx; pop %%ebx; pop %%eax\n\t"
: : "a" (idx), "c" (pv_context), "S" (regs) : "memory" );
+#else
+ asm volatile (
+ "test %5,%5 ; jz 1f ; ud2a ; .ascii \"xen\" ; 1: cpuid\n\t"
+ : "=a" (regs[0]), "=b" (regs[1]), "=c" (regs[2]), "=d" (regs[3])
+ : "0" (idx), "1" (pv_context), "2" (0) );
+#endif
}
static int check_for_xen(int pv_context)
++++++ 24357-firmware-no-_PS0-_PS3.patch ++++++
References: bnc#711219
# HG changeset patch
# User Xudong Hao <xudong.hao@xxxxxxxxx>
# Date 1323113706 0
# Node ID 832fa3f3543298a7125cd5f996d1e28dd7ba47b1
# Parent 60ea36c0512b779f291bb6c007e1f05c16054ec2
tools/firmware: remove "_PS0/3" Method
Do not expose the ACPI power management "_PS0/3" Method to guest
firmware. According to section 3.4 of the APCI specification 4.0, PCI
device control the device power through its own specification but not
through APCI.
Qemu pushes "_PS0/3" to guest will cause a mess between ACPI PM and
PCI PM as a result of incorrect ACPI table shipped with the guest
BIOS, it may cause a failure of PCI device PM state transition(from
PCI_UNKNOWN to PCI_D0).
Signed-off-by: Xudong Hao <xudong.hao@xxxxxxxxx>
Signed-off-by: Haitao Shan <haitao.shan@xxxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>
--- a/tools/firmware/hvmloader/acpi/mk_dsdt.c
+++ b/tools/firmware/hvmloader/acpi/mk_dsdt.c
@@ -251,8 +251,6 @@ int main(int argc, char **argv)
* the ACPI event:
* _EJ0: eject a device
* _STA: return a device's status, e.g. enabled or removed
- * Other methods are optional:
- * _PS0/3: put them here for debug purpose
*
* Eject button would generate a general-purpose event, then the
* control method for this event uses Notify() to inform OSPM which
@@ -271,14 +269,6 @@ int main(int argc, char **argv)
stmt("Name", "_ADR, 0x%08x", ((slot & ~7) << 13) | (slot & 7));
/* _SUN == dev */
stmt("Name", "_SUN, 0x%08x", slot >> 3);
- push_block("Method", "_PS0, 0");
- stmt("Store", "0x%02x, \\_GPE.DPT1", slot);
- stmt("Store", "0x80, \\_GPE.DPT2");
- pop_block();
- push_block("Method", "_PS3, 0");
- stmt("Store", "0x%02x, \\_GPE.DPT1", slot);
- stmt("Store", "0x83, \\_GPE.DPT2");
- pop_block();
push_block("Method", "_EJ0, 1");
stmt("Store", "0x%02x, \\_GPE.DPT1", slot);
stmt("Store", "0x88, \\_GPE.DPT2");
++++++ 24358-kexec-compat-overflow.patch ++++++
# HG changeset patch
# User Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
# Date 1323114166 0
# Node ID 9961a6d5356a57685b06f65133c6ade5041e3356
# Parent 832fa3f3543298a7125cd5f996d1e28dd7ba47b1
KEXEC: fix kexec_get_range_compat to fail vocally.
Fail with -ERANGE rather than silently truncating 64bit values (a
physical address and size) into 32bit integers for dom0 to consume.
Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Simplify the bitwise arithmetic a bit.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
--- a/xen/common/kexec.c
+++ b/xen/common/kexec.c
@@ -395,6 +395,10 @@ static int kexec_get_range_compat(XEN_GU
ret = kexec_get_range_internal(&range);
+ /* Dont silently truncate physical addresses or sizes. */
+ if ( (range.start | range.size) & ~(unsigned long)(~0u) )
+ return -ERANGE;
+
if ( ret == 0 ) {
XLAT_kexec_range(&compat_range, &range);
if ( unlikely(copy_to_guest(uarg, &compat_range, 1)) )
++++++ 24359-x86-domU-features.patch ++++++
# HG changeset patch
# User Liu, Jinsong <jinsong.liu@xxxxxxxxx>
# Date 1323170838 0
# Node ID a0befa32e927cc147aaee9bce42c51f53580a875
# Parent 9961a6d5356a57685b06f65133c6ade5041e3356
X86: expose Intel new features to pv/hvm
Intel recently release some new features, including
FMA/AVX2/BMI1/BMI2/LZCNT/MOVBE.
Refer to http://software.intel.com/file/36945
This patch expose these new features to pv and hvm.
Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>
--- a/tools/libxc/xc_cpufeature.h
+++ b/tools/libxc/xc_cpufeature.h
@@ -74,6 +74,7 @@
#define X86_FEATURE_TM2 8 /* Thermal Monitor 2 */
#define X86_FEATURE_SSSE3 9 /* Supplemental Streaming SIMD Exts-3 */
#define X86_FEATURE_CID 10 /* Context ID */
+#define X86_FEATURE_FMA 12 /* Fused Multiply Add */
#define X86_FEATURE_CX16 13 /* CMPXCHG16B */
#define X86_FEATURE_XTPR 14 /* Send Task Priority Messages */
#define X86_FEATURE_PDCM 15 /* Perf/Debug Capability MSR */
@@ -81,6 +82,7 @@
#define X86_FEATURE_SSE4_1 19 /* Streaming SIMD Extensions 4.1 */
#define X86_FEATURE_SSE4_2 20 /* Streaming SIMD Extensions 4.2 */
#define X86_FEATURE_X2APIC 21 /* x2APIC */
+#define X86_FEATURE_MOVBE 22 /* movbe instruction */
#define X86_FEATURE_POPCNT 23 /* POPCNT instruction */
#define X86_FEATURE_TSC_DEADLINE 24 /* "tdt" TSC Deadline Timer */
#define X86_FEATURE_AES 25 /* AES acceleration instructions */
@@ -125,7 +127,10 @@
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx) */
#define X86_FEATURE_FSGSBASE 0 /* {RD,WR}{FS,GS}BASE instructions */
+#define X86_FEATURE_BMI1 3 /* 1st group bit manipulation extensions */
+#define X86_FEATURE_AVX2 5 /* AVX2 instructions */
#define X86_FEATURE_SMEP 7 /* Supervisor Mode Execution Protection */
+#define X86_FEATURE_BMI2 8 /* 2nd group bit manipulation extensions */
#define X86_FEATURE_ERMS 9 /* Enhanced REP MOVSB/STOSB */
#endif /* __LIBXC_CPUFEATURE_H */
--- a/tools/libxc/xc_cpuid_x86.c
+++ b/tools/libxc/xc_cpuid_x86.c
@@ -148,7 +148,8 @@ static void intel_xc_cpuid_policy(
int is_64bit = hypervisor_is_64bit(xch) && is_pae;
/* Only a few features are advertised in Intel's 0x80000001. */
- regs[2] &= (is_64bit ? bitmaskof(X86_FEATURE_LAHF_LM) : 0);
+ regs[2] &= (is_64bit ? bitmaskof(X86_FEATURE_LAHF_LM) : 0) |
+ bitmaskof(X86_FEATURE_ABM);
regs[3] &= ((is_pae ? bitmaskof(X86_FEATURE_NX) : 0) |
(is_64bit ? bitmaskof(X86_FEATURE_LM) : 0) |
(is_64bit ? bitmaskof(X86_FEATURE_SYSCALL) : 0) |
@@ -256,9 +257,11 @@ static void xc_cpuid_hvm_policy(
regs[2] &= (bitmaskof(X86_FEATURE_XMM3) |
bitmaskof(X86_FEATURE_PCLMULQDQ) |
bitmaskof(X86_FEATURE_SSSE3) |
+ bitmaskof(X86_FEATURE_FMA) |
bitmaskof(X86_FEATURE_CX16) |
bitmaskof(X86_FEATURE_SSE4_1) |
bitmaskof(X86_FEATURE_SSE4_2) |
+ bitmaskof(X86_FEATURE_MOVBE) |
bitmaskof(X86_FEATURE_POPCNT) |
bitmaskof(X86_FEATURE_AES) |
bitmaskof(X86_FEATURE_F16C) |
@@ -303,7 +306,10 @@ static void xc_cpuid_hvm_policy(
case 0x00000007: /* Intel-defined CPU features */
if ( input[1] == 0 ) {
- regs[1] &= (bitmaskof(X86_FEATURE_SMEP) |
+ regs[1] &= (bitmaskof(X86_FEATURE_BMI1) |
+ bitmaskof(X86_FEATURE_AVX2) |
+ bitmaskof(X86_FEATURE_SMEP) |
+ bitmaskof(X86_FEATURE_BMI2) |
bitmaskof(X86_FEATURE_ERMS) |
bitmaskof(X86_FEATURE_FSGSBASE));
} else
@@ -427,8 +433,11 @@ static void xc_cpuid_pv_policy(
case 7:
if ( input[1] == 0 )
- regs[1] &= (bitmaskof(X86_FEATURE_FSGSBASE) |
- bitmaskof(X86_FEATURE_ERMS));
+ regs[1] &= (bitmaskof(X86_FEATURE_BMI1) |
+ bitmaskof(X86_FEATURE_AVX2) |
+ bitmaskof(X86_FEATURE_BMI2) |
+ bitmaskof(X86_FEATURE_ERMS) |
+ bitmaskof(X86_FEATURE_FSGSBASE));
else
regs[1] = 0;
regs[0] = regs[2] = regs[3] = 0;
++++++ 24360-x86-pv-domU-no-PCID.patch ++++++
# HG changeset patch
# User Liu, Jinsong <jinsong.liu@xxxxxxxxx>
# Date 1323170884 0
# Node ID d313582d4fa2157332f1d50e599aebca36c41b3b
# Parent a0befa32e927cc147aaee9bce42c51f53580a875
X86: Disable PCID/INVPCID for pv
This patch disable PCID/INVPCID for pv.
Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>
--- a/tools/libxc/xc_cpufeature.h
+++ b/tools/libxc/xc_cpufeature.h
@@ -78,6 +78,7 @@
#define X86_FEATURE_CX16 13 /* CMPXCHG16B */
#define X86_FEATURE_XTPR 14 /* Send Task Priority Messages */
#define X86_FEATURE_PDCM 15 /* Perf/Debug Capability MSR */
+#define X86_FEATURE_PCID 17 /* Process Context ID */
#define X86_FEATURE_DCA 18 /* Direct Cache Access */
#define X86_FEATURE_SSE4_1 19 /* Streaming SIMD Extensions 4.1 */
#define X86_FEATURE_SSE4_2 20 /* Streaming SIMD Extensions 4.2 */
@@ -132,5 +133,6 @@
#define X86_FEATURE_SMEP 7 /* Supervisor Mode Execution Protection */
#define X86_FEATURE_BMI2 8 /* 2nd group bit manipulation extensions */
#define X86_FEATURE_ERMS 9 /* Enhanced REP MOVSB/STOSB */
+#define X86_FEATURE_INVPCID 10 /* Invalidate Process Context ID */
#endif /* __LIBXC_CPUFEATURE_H */
--- a/tools/libxc/xc_cpuid_x86.c
+++ b/tools/libxc/xc_cpuid_x86.c
@@ -427,6 +427,7 @@ static void xc_cpuid_pv_policy(
}
clear_bit(X86_FEATURE_XTPR, regs[2]);
clear_bit(X86_FEATURE_PDCM, regs[2]);
+ clear_bit(X86_FEATURE_PCID, regs[2]);
clear_bit(X86_FEATURE_DCA, regs[2]);
set_bit(X86_FEATURE_HYPERVISOR, regs[2]);
break;
++++++ 24389-amd-fam10-gart-tlb-walk-err.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1323765911 -3600
# Node ID 868d82faf6511de3b3edce18cc6a9e1c938f0b8f
# Parent 7ca56cca09ade16645fb4806be2c5b2b0bc3332b
x86, amd: Disable GartTlbWlkErr when BIOS forgets it
This patch disables GartTlbWlk errors on AMD Fam10h CPUs if the BIOS
forgets to do is (or is just too old). Letting these errors enabled
can cause a sync-flood on the CPU causing a reboot.
The AMD BKDG recommends disabling GART TLB Wlk Error completely.
Based on a Linux patch from Joerg Roedel <joerg.roedel@xxxxxxx>; see e.g.
https://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=patch;h=5bbc097d890409d8eff4e3f1d26f11a9d6b7c07e
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
--- a/xen/arch/x86/cpu/mcheck/amd_f10.c
+++ b/xen/arch/x86/cpu/mcheck/amd_f10.c
@@ -46,6 +46,7 @@
#include <asm/msr.h>
#include "mce.h"
+#include "mce_quirks.h"
#include "x86_mca.h"
@@ -91,9 +92,14 @@ amd_f10_handler(struct mc_info *mi, uint
/* AMD Family10 machine check */
enum mcheck_type amd_f10_mcheck_init(struct cpuinfo_x86 *c)
{
+ enum mcequirk_amd_flags quirkflag = mcequirk_lookup_amd_quirkdata(c);
+
if (amd_k8_mcheck_init(c) == mcheck_none)
return mcheck_none;
+ if (quirkflag == MCEQUIRK_F10_GART)
+ mcequirk_amd_apply(quirkflag);
+
x86_mce_callback_register(amd_f10_handler);
return mcheck_amd_famXX;
--- a/xen/arch/x86/cpu/mcheck/mce_amd_quirks.c
+++ b/xen/arch/x86/cpu/mcheck/mce_amd_quirks.c
@@ -29,6 +29,8 @@ static const struct mce_quirkdata mce_am
MCEQUIRK_K7_BANK0 },
{ 0xf /* cpu family */, ANY /* all models */, ANY /* all steppings */,
MCEQUIRK_K8_GART },
+ { 0x10 /* cpu family */, ANY /* all models */, ANY /* all steppings */,
+ MCEQUIRK_F10_GART },
};
enum mcequirk_amd_flags
@@ -54,6 +56,8 @@ mcequirk_lookup_amd_quirkdata(struct cpu
int mcequirk_amd_apply(enum mcequirk_amd_flags flags)
{
+ u64 val;
+
switch (flags) {
case MCEQUIRK_K7_BANK0:
return 1; /* first bank */
@@ -67,6 +71,10 @@ int mcequirk_amd_apply(enum mcequirk_amd
wrmsrl(MSR_IA32_MC4_CTL, ~(1ULL << 10));
wrmsrl(MSR_IA32_MC4_STATUS, 0ULL);
break;
+ case MCEQUIRK_F10_GART:
+ if (rdmsr_safe(MSR_AMD64_MCx_MASK(4), val) == 0)
+ wrmsr_safe(MSR_AMD64_MCx_MASK(4), val | (1 << 10));
+ break;
}
return 0;
--- a/xen/arch/x86/cpu/mcheck/mce_quirks.h
+++ b/xen/arch/x86/cpu/mcheck/mce_quirks.h
@@ -33,8 +33,9 @@ struct mce_quirkdata {
*/
enum mcequirk_amd_flags {
- MCEQUIRK_K7_BANK0 = 0x1,
- MCEQUIRK_K8_GART = 0x2,
+ MCEQUIRK_K7_BANK0 = 1,
+ MCEQUIRK_K8_GART,
+ MCEQUIRK_F10_GART
};
enum mcequirk_intel_flags {
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -98,6 +98,8 @@
#define CMCI_EN (1UL<<30)
#define CMCI_THRESHOLD_MASK 0x7FFF
+#define MSR_AMD64_MC0_MASK 0xc0010044
+
#define MSR_IA32_MC1_CTL 0x00000404
#define MSR_IA32_MC1_CTL2 0x00000281
#define MSR_IA32_MC1_STATUS 0x00000405
@@ -151,6 +153,8 @@
#define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x))
#define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x))
+#define MSR_AMD64_MCx_MASK(x) (MSR_AMD64_MC0_MASK + (x))
+
#define MSR_P6_PERFCTR0 0x000000c1
#define MSR_P6_PERFCTR1 0x000000c2
#define MSR_P6_EVNTSEL0 0x00000186
++++++ 24391-x86-pcpu-version.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1323766131 -3600
# Node ID 3f4ffde189f228d88e534865023fd795f77f0d05
# Parent 77528dbced3ea74901be6b1aeddedda22bfdaf63
x86: add platform hypercall to retrieve pCPU-s' family, model, and stepping
With the recent hotplug changes to the Xen part of the microcode
loading, this allows the kernel driver to avoid unnecessary calls into
the hypervisor during pCPU hot-enabling: Knowing that the hypervisor
retains the data for already booted CPUs, only data for CPUs with a
different signature needs to be passed down. Since the microcode
loading code can be pretty verbose, avoiding to invoke it can make the
log much easier to look at in case of problems.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -469,6 +469,42 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
}
break;
+ case XENPF_get_cpu_version:
+ {
+ struct xenpf_pcpu_version *ver = &op->u.pcpu_version;
+
+ if ( !get_cpu_maps() )
+ {
+ ret = -EBUSY;
+ break;
+ }
+
+ if ( (ver->xen_cpuid >= NR_CPUS) || !cpu_online(ver->xen_cpuid) )
+ {
+ memset(ver->vendor_id, 0, sizeof(ver->vendor_id));
+ ver->family = 0;
+ ver->model = 0;
+ ver->stepping = 0;
+ }
+ else
+ {
+ const struct cpuinfo_x86 *c = &cpu_data[ver->xen_cpuid];
+
+ memcpy(ver->vendor_id, c->x86_vendor_id, sizeof(ver->vendor_id));
+ ver->family = c->x86;
+ ver->model = c->x86_model;
+ ver->stepping = c->x86_mask;
+ }
+
+ ver->max_present = cpumask_last(&cpu_present_map);
+
+ put_cpu_maps();
+
+ if ( copy_field_to_guest(u_xenpf_op, op, u.pcpu_version) )
+ ret = -EFAULT;
+ }
+ break;
+
case XENPF_cpu_online:
{
int cpu = op->u.cpu_ol.cpuid;
--- a/xen/arch/x86/x86_64/platform_hypercall.c
+++ b/xen/arch/x86/x86_64/platform_hypercall.c
@@ -3,7 +3,7 @@
*/
#include <xen/config.h>
-#include <xen/types.h>
+#include <xen/lib.h>
#include <compat/platform.h>
DEFINE_XEN_GUEST_HANDLE(compat_platform_op_t);
@@ -26,8 +26,13 @@ DEFINE_XEN_GUEST_HANDLE(compat_platform_
#define xen_processor_power_t compat_processor_power_t
#define set_cx_pminfo compat_set_cx_pminfo
-#define xenpf_pcpuinfo compat_pf_pcpuinfo
-#define xenpf_pcpuinfo_t compat_pf_pcpuinfo_t
+#define xen_pf_pcpuinfo xenpf_pcpuinfo
+CHECK_pf_pcpuinfo;
+#undef xen_pf_pcpuinfo
+
+#define xen_pf_pcpu_version xenpf_pcpu_version
+CHECK_pf_pcpu_version;
+#undef xen_pf_pcpu_version
#define xenpf_enter_acpi_sleep compat_pf_enter_acpi_sleep
--- a/xen/include/public/platform.h
+++ b/xen/include/public/platform.h
@@ -425,6 +425,21 @@ struct xenpf_pcpuinfo {
typedef struct xenpf_pcpuinfo xenpf_pcpuinfo_t;
DEFINE_XEN_GUEST_HANDLE(xenpf_pcpuinfo_t);
+#define XENPF_get_cpu_version 48
+struct xenpf_pcpu_version {
+ /* IN */
+ uint32_t xen_cpuid;
+ /* OUT */
+ /* The maxium cpu_id that is present */
+ uint32_t max_present;
+ char vendor_id[12];
+ uint32_t family;
+ uint32_t model;
+ uint32_t stepping;
+};
+typedef struct xenpf_pcpu_version xenpf_pcpu_version_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_pcpu_version_t);
+
#define XENPF_cpu_online 56
#define XENPF_cpu_offline 57
struct xenpf_cpu_ol
@@ -468,6 +483,7 @@ struct xen_platform_op {
struct xenpf_getidletime getidletime;
struct xenpf_set_processor_pminfo set_pminfo;
struct xenpf_pcpuinfo pcpu_info;
+ struct xenpf_pcpu_version pcpu_version;
struct xenpf_cpu_ol cpu_ol;
struct xenpf_cpu_hotadd cpu_add;
struct xenpf_mem_hotadd mem_add;
--- a/xen/include/xlat.lst
+++ b/xen/include/xlat.lst
@@ -61,6 +61,17 @@
! memory_reservation memory.h
! pod_target memory.h
? physdev_pci_mmcfg_reserved physdev.h
+! pct_register platform.h
+! power_register platform.h
+? processor_csd platform.h
+! processor_cx platform.h
+! processor_flags platform.h
+! processor_performance platform.h
+! processor_power platform.h
+? processor_px platform.h
+! psd_package platform.h
+? xenpf_pcpuinfo platform.h
+? xenpf_pcpu_version platform.h
! sched_poll sched.h
? sched_remote_shutdown sched.h
? sched_shutdown sched.h
@@ -73,12 +84,3 @@
! vcpu_set_singleshot_timer vcpu.h
? xenoprof_init xenoprof.h
? xenoprof_passive xenoprof.h
-! power_register platform.h
-? processor_csd platform.h
-! processor_cx platform.h
-! processor_flags platform.h
-! processor_power platform.h
-! pct_register platform.h
-? processor_px platform.h
-! psd_package platform.h
-! processor_performance platform.h
++++++ 24411-x86-ucode-AMD-Fam15.patch ++++++
References: bnc#736824
# HG changeset patch
# User Christoph Egger <Christoph.Egger@xxxxxxx>
# Date 1323943209 -3600
# Node ID ca5f588bd203c9207e0988fcc80f43d83eed5420
# Parent 25f8952313ae683f41b634163f62651185d7be38
x86/ucode: fix for AMD Fam15 CPUs
Remove hardcoded maximum size a microcode patch can have. This is
dynamic now.
The microcode patch for family15h can be larger than 2048 bytes and
gets silently truncated.
Signed-off-by: Christoph Egger <Christoph.Egger@xxxxxxx>
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
--- 2011-12-14.orig/xen/arch/x86/microcode_amd.c 2011-12-15
14:55:15.000000000 +0100
+++ 2011-12-14/xen/arch/x86/microcode_amd.c 2011-12-15 14:59:47.000000000
+0100
@@ -27,18 +27,10 @@
#include <asm/processor.h>
#include <asm/microcode.h>
-#define pr_debug(x...) ((void)0)
-
#define UCODE_MAGIC 0x00414d44
#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000
#define UCODE_UCODE_TYPE 0x00000001
-#define UCODE_MAX_SIZE (2048)
-#define DEFAULT_UCODE_DATASIZE (896)
-#define MC_HEADER_SIZE (sizeof(struct microcode_header_amd))
-#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
-#define DWSIZE (sizeof(uint32_t))
-
/* serialize access to the physical write */
static DEFINE_SPINLOCK(microcode_update_lock);
@@ -99,7 +91,7 @@ static int microcode_fits(void *mc, int
}
if ( mc_header->patch_id <= uci->cpu_sig.rev )
- return -EINVAL;
+ return 0;
printk(KERN_DEBUG "microcode: CPU%d found a matching microcode "
"update with version 0x%x (current=0x%x)\n",
@@ -147,8 +139,12 @@ static int apply_microcode(int cpu)
return 0;
}
-static int get_next_ucode_from_buffer_amd(void *mc, const void *buf,
- size_t size, unsigned long *offset)
+static int get_next_ucode_from_buffer_amd(
+ void **mc,
+ size_t *mc_size,
+ const void *buf,
+ size_t size,
+ unsigned long *offset)
{
struct microcode_header_amd *mc_header;
size_t total_size;
@@ -181,8 +177,17 @@ static int get_next_ucode_from_buffer_am
return -EINVAL;
}
- memset(mc, 0, UCODE_MAX_SIZE);
- memcpy(mc, (const void *)(&bufp[off + 8]), total_size);
+ if ( *mc_size < total_size )
+ {
+ xfree(*mc);
+ *mc = xmalloc_bytes(total_size);
+ if ( !*mc )
+ return -ENOMEM;
+ *mc_size = total_size;
+ }
+ else if ( *mc_size > total_size )
+ memset(*mc + total_size, 0, *mc_size - total_size);
+ memcpy(*mc, mc_header, total_size);
*offset = off + total_size + 8;
@@ -236,10 +241,10 @@ static int cpu_request_microcode(int cpu
{
const uint32_t *buf_pos;
unsigned long offset = 0;
- int error = 0;
- int ret;
+ int error;
struct ucode_cpu_info *uci = &per_cpu(ucode_cpu_info, cpu);
void *mc;
+ size_t mc_size;
/* We should bind the task to the CPU */
BUG_ON(cpu != raw_smp_processor_id());
@@ -260,7 +265,9 @@ static int cpu_request_microcode(int cpu
return -EINVAL;
}
- mc = xmalloc_bytes(UCODE_MAX_SIZE);
+ /* Size of 1st microcode patch in bytes */
+ mc_size = buf_pos[offset / sizeof(*buf_pos) + 1];
+ mc = xmalloc_bytes(mc_size);
if ( mc == NULL )
{
printk(KERN_ERR "microcode: error! "
@@ -276,24 +284,33 @@ static int cpu_request_microcode(int cpu
* It's possible the data file has multiple matching ucode,
* lets keep searching till the latest version
*/
- while ( (ret = get_next_ucode_from_buffer_amd(mc, buf, size, &offset)) ==
0)
+ while ( (error = get_next_ucode_from_buffer_amd(&mc, &mc_size, buf, size,
+ &offset)) == 0 )
{
+ uci->mc.mc_amd = mc;
+
error = microcode_fits(mc, cpu);
if (error <= 0)
continue;
error = apply_microcode(cpu);
if (error == 0)
+ {
+ error = 1;
break;
+ }
}
/* On success keep the microcode patch for
* re-apply on resume.
*/
- if (error) {
+ if ( error <= 0 )
+ {
xfree(mc);
mc = NULL;
}
+ else
+ error = 0;
uci->mc.mc_amd = mc;
out:
++++++ 24412-x86-AMD-errata-model-shift.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1323955725 -3600
# Node ID 99caac2e35df41cbece606f663cb5570a62613c3
# Parent ca5f588bd203c9207e0988fcc80f43d83eed5420
x86/AMD: use correct shift count when merging model and stepping
... for legacy errata matching.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -216,7 +216,7 @@ int cpu_has_amd_erratum(const struct cpu
}
/* OSVW unavailable or ID unknown, match family-model-stepping range */
- ms = (cpu->x86_model << 8) | cpu->x86_mask;
+ ms = (cpu->x86_model << 4) | cpu->x86_mask;
while ((range = va_arg(ap, int))) {
if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
(ms >= AMD_MODEL_RANGE_START(range)) &&
++++++ 24417-amd-erratum-573.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1324046740 -3600
# Node ID 1452fb248cd513832cfbbd1100b9b72a0dde7ea6
# Parent 01c8b27e3d7d4ad2b469be9922bb04b5eb0195e8
x86/emulator: workaround for AMD erratum 573
The only cases where we might end up emulating fsincos (as any other
x87 operations without memory operands) are
- when a HVM guest is in real mode (not applicable on AMD)
- between two half page table updates in PAE mode (unlikely, and not
doing the emulation here does affect only performance, not
correctness)
- when a guest maliciously (or erroneously) modifies an (MMIO or page
table update) instruction under emulation (unspecified behavior)
Hence, in order to avoid the erratum to cause harm to the entire host,
don't emulate fsincos on the affected AMD CPU families.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
--- a/tools/tests/x86_emulator/x86_emulate.c
+++ b/tools/tests/x86_emulator/x86_emulate.c
@@ -3,5 +3,7 @@
#include <string.h>
#include <public/xen.h>
+#define cpu_has_amd_erratum(nr) 0
+
#include "x86_emulate/x86_emulate.h"
#include "x86_emulate/x86_emulate.c"
--- a/xen/arch/x86/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate.c
@@ -10,8 +10,15 @@
*/
#include <asm/x86_emulate.h>
+#include <asm/processor.h> /* current_cpu_info */
+#include <asm/amd.h> /* cpu_has_amd_erratum() */
/* Avoid namespace pollution. */
#undef cmpxchg
+#undef cpuid
+#undef wbinvd
+
+#define cpu_has_amd_erratum(nr) \
+ cpu_has_amd_erratum(¤t_cpu_data, AMD_ERRATUM_##nr)
#include "x86_emulate/x86_emulate.c"
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -2621,6 +2621,9 @@ x86_emulate(
case 0xd9: /* FPU 0xd9 */
switch ( modrm )
{
+ case 0xfb: /* fsincos */
+ fail_if(cpu_has_amd_erratum(573));
+ /* fall through */
case 0xc0 ... 0xc7: /* fld %stN */
case 0xc8 ... 0xcf: /* fxch %stN */
case 0xd0: /* fnop */
@@ -2646,7 +2649,6 @@ x86_emulate(
case 0xf8: /* fprem */
case 0xf9: /* fyl2xp1 */
case 0xfa: /* fsqrt */
- case 0xfb: /* fsincos */
case 0xfc: /* frndint */
case 0xfd: /* fscale */
case 0xfe: /* fsin */
--- a/xen/include/asm-x86/amd.h
+++ b/xen/include/asm-x86/amd.h
@@ -138,6 +138,12 @@
AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf), \
AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf))
+#define AMD_ERRATUM_573
\
+ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x0f, 0x0, 0x0, 0xff, 0xf), \
+ AMD_MODEL_RANGE(0x10, 0x0, 0x0, 0xff, 0xf), \
+ AMD_MODEL_RANGE(0x11, 0x0, 0x0, 0xff, 0xf), \
+ AMD_MODEL_RANGE(0x12, 0x0, 0x0, 0xff, 0xf))
+
struct cpuinfo_x86;
int cpu_has_amd_erratum(const struct cpuinfo_x86 *, int, ...);
++++++ 24429-mceinj-tool.patch ++++++
# HG changeset patch
# User Liu, Jinsong <jinsong.liu@xxxxxxxxx>
# Date 1324219200 0
# Node ID 9587ccc2ae3192fd5625a87fa58e840377471867
# Parent 5b4b7e565ab82b06940889f2be7e30042b2881fc
X86-MCE: fix a bug of xen-mceinj tool
Fix a bug of xen-mceinj tool which used to test mce by software way.
Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>
--- a/tools/tests/mce-test/tools/xen-mceinj.c
+++ b/tools/tests/mce-test/tools/xen-mceinj.c
@@ -134,8 +134,12 @@ static int mca_cpuinfo(xc_interface *xc_
{
struct xen_mc mc;
+ memset(&mc, 0, sizeof(struct xen_mc));
+
mc.cmd = XEN_MC_physcpuinfo;
- if (xc_mca_op(xc_handle, &mc))
+ mc.interface_version = XEN_MCA_INTERFACE_VERSION;
+
+ if (!xc_mca_op(xc_handle, &mc))
return mc.u.mc_physcpuinfo.ncpus;
else
return 0;
++++++ 24447-x86-TXT-INIT-SIPI-delay.patch ++++++
# HG changeset patch
# User Gang Wei <gang.wei@xxxxxxxxx>
# Date 1325153274 0
# Node ID a7b2610b8e5c9a15b1f5de9a3eabf7f19d0b4199
# Parent 2863b2f43a3bc9268885379d6fd55ed325b8c0a2
X86: Add a delay between INIT & SIPIs for tboot AP bring-up in X2APIC case
Without this delay, Xen could not bring APs up while working with
TXT/tboot, because tboot needs some time in APs to handle INIT before
becoming ready for receiving SIPIs (this delay was removed as part of
c/s 23724 by Tim Deegan).
Signed-off-by: Gang Wei <gang.wei@xxxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -42,6 +42,7 @@
#include <asm/msr.h>
#include <asm/mtrr.h>
#include <asm/time.h>
+#include <asm/tboot.h>
#include <mach_apic.h>
#include <mach_wakecpu.h>
#include <smpboot_hooks.h>
@@ -473,6 +474,18 @@ static int wakeup_secondary_cpu(int phys
send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
} while ( send_status && (timeout++ < 1000) );
}
+ else if ( tboot_in_measured_env() )
+ {
+ /*
+ * With tboot AP is actually spinning in a mini-guest before
+ * receiving INIT. Upon receiving INIT ipi, AP need time to VMExit,
+ * update VMCS to tracking SIPIs and VMResume.
+ *
+ * While AP is in root mode handling the INIT the CPU will drop
+ * any SIPIs
+ */
+ udelay(10);
+ }
/*
* Should we send STARTUP IPIs ?
++++++ 24448-x86-pt-irq-leak.patch ++++++
References: bnc#735806
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1325492779 -3600
# Node ID 3a22ed3ec534799b3cab55b0dc0a7380e701ecbe
# Parent a7b2610b8e5c9a15b1f5de9a3eabf7f19d0b4199
x86/passthrough: don't leak guest IRQs
As unmap_domain_pirq_emuirq() fails on a never mapped pIRQ, it must not
be called for the non-emu-IRQ case (to prevent the entire unmap
operation failing).
Based on a suggestion from Stefano.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Tested-by: Yongjie Ren <yongjie.ren@xxxxxxxxx>
Acked-by: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -228,7 +228,8 @@ static int physdev_unmap_pirq(struct phy
if ( is_hvm_domain(d) )
{
spin_lock(&d->event_lock);
- ret = unmap_domain_pirq_emuirq(d, unmap->pirq);
+ if ( domain_pirq_to_emuirq(d, unmap->pirq) != IRQ_UNBOUND )
+ ret = unmap_domain_pirq_emuirq(d, unmap->pirq);
spin_unlock(&d->event_lock);
if ( unmap->domid == DOMID_SELF || ret )
goto free_domain;
++++++ 24478-libxl_add_feature_flag_to_xenstore_for_XS_RESET_WATCHES.patch
++++++
changeset: 24478:ef99b8571a6f
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu Jan 05 19:40:40 2012 +0100
files: tools/libxl/libxl_create.c tools/python/xen/xend/XendDomainInfo.py
description:
libxl: add feature flag to xenstore for XS_RESET_WATCHES
Tell guest about availibilty of xenstoreds XS_RESET_WATCHES function.
Guests can not issue this command unconditionally because some buggy
toolstacks (such as EC2) do not ignore unknown commands properly.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
Acked-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
---
tools/libxl/libxl_create.c | 1 +
tools/python/xen/xend/XendDomainInfo.py | 1 +
2 files changed, 2 insertions(+)
Index: xen-4.1.2-testing/tools/libxl/libxl_create.c
===================================================================
--- xen-4.1.2-testing.orig/tools/libxl/libxl_create.c
+++ xen-4.1.2-testing/tools/libxl/libxl_create.c
@@ -382,6 +382,7 @@ retry_transaction:
libxl__xs_writev(&gc, t, libxl__sprintf(&gc, "%s/platform", dom_path),
info->platformdata);
xs_write(ctx->xsh, t, libxl__sprintf(&gc,
"%s/control/platform-feature-multiprocessor-suspend", dom_path), "1", 1);
+ xs_write(ctx->xsh, t, libxl__sprintf(&gc,
"%s/control/platform-feature-xs_reset_watches", dom_path), "1", 1);
if (!xs_transaction_end(ctx->xsh, t, 0)) {
if (errno == EAGAIN) {
t = 0;
Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -1770,6 +1770,7 @@ class XendDomainInfo:
f('store/port', self.store_port)
f('store/ring-ref', self.store_mfn)
+ f('control/platform-feature-xs_reset_watches', True)
if arch.type == "x86":
f('control/platform-feature-multiprocessor-suspend', True)
++++++ 2XXXX-vif-bridge.patch ++++++
# HG changeset patch
# User Jim Fehlig <jfehlig@xxxxxxxx>
# Date 1319581952 21600
# Node ID 74da2a3a1db1476d627f42e4a99e9e720cc6774d
# Parent 6c583d35d76dda2236c81d9437ff9d57ab02c006
Prevent vif-bridge from adding user-created tap interfaces to a bridge
Exit vif-bridge script if there is no device info in xenstore, preventing
it from adding user-created taps to bridges.
Signed-off-by: Jim Fehlig <jfehlig@xxxxxxxx>
diff -r 6c583d35d76d -r 74da2a3a1db1 tools/hotplug/Linux/vif-bridge
--- a/tools/hotplug/Linux/vif-bridge Thu Oct 20 15:36:01 2011 +0100
+++ b/tools/hotplug/Linux/vif-bridge Tue Oct 25 16:32:32 2011 -0600
@@ -31,6 +31,13 @@
dir=$(dirname "$0")
. "$dir/vif-common.sh"
+
+domu=$(xenstore_read_default "$XENBUS_PATH/domain" "")
+if [ -z "$domu" ]
+then
+ log debug "No device details in $XENBUS_PATH, exiting."
+ exit 0
+fi
bridge=${bridge:-}
bridge=$(xenstore_read_default "$XENBUS_PATH/bridge" "$bridge")
++++++ 32on64-extra-mem.patch ++++++
Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -2932,7 +2932,7 @@ class XendDomainInfo:
self.guest_bitsize = self.image.getBitSize()
# Make sure there's enough RAM available for the domain
- balloon.free(memory + shadow + vtd_mem, self)
+ balloon.free(memory + shadow + vtd_mem + 512, self)
# Set up the shadow memory
shadow_cur = xc.shadow_mem_control(self.domid, shadow / 1024)
++++++ README.SuSE ++++++
README for the Xen packages
===========================
This file contains SUSE-specific instructions and suggestions for using Xen.
For more in-depth documentation of using Xen on SUSE, consult the
virtualization chapter in the SLES or SUSE Linux manual, or read up-to-date
virtualization information, including a list of known issues, at
http://www.novell.com/documentation/vmserver/.
For more complete documentation on Xen itself, please install one of the
xen-doc-* packages and read the documentation installed into
/usr/share/doc/packages/xen/.
About
-----
Xen allows you to run multiple virtual machines on a single physical machine.
See the Xen homepage for more information:
http://www.cl.cam.ac.uk/research/srg/netos/xen/
If you want to use Xen, you need to install the Xen hypervisor and a number of
supporting packages. During the initial SUSE installation (or when installing
from YaST) check-mark the "Xen Virtual Machine Host Server" pattern. If,
instead, you wish to install Xen manually later, click on the "Install
Hypervisor and Tools" icon in YaST.
If you want to install and manage VMs graphically, be sure to install a
graphical desktop environment like KDE or GNOME. The following optional
packages are needed to manage VMs graphically. Note that "Install Hypervisor
and Tools" installs all the packages below:
vm-install (Optional, to install VMs)
virt-manager (Optional, to manage VMs graphically)
virt-viewer (Optional, to view VMs outside virt-manager)
Additional packages:
nbd-client (Optional, to access virtual disks stored on NBD servers)
open-iscsi (Optional, to access virtual disks stored on iSCSI targets)
You then need to reboot your machine. Instead of booting a normal Linux
kernel, you will boot the Xen hypervisor and a slightly changed Linux kernel.
This Linux kernel runs in the first virtual machine and will drive most of
your hardware.
This approach is called paravirtualization, since it is a partial
virtualization (the Linux kernel needs to be changed slightly, to make the
virtualization easier). It results in very good performance (consult
http://www.cl.cam.ac.uk/research/srg/netos/xen/performance.html) but has the
downside of unchanged operating systems not being supported. However, new
hardware features (e.g., Intel VT and AMD-V) are overcoming this limitation.
Terminology
-----------
The Xen open-source community has a number of terms that you should be
familiar with.
A "domain" is Xen's term for a virtual machine.
"Domain 0" is the first virtual machine. It can control all other virtual
machines. It also (usually) controls the physical hardware. A kernel used in
domain 0 may sometimes be referred to as a dom0 kernel.
"Domain U" is any virtual machine other than domain 0. The "U" indicates it
is unprivileged (that is, it cannot control other domains). A kernel used in
an unprivileged domain may be referred to as a domU kernel.
Novell documentation will use the more industry-standard term "virtual
machine", or "VM", rather than "domain" where possible. And to that end,
domain 0 will be called the "virtual machine server", since it essentially the
server on which the other VMs run. All other domains are simply "virtual
machines".
The acronym "HVM" refers to a hardware-assisted virtual machine. These are
VMs that have not been modified (e.g., Windows) and therefore need hardware
support such as Intel VT or AMD-V to run on Xen.
Kernels
-------
Xen supports two kinds of kernels: A privileged kernel (which boots the
machine, controls other VMs, and usually controls all your physical hardware)
and unprivileged kernels (which can't control other VMs, and usually don't need
drivers for physical hardware). The privileged kernel boots first (as the VM
server); an unprivileged kernel is used in all subsequent VMs.
The VM server takes control of the boot process after Xen has initialized the
CPU and the memory. This VM contains a privileged kernel and all the hardware
drivers.
For the other virtual machines, you usually don't need the hardware drivers.
(It is possible to hide a PCI device from the VM server and re-assign it to
another VM for direct access, but that is a more advanced topic.) Instead you
use virtual network and block device drivers in the unprivileged VMs to access
the physical network and block drivers in the VM server.
For simplicity, SUSE ships a single Xen-enabled Linux kernel, rather than
separate privileged and unprivileged kernels. As most of the hardware drivers
are modules anyway, using this kernel as an unprivileged kernel has very
little extra overhead.
The kernel is contained in the kernel-xen package, which you need to install to
use Xen.
Booting
-------
If you installed Xen during the initial SUSE installation, or installed one
of the kernel-xen* packages later, a "XEN" option should exist in your Grub
bootloader. Select that to boot SUSE on top of Xen.
If you want to add additional entries, or modify the existing ones, you will
have to edit Grub yourself. All Xen entries in the Grub configuration file
(usually /boot/grub/menu.lst) look something like this:
title XEN
root (hd0,5)
kernel /xen.gz
module /vmlinuz-xen <parameters>
module /initrd-xen
Replace (hd0,5) with the partition that holds your /boot directory in
grub-speak, e.g., hda1 -> (hd0,0) and sda5 -> (hd2,4).
Normally, xen.gz requires no parameters. If you want to add parameters,
see below.
Replace "<parameters>" with the kernel parameters that you want to pass to
your kernel. These should be very similar, if not identical, to those passed
to a normal kernel that you boot on bare iron.
Once you have booted this configuration successfully, you are running Xen with
a privileged kernel on top of it.
Xen Boot Parameters
-------------------
Normally, xen.gz requires no parameters. However, in special cases (such as
debugging or a dedicated VM server) you may wish to pass it parameters.
In particular in case of problems you may want to attach a serial terminal and
direct Xen to send its output not only to the screen, but also to that
terminal. In order to do so, add "console=vga,com<n> com<n>=<baud>" (without
the quotes and with <n> replaced by the serial port number - generally 1 or 2 -
and with <baud> replaced by the baud rate the serial terminal is configured
for) to the xen.gz line.
For a more complete discussion of possible parameters, see the user
documentation in the xen-doc-* packages.
Init scripts
------------
Before you can create additional VMs (or use any other xm command) xend must
be running. This init script is part of the xen-tools package, and it is
activated at installation time. You can (de)activate it using insserv. You
can also start it manually with "rcxend start".
The deprecated xendomains script is also shipped, but disabled by default. In
SLES 10 GA (xen 3.0.2) and older, this script allowed VMs to be started and
stopped automatically when the machine starts and stops. In SLES 10 SP1 (xen
3.0.4) and newer, the proper way to start and stop VMs automatically is to set
the "on_xend_start" and "on_xend_stop" settings in the VMs configuration.
(Deprecating xendomains was necessary because xend, not the configuration file
in /etc/xen/vm, is now the authoritative source for the VM's settings.)
Consult the online documentation for more information.
Creating a VM with vm-install
-----------------------------
The vm-install program (part of the vm-install package, and accessible
through YaST's Control Center) is the recommended method to create VMs. This
program handles creating both the VM's configuration file and disk(s). It can
help install any operating system, not just SUSE.
wizard will start. Otherwise, a text wizard will start.
Each VM needs to have its own root filesystem. The root filesystem can live
on a block device (e.g., a hard disk partition, or an LVM2 or EVMS volume) or
in a file that holds the filesystem image.
VMs can share filesystems, such as /usr or /opt, that are mounted read-only
from _all_ VMs. Never try to share a filesystem that is mounted read-write;
filesystem corruption will result. For sharing writable data between VMs, use
NFS or other networked or cluster filesystems.
When defining the virtual network adapter(s), we recommend using a static MAC
for the VM rather than allowing Xen to randomly select one each time the VM
boots. (See "Network Troubleshooting" below.) XenSource has been allocated a
range of MAC addresses with the OUI of 00-16-3E. By using MACs from this
range you can be sure they will not conflict with any physical adapters.
Once you have the VM configured, click "OK". The wizard will now create a
configuration file for the VM, and create a disk image. The disk image will
exist in /var/lib/xen/images, and a corresponding config file will exist in
/etc/xen/vm. The operating system's installation program will then run within
the VM.
When the VM shuts down (because the installation -- or at least the first
stage of it -- is done), the wizard finalizes the VM's configuration and
restarts the VM.
The creation of VMs can be automated; read the vm-install man page for more
details. The installation of an OS within the VM can be automated if the OS
supports it.
Creating a VM Manually
----------------------
If you create a VM manually (as opposed to using vm-install, which is the
recommended way), you will need to create a disk (or reuse an existing one)
and a configuration file.
If you are using a disk or disk image that is already installed with an
operating system, you'll probably need to replace its kernel with a
Xen-enabled kernel.
The kernel and ramdisk used to bootstrap the VM must match any kernel modules
that might be present in the VM's disk. It is possible to manually copy the
kernel and ramdisk from the VM's disk (for example, after updating the kernel
within that VM) to the VM server's filesystem. However, an easier (and less
error-prone) method is to use something called the "domUloader". Before a new
VM is started, this loader automatically copies the kernel and ramdisk into
the VM server's filesystem, so that it can be used to bootstrap the new VM.
See /etc/xen/examples/xmexample.domUloader for an example.
Next, make a copy of one of the /etc/xen/examples/* files, and modify it to
suit your needs. For paravirtualized VMs, start with
/etc/xen/examples/xmexample1; for fully virtualized VMs, start with
/etc/xen/examples/xmexample.hvm. You'll need to change (at very least) the
"name" and "disk" parameters.
Managing Virtual Machines
-------------------------
VMs can be managed from the command line or from virt-manager.
Before a VM can be started, xend must be informed of it. vm-install will
automatically import new VM configurations into xend. However, if you copy a
VM from another machine, or manually create a VM configuration file, you will
need to import it into xend with a command like:
xm new my-vm
If your VM's configuration file is not located in /etc/xen/vm, you must
specify the full path. This imports the configuration into xend (and
therefore virt-manager becomes aware of it, also).
Now to start the VM:
xm start my-vm
or start it graphically from virt-manager.
Have a look at running sessions with "xm list". Note the ID of the newly
created VM. Attach to the VM's text console with "xm console <ID>" (replacing
ID with the VM's ID). Attaching to multiple VM consoles is most conveniently
done with the terminal multiplexer "screen".
Have a look at the other xm commands by typing "xm help". Note that most xm
commands must be done as root.
Using the Mouse via VNC in Fully Virtual Mode
---------------------------------------------
In a fully virtualized VM, the mouse may be emulated as a PS/2 mouse, USB
mouse, or USB tablet. The vm-install tool selects the best emulation that is
known to be automatically detected and supported by the operating system.
However, when accessing some fully virtualized operating systems via VNC, the
mouse may be difficult to control if the VM is emulating a PS/2 mouse. PS/2
provides mouse deltas, but VNC only provides absolute coordinates. In such
cases, you may want to manually switch the operating system and VM to use a
USB tablet.
Emulation of a SummaSketch graphics tablet is provided for this reason. To
use the Summa emulation, you will need to configure your fully virtualized OS.
Note that the virtual tablet is connected to the second virtual serial port
(/dev/ttyS1 or COM2).
Most Linux distributions ship with appropriate drivers, and only need to be
configured. To configure gpm, edit /etc/sysconfig/mouse and add these lines:
MOUSETYPE="summa"
XMOUSETYPE="SUMMA"
DEVICE=/dev/ttyS1
The format and location of your configuration file could vary depending upon
your Linux distribution. The goal is to run the gpm daemon as follows:
gpm -t summa -m /dev/ttyS1
X also needs to be configured to use the Summa emulation. Add the following
stanza to /etc/X11/xorg.conf, or use your distribution's tools to add these
settings:
Section "InputDevice"
Identifier "Mouse0"
Driver "summa"
Option "Device" "/dev/ttyS1"
Option "InputFashion" "Tablet"
Option "Mode" "Absolute"
Option "Name" "EasyPen"
Option "Compatible" "True"
Option "Protocol" "Auto"
Option "SendCoreEvents" "on"
Option "Vendor" "GENIUS"
EndSection
After making these changes, restart gpm and X.
To ensure the VM is emulating a USB tablet, add these lines to the
configuration file in /etc/xen/vm:
usb=1
usbdevice='tablet'
Then re-import the configuration into xend:
xm new my-vm
HVM Console in Fully Virtual Mode
---------------------------------
When running a VM in fully virtual mode, a special console is available that
provides some additional ways to control the VM. Press Ctrl-Alt-2 to access
the console; press Ctrl-Alt-1 to return to the VM. While at the console,
type "help" for help.
The two most important commands are "send-key" and "change". The "send-key"
command allows you to send any key sequence to the VM, which might otherwise
be intercepted by your local window manager.
The "change" command allows the target of a block device to be changed; for
example, use it to change from one CD ISO to another. Some versions of Xen
have this command disabled for security reasons. Consult the online
documentation for workarounds.
Networking
----------
Your virtual machines become much more useful if you can reach them via the
network. Starting with openSUSE11.1 and SLE11, networking in domain 0 is
configured and managed via YaST. The yast2-networking module can be used
to create and manage bridged networks. During initial installation, a bridged
networking proposal will be presented if the "Xen Virtual Machine Host Server"
pattern is selected. The proposal will also be presented if you install Xen
after initial installation using the "Install Hypervisor and Tools" module in
YaST.
The default proposal creates a virtual bridge in domain 0 for each active
ethernet device, enslaving the device to the bridge. Consider a machine
containing two ethernet devices (eth0 and eth1), both with active carriers.
YaST will create br0 and br1, enslaving the eth0 and eth1 devices repectively.
VMs get a virtual network interface (e.g. eth0), which is visible in domain 0
as vifN.0 and connected to the bridge. This means that if you set up an IP
address in the VMs belonging to the same subnet as br0 from your domain 0,
you'll be able to communicate not only with the other slave VMs, but also with
domain 0 and with the external network. If you have a DHCP server running in
your network, your VMs should succeed in getting an IP address.
Be aware that this may have unwanted security implications. You may want to
opt for routing instead of bridging, so you can set up firewalling rules in
domain 0.
Please read about the network configuration in the Xen manual. You can set up
bridging or routing for other interfaces also.
For debugging, here's what happens on bootup of a domU:
- xenstored saves the device setup in xenstore
- domU is created
- vifN.0 shows up in domain 0 and a hotplug event is triggered
- hotplug is /sbin/udev; udev looks at /etc/udev/rules.d/40-xen.rules and
calls /etc/xen/scripts/vif-bridge online
- vif-bridge set the vifN.0 device up and enslaves it to the bridge
- eth0 shows up in domU (hotplug event triggered)
Similar things happen for block devices, except that /etc/xen/scripts/block is
called.
It's not recommended to use ifplugd nor NetworkManager for managing the
interfaces if you use bridging mode. Use routing with nat or proxy-arp
in that case. You also need to do that in case you want to send out packets
on wireless; you can't bridge Xen "ethernet" packets into 802.11 packets.
Thread-Local Storage
--------------------
For some time now, the glibc thread library (NPTL) has used a shortcut to
access thread-local variables at a negative segment offset from the segment
selector GS instead of reading the linear address from the TDB (offset 0).
Unfortunately, this optimization has been made the default by the glibc and
gcc maintainers, as it saves one indirection. For Xen this is bad: The access
to these variables will trap, and Xen will need to use some tricks to make the
access work. It does work, but it's very slow.
SUSE Linux 9.1 and SLES 9 were prior to this change, and thus are not
affected. SUSE Linux 9.2 and 9.3 are affected. For SUSE Linux 10.x and SLES
10, we have disabled negative segment references in gcc and glibc, and so
these are not affected. Other non-SUSE Linux distributions may be affected.
For affected distributions, one way to work around the problem is to rename
the /lib/tls directory, so the pre-i686 version gets used, where no such
tricks are done. An example LSB-compliant init script which automates these
steps is installed at /usr/share/doc/packages/xen/boot.xen. This script
renames /lib/tls when running on Xen, and restores it when not running on Xen.
Modify this script to work with your specific distribution.
Mono has a similar problem, but this has been fixed in SUSE Linux 10.1 and
SLES 10. Older or non-SUSE versions of Mono may have a performance impact.
Security
--------
Domain 0 has control over all domains. This means that care should be taken to
keep domain 0 safe; ideally you strip it down to only do as little there as
possible, preferably with no local users except for the system administrator.
Most commands in domain 0 can only be performed as root, but this protection
scheme only has moderate security and might be defeated. In case domain 0 is
compromised, all other domains are compromised as well.
To allow relocation of VMs (migration), the receiving machine listens on TCP
port 8002. You might want to put firewall rules in place in domain 0 to
restrict this to machines which you trust. You have some access control in
xend-config.sxp as well by tweaking the xend-relocation-hosts-allow
setting. Relocating VMs with sensitive data is not a good idea in untrusted
networks, since the data is not sent encrypted.
The memory protections for the domUs are effective; so far no way to break out
of a virtual machine is known. A VM is an effective jail.
Limitations
-----------
When booting, Linux reserves data structures matching the amount of RAM found.
This has the side-effect that you can't dynamically grow the memory beyond
what the kernel has been booted with. But you can trick domU Linux to prepare
for a larger amount of RAM by passing the mem= boot parameter.
The export of virtual hard disks from files in Xen can be handled via the
loopback driver (although in Xen >= 3.0.4, this is can be replaced by the
"blktap" user-space driver.) If you are still using loopback, it may be
possible to run out of loopback devices, as by default only 64 are supported.
You can change this by inserting:
options loop max_loop=128
into /etc/modprobe.conf.local in domain 0.
Network Troubleshooting
-----------------------
First ensure the VM server is configured correctly and can access the network.
Do not use ifplugd or NetworkManager, neither are bridge aware.
Specify a static virtual MAC in the VM's configuration file. Random MACs can
be problematic, since with each boot of the VM it appears that some hardware
has been removed (the previous random MAC) and new hardware is present (the
new random MAC). This can cause network configuration files (which were
intended for the old MAC) to not be matched up with the new virtual hardware.
In the VM's filesystem, ensure the ifcfg-eth* files are named appropriately.
For example, if you do decide to use a randomly-selected MAC for the VM, the
ifcfg-eth* file must not include the MAC in its name; name it generically
("ifcfg-eth0") instead. If you use a static virtual MAC for the VM, be sure
that is reflected in the file's name.
Troubleshooting
---------------
First try to get Linux running on bare iron before trying with Xen.
Be sure your Xen hypervisor (xen) and VM kernels (kernel-xen) are compatible.
The hypervisor and domain 0 kernel are a matched set, and usually must be
upgraded together. Consult the online documentation for a matrix of supported
32- and 64-bit combinations
On certain machines with 2GB or less of RAM, domain 0 Linux may fail to boot,
printing the following messages:
PCI-DMA: Using software bounce buffering for IO (SWIOTLB)
...
Kernel panic - not syncing: PCI-DMA: Memory would be corrupted
Fix this by adding "swiotlb=16" to the Linux kernel command line, which
reserves additional memory for the swiotlb (the actual number to be used here
of course depends on the system configuration).
If you have trouble early in the boot, try passing pnpacpi=off to the Linux
kernel. If you have trouble with interrupts or timers, passing lapic to Xen
may help. Xen and Linux understand similar ACPI boot parameters. Try the
options acpi=off,force,strict,ht,noirq or acpi_skip_timer_override.
Other useful debugging options to Xen may be nosmp, noreboot, mem=1024M,
sync_console, noirqbalance (Dell). For a complete list of Xen boot options,
consult chapter 11.3 of the Xen users' manual.
If domain 0 Linux crashes on X11 startup, please try to boot into runlevel 3.
To debug Xen or domain 0 Linux crashes or hangs, it may be useful to use the
debug-enabled hypervisor, and/or to prevent automatic rebooting. Change your
Grub configuration from something like this:
kernel (hd0,5)/xen.gz
To something like this:
kernel (hd0,5)/xen-dbg.gz noreboot
After rebooting, the Xen hypervisor will write any error messages to the log
file (viewable with the "xm dmesg" command).
If problems persist, check if a newer version is available. Well-tested
versions will be shipped with SUSE and via YaST Online Update. More frequent
(but less supported) updates are available on Novell's Forge site:
http://forge.novell.com/modules/xfmod/project/?xenpreview
Upgrading the Host Operating System
-----------------------------------
When upgrading the host operating system from one major release to another
(for example, SLES 10 to SLES 11 or openSUSE 11.4 to openSUSE 12.1) or when
applying a service pack like SLES 11 SP2 to SLES 11 SP1 all running VMs must
be shut down before the upgrade process is begun.
Known Issues
------------
For a list of known issues and work-arounds, see
http://www.novell.com/documentation/vmserver/.
Disclaimer
----------
Xen performed amazingly well in our tests and proved very stable. Still, you
should be careful when using it, just like you'd be careful if you boot an
experimental kernel. Expect that it may not boot and be prepared to have a
fall-back solution for that scenario. Be prepared that it may not support all
of your hardware. And for the worst of all cases, have your most valuable
data backed up. (This is always a good idea, of course.)
Feedback
--------
In case you have remarks about, problems with, ideas for, or praise for Xen,
please report it back to the xen-devel list:
xen-devel@xxxxxxxxxxxxxxxxxxx
If you find issues with the packaging or setup done by Novell/SUSE, please
report it to:
http://www.suse.de/feedback/
ENJOY!
Your Novell SUSE Team.
++++++ _link ++++++
<link project="openSUSE:12.1" package="xen"
baserev="aa6de9a40241df7973eeaba172315a03">
<patches>
<branch/>
</patches>
</link>
++++++ altgr_2.patch ++++++
When access domU from Windows VNC client, spanish keyboard altgr key
doesn't work. According to log info, we found that the keycodes passed
from vncclient to qemu vncserver have something wrong. When altgr and "2"
pressed, keycodes vncserver receives are:
ALT_R down,
CTRL_L down,
CTRL_L up,
ATL_R up,
"2" down,
"2" up,
...
Since when send "2" down, there is no altgr modifier, the char displayed
on screen will be "2" but not "@".
To solve this problem, there is another patch applied by upstream which
sends an additional altgr modifier before "2" down in the above case.
It works well when domU is windows, but on sles10 sp3 domU, sometimes it
display "@" and sometimes it still displays "2", especially when press
altgr+2 continuously.
For the sles10 sp3 domU problem, maybe because there are two many alt_r (same
keycode as altgr on "es") up and down events and the domU OS couldn't handle
it well.
To furtherly solve this problem, I write this patch, when vncserver
is "es" and receives a alt_r keysym (this is already abnormal since "es" has
no alt_r), then treat the alt_r as alt_l. This can avoid too many altgr
keycodes up and down events and make sure the intentionally added altgr keycode
can take effect.
Signed-off by Chunyan Liu (cyliu@xxxxxxxxxx)
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/vnc.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/vnc.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/vnc.c
@@ -1308,6 +1308,9 @@ static void do_key_event(VncState *vs, i
shift_keys = vs->modifiers_state[0x2a] | vs->modifiers_state[0x36];
altgr_keys = vs->modifiers_state[0xb8];
+ if ( !strcmp(keyboard_layout,"es") && sym == 0xffea )
+ sym = 0xffe9;
+
keycode = keysym2scancode(vs->kbd_layout, sym & 0xFFFF);
if (keycode == 0) {
fprintf(stderr, "Key lost : keysym=0x%x(%d)\n", sym, sym);
++++++ baselibs.conf ++++++
xen-libs
++++++ bdrv_default_rwflag.patch ++++++
Subject: modify default read/write flag in bdrv_init.
Signed-off by Chunyan Liu <cyliu@xxxxxxxxxx>
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/vl.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/vl.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/vl.c
@@ -2617,6 +2617,8 @@ int drive_init(struct drive_opt *arg, in
strncpy(drives_table[nb_drives].serial, serial, sizeof(serial));
nb_drives++;
+ bdrv_flags = BDRV_O_RDWR;
+
switch(type) {
case IF_IDE:
case IF_XEN:
@@ -2630,6 +2632,7 @@ int drive_init(struct drive_opt *arg, in
break;
case MEDIA_CDROM:
bdrv_set_type_hint(bdrv, BDRV_TYPE_CDROM);
+ bdrv_flags &= ~BDRV_O_RDWR;
break;
}
break;
@@ -2650,7 +2653,6 @@ int drive_init(struct drive_opt *arg, in
}
if (!file[0])
return -2;
- bdrv_flags = 0;
if (snapshot) {
bdrv_flags |= BDRV_O_SNAPSHOT;
cache = 2; /* always use write-back with snapshot */
++++++ bdrv_open2_fix_flags.patch ++++++
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/block.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/block.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/block.c
@@ -350,7 +350,7 @@ int bdrv_file_open(BlockDriverState **pb
int bdrv_open(BlockDriverState *bs, const char *filename, int flags)
{
- return bdrv_open2(bs, filename, flags, NULL);
+ return bdrv_open2(bs, filename, flags|BDRV_O_RDWR, NULL);
}
int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
@@ -419,12 +419,13 @@ int bdrv_open2(BlockDriverState *bs, con
}
bs->drv = drv;
bs->opaque = qemu_mallocz(drv->instance_size);
- /* Note: for compatibility, we open disk image files as RDWR, and
- RDONLY as fallback */
if (!(flags & BDRV_O_FILE))
- open_flags = (flags & BDRV_O_ACCESS) | (flags & BDRV_O_CACHE_MASK);
+ open_flags = flags;
else
open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT);
+ if (!(open_flags & BDRV_O_RDWR))
+ bs->read_only = 1;
+
ret = drv->bdrv_open(bs, filename, open_flags);
if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) {
ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR);
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/hw/usb-msd.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/hw/usb-msd.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/hw/usb-msd.c
@@ -551,7 +551,7 @@ USBDevice *usb_msd_init(const char *file
s = qemu_mallocz(sizeof(MSDState));
bdrv = bdrv_new("usb");
- if (bdrv_open2(bdrv, filename, 0, drv) < 0)
+ if (bdrv_open2(bdrv, filename, BDRV_O_RDWR, drv) < 0)
goto fail;
s->bs = bdrv;
*pbs = bdrv;
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/qemu-img.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/qemu-img.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/qemu-img.c
@@ -32,7 +32,7 @@
#endif
/* Default to cache=writeback as data integrity is not important for qemu-tcg.
*/
-#define BRDV_O_FLAGS BDRV_O_CACHE_WB
+#define BDRV_O_FLAGS BDRV_O_CACHE_WB
static void QEMU_NORETURN error(const char *fmt, ...)
{
@@ -185,7 +185,7 @@ static int read_password(char *buf, int
#endif
static BlockDriverState *bdrv_new_open(const char *filename,
- const char *fmt)
+ const char *fmt, int flags)
{
BlockDriverState *bs;
BlockDriver *drv;
@@ -201,7 +201,7 @@ static BlockDriverState *bdrv_new_open(c
} else {
drv = &bdrv_raw;
}
- if (bdrv_open2(bs, filename, BRDV_O_FLAGS, drv) < 0) {
+ if (bdrv_open2(bs, filename, flags, drv) < 0) {
error("Could not open '%s'", filename);
}
if (bdrv_is_encrypted(bs)) {
@@ -253,7 +253,7 @@ static int img_create(int argc, char **a
size = 0;
if (base_filename) {
BlockDriverState *bs;
- bs = bdrv_new_open(base_filename, NULL);
+ bs = bdrv_new_open(base_filename, NULL, BDRV_O_RDWR);
bdrv_get_geometry(bs, &size);
size *= 512;
bdrv_delete(bs);
@@ -332,7 +332,7 @@ static int img_commit(int argc, char **a
} else {
drv = NULL;
}
- if (bdrv_open2(bs, filename, BRDV_O_FLAGS, drv) < 0) {
+ if (bdrv_open2(bs, filename, BDRV_O_RDWR, drv) < 0) {
error("Could not open '%s'", filename);
}
ret = bdrv_commit(bs);
@@ -455,7 +455,8 @@ static int img_convert(int argc, char **
total_sectors = 0;
for (bs_i = 0; bs_i < bs_n; bs_i++) {
- bs[bs_i] = bdrv_new_open(argv[optind + bs_i], fmt);
+ bs[bs_i] = bdrv_new_open(argv[optind + bs_i], fmt,
+ BDRV_O_CACHE_WB|BDRV_O_RDONLY);
if (!bs[bs_i])
error("Could not open '%s'", argv[optind + bs_i]);
bdrv_get_geometry(bs[bs_i], &bs_sectors);
@@ -483,7 +484,7 @@ static int img_convert(int argc, char **
}
}
- out_bs = bdrv_new_open(out_filename, out_fmt);
+ out_bs = bdrv_new_open(out_filename, out_fmt, BDRV_O_CACHE_WB|BDRV_O_RDWR);
bs_i = 0;
bs_offset = 0;
@@ -706,7 +707,7 @@ static int img_info(int argc, char **arg
} else {
drv = NULL;
}
- if (bdrv_open2(bs, filename, BRDV_O_FLAGS, drv) < 0) {
+ if (bdrv_open2(bs, filename, BDRV_O_FLAGS|BDRV_O_RDWR, drv) < 0) {
error("Could not open '%s'", filename);
}
bdrv_get_format(bs, fmt_name, sizeof(fmt_name));
@@ -810,7 +811,7 @@ static void img_snapshot(int argc, char
if (!bs)
error("Not enough memory");
- if (bdrv_open2(bs, filename, 0, NULL) < 0) {
+ if (bdrv_open2(bs, filename, BDRV_O_RDWR, NULL) < 0) {
error("Could not open '%s'", filename);
}
++++++ bdrv_open2_flags_2.patch ++++++
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/hw/xen_blktap.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c
@@ -237,6 +237,7 @@ static int open_disk(struct td_state *s,
BlockDriver* drv;
char* devname;
static int devnumber = 0;
+ int flags = readonly ? BDRV_O_RDONLY : BDRV_O_RDWR;
int i;
DPRINTF("Opening %s as blktap%d\n", path, devnumber);
@@ -259,7 +260,7 @@ static int open_disk(struct td_state *s,
DPRINTF("%s driver specified\n", drv ? drv->format_name : "No");
/* Open the image */
- if (bdrv_open2(bs, path, 0, drv) != 0) {
+ if (bdrv_open2(bs, path, flags, drv) != 0) {
fprintf(stderr, "Could not open image file %s\n", path);
return -ENOMEM;
}
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/xenstore.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/xenstore.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/xenstore.c
@@ -136,7 +136,8 @@ static void insert_media(void *opaque)
else
format = &bdrv_raw;
- bdrv_open2(bs, media_filename[i], 0, format);
+ /* Temporary BDRV_O_RDWR */
+ bdrv_open2(bs, media_filename[i], BDRV_O_RDWR, format);
#ifdef CONFIG_STUBDOM
{
char *buf, *backend, *params_path, *params;
@@ -511,7 +512,8 @@ void xenstore_parse_domain_config(int hv
}
for (i = 0; i < num; i++) {
- format = NULL; /* don't know what the format is yet */
+ flags = 0;
+ format = NULL; /* don't know what the format is yet */
/* read the backend path */
xenstore_get_backend_path(&bpath, "vbd", danger_path, hvm_domid,
e_danger[i]);
if (bpath == NULL)
@@ -597,6 +599,17 @@ void xenstore_parse_domain_config(int hv
format = &bdrv_raw;
}
+ /* read the mode of the device */
+ if (pasprintf(&buf, "%s/mode", bpath) == -1)
+ continue;
+ free(mode);
+ mode = xs_read(xsh, XBT_NULL, buf, &len);
+
+ if (!strcmp(mode, "r") || !strcmp(mode, "ro"))
+ flags |= BDRV_O_RDONLY;
+ if (!strcmp(mode, "w") || !strcmp(mode, "rw"))
+ flags |= BDRV_O_RDWR;
+
#if 0
/* Phantom VBDs are disabled because the use of paths
* from guest-controlled areas in xenstore is unsafe.
@@ -664,7 +677,7 @@ void xenstore_parse_domain_config(int hv
#ifdef CONFIG_STUBDOM
if (pasprintf(&danger_buf, "%s/device/vbd/%s", danger_path,
e_danger[i]) == -1)
continue;
- if (bdrv_open2(bs, danger_buf, BDRV_O_CACHE_WB /* snapshot and
write-back */, &bdrv_raw) == 0) {
+ if (bdrv_open2(bs, danger_buf, flags|BDRV_O_CACHE_WB /* snapshot and
write-back */, &bdrv_raw) == 0) {
pstrcpy(bs->filename, sizeof(bs->filename), params);
}
#else
@@ -716,7 +729,7 @@ void xenstore_parse_domain_config(int hv
fprintf(stderr, "Using file %s in read-%s mode\n", bs->filename,
is_readonly ? "only" : "write");
- if (bdrv_open2(bs, params, BDRV_O_CACHE_WB /* snapshot and
write-back */, format) < 0) {
+ if (bdrv_open2(bs, params, flags|BDRV_O_CACHE_WB /* snapshot and
write-back */, format) < 0) {
fprintf(stderr, "qemu: could not open vbd '%s' or hard disk
image '%s' (drv '%s' format '%s')\n", buf, params, drv ? drv : "?", format ?
format->format_name : "0");
} else {
char* snapshot = get_snapshot_name(atoi(e_danger[i]));
++++++ blktap-close-fifos.patch ++++++
Index: xen-4.1.2-testing/tools/blktap/drivers/blktapctrl.c
===================================================================
--- xen-4.1.2-testing.orig/tools/blktap/drivers/blktapctrl.c
+++ xen-4.1.2-testing/tools/blktap/drivers/blktapctrl.c
@@ -280,7 +280,7 @@ static int del_disktype(blkif_t *blkif)
* qemu-dm instance. We may close the file handle only if there is
* no other disk left for this domain.
*/
- if (dtypes[type]->use_ioemu)
+ if (dtypes[type]->use_ioemu && dtypes[type]->idnum != DISK_TYPE_AIO)
return !qemu_instance_has_disks(blkif->tappid);
/* Caller should close() if no single controller, or list is empty. */
++++++ blktap-pv-cdrom.patch ++++++
++++ 803 lines (skipped)
++++++ blktap.patch ++++++
bug #239173
bug #242953
Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -3296,7 +3296,7 @@ class XendDomainInfo:
(fn, BOOTLOADER_LOOPBACK_DEVICE))
vbd = {
- 'mode': 'RO',
+ 'mode': 'RW',
'device': BOOTLOADER_LOOPBACK_DEVICE,
}
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/xenstore.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/xenstore.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/xenstore.c
@@ -447,9 +447,9 @@ void xenstore_parse_domain_config(int hv
{
char **e_danger = NULL;
char *buf = NULL;
- char *fpath = NULL, *bpath = NULL,
+ char *fpath = NULL, *bpath = NULL, *btype = NULL,
*dev = NULL, *params = NULL, *drv = NULL;
- int i, ret;
+ int i, ret, is_tap;
unsigned int len, num, hd_index, pci_devid = 0;
BlockDriverState *bs;
BlockDriver *format;
@@ -486,6 +486,14 @@ void xenstore_parse_domain_config(int hv
e_danger[i]);
if (bpath == NULL)
continue;
+ /* check to see if type is tap or not */
+ if (pasprintf(&buf, "%s/type", bpath) == -1)
+ continue;
+ free(btype);
+ btype = xs_read(xsh, XBT_NULL, buf, &len);
+ if (btype == NULL)
+ continue;
+ is_tap = !strncmp(btype, "tap", 3);
/* read the name of the device */
if (pasprintf(&buf, "%s/dev", bpath) == -1)
continue;
@@ -775,6 +783,7 @@ void xenstore_parse_domain_config(int hv
free(mode);
free(params);
free(dev);
+ free(btype);
free(bpath);
free(buf);
free(danger_buf);
++++++ blktapctrl-default-to-ioemu.patch ++++++
Index: xen-4.1.2-testing/tools/blktap/drivers/tapdisk.h
===================================================================
--- xen-4.1.2-testing.orig/tools/blktap/drivers/tapdisk.h
+++ xen-4.1.2-testing/tools/blktap/drivers/tapdisk.h
@@ -168,7 +168,7 @@ static disk_info_t aio_disk = {
"raw image (aio)",
"aio",
0,
- 0,
+ 1,
#ifdef TAPDISK
&tapdisk_aio,
#endif
@@ -179,7 +179,7 @@ static disk_info_t sync_disk = {
"raw image (sync)",
"sync",
0,
- 0,
+ 1,
#ifdef TAPDISK
&tapdisk_sync,
#endif
@@ -190,7 +190,7 @@ static disk_info_t vmdk_disk = {
"vmware image (vmdk)",
"vmdk",
1,
- 0,
+ 1,
#ifdef TAPDISK
&tapdisk_vmdk,
#endif
@@ -212,7 +212,7 @@ static disk_info_t qcow_disk = {
"qcow disk (qcow)",
"qcow",
0,
- 0,
+ 1,
#ifdef TAPDISK
&tapdisk_qcow,
#endif
@@ -223,7 +223,7 @@ static disk_info_t qcow2_disk = {
"qcow2 disk (qcow2)",
"qcow2",
0,
- 0,
+ 1,
#ifdef TAPDISK
&tapdisk_qcow2,
#endif
++++++ block-dmmd ++++++
#! /bin/bash
# Usage: block-dmmd [add args | remove args]
#
# the xm config file should have something like:
# dmmd:md;/dev/md0;md;/dev/md1;lvm;/dev/vg1/lv1
# or
# dmmd:lvm;/dev/vg1/lv1;lvm;/dev/vg1/lv2;md;/dev/md0
# note the last device will be used for VM
# History:
# 2009-06-09, mh@xxxxxxxxxx:
# Emit debugging messages into a temporary file; if no longer needed,
# just comment the exec I/O redirection below
# Make variables used in functions local to avoid global overridings
# Use vgscan and vgchange where required
# Use the C locale to avoid dealing with localized messages
# Assign output from assembling an MD device to a variable to aid
debugging
# We do not want to deal with localized messages:
LANG=C
LC_MESSAGES=C
export LANG LC_MESSAGES
dir=$(dirname "$0")
. "$dir/block-common.sh"
#exec >> /tmp/block-dmmd-`date +%F_%T.%N`.log 2>&1
echo ""
date
set -x
echo shell-flags: $-
command=$1
# We check for errors ourselves:
set +e
function run_mdadm()
{
local mdadm_cmd=$1
local msg
local rc
msg="`/sbin/mdadm $mdadm_cmd 2>&1`"
rc=$?
case "$msg" in
*"has been started"* | *"already active"* )
return 0
;;
*"is already in use"* )
: hmm, might be used by another device in this domU
: leave it to upper layers to detect a real error
return 2
;;
* )
return $rc
;;
esac
return 1
}
function activate_md()
{
local par=$1
local already_active=0 cfg dev rc t
if [ ${par} = ${par%%(*} ]; then
# No configuration file specified:
dev=$par
cfd=
else
dev=${par%%(*}
t=${par#*(}
cfg="-c ${t%%)*}"
fi
if /sbin/mdadm -Q -D $dev; then
already_active=1
fi
run_mdadm "-A $dev $cfg"
rc=$?
if [ $already_active -eq 1 ] && [ $rc -eq 2 ]; then
return 0
fi
return $rc
}
function deactivate_md ()
{
local par=$1 # Make it explicitly local
## We need the device name only while deactivating
/sbin/mdadm -S ${par%%(*}
return $?
}
function activate_lvm ()
{
# First scan for PVs and VGs; we may then have to activate the VG
# first, but can ignore errors:
# /sbin/pvscan || :
# /sbin/vgscan --mknodes || :
# /sbin/vgchange -ay ${1%/*} || :
/sbin/lvchange -ay $1
return $?
}
function deactivate_lvm ()
{
/sbin/lvchange -an $1
if [ $? -eq 0 ]; then
# We may have to deactivate the VG now, but can ignore errors:
# /sbin/vgchange -an ${1%/*} || :
# Maybe we need to cleanup the LVM cache:
# /sbin/vgscan --mknodes || :
return 0
fi
return 1
}
BP=100
SP=$BP
VBD=
declare -a stack
function push ()
{
if [ -z "$1" ]; then
return
fi
let "SP -= 1"
stack[$SP]="${1}"
return
}
function pop ()
{
VBD=
if [ "$SP" -eq "$BP" ]; then
return
fi
VBD=${stack[$SP]}
let "SP += 1"
return
}
function activate_dmmd ()
{
# echo $1 $2
case $1 in
md)
activate_md $2
return
;;
lvm)
activate_lvm $2
return
;;
esac
}
function deactivate_dmmd()
{
case "$1" in
md)
deactivate_md $2
return
;;
lvm)
deactivate_lvm $2
return
;;
esac
}
function cleanup_stack ()
{
while [ 1 ]; do
pop
if [ -z "$VBD" ]; then
break
fi
deactivate_dmmd $VBD
done
return
}
function parse_par ()
{
local ac par rc s t # Make these explicitly local vars
ac=$1
par="$2"
echo "parse_paring $1, $2"
par="$par;"
while [ 1 ]; do
t=${par%%;*}
if [ -z "$t" ]; then
return 0
fi
par=${par#*;}
s=${par%%;*}
if [ -z "$s" ]; then
return 1
fi
par=${par#*;}
echo "type is $t, dev is $s"
if [ "$ac" = "activate" ]; then
activate_dmmd $t $s
rc=$?
if [ $rc -ne 0 ]; then
return 1
fi
fi
echo "push $t $s"
push "$t $s"
done
}
echo $command
case "$command" in
add)
p=`xenstore-read $XENBUS_PATH/params` || true
claim_lock "dmmd"
dmmd=$p
echo "before parse_par $dmmd"
parse_par activate "$dmmd"
rc=$?
echo "reach here with rc: $rc"
if [ $rc -ne 0 ]; then
cleanup_stack
release_lock "dmmd"
exit 1
fi
lastparam=${dmmd##*;}
usedevice=${lastparam%(*}
claim_lock "block"
xenstore-write $XENBUS_PATH/node "$usedevice"
write_dev "$usedevice"
release_lock "block"
release_lock "dmmd"
exit 0
;;
remove)
p=`xenstore-read $XENBUS_PATH/params` || true
claim_lock "dmmd"
dmmd=$p
parse_par noactivate "$dmmd"
cleanup_stack
release_lock "dmmd"
exit 0
;;
esac
++++++ block-iscsi ++++++
#!/bin/bash
# Usage: block-iscsi [add tgtname | remove dev]
#
# This assumes you're running a correctly configured
# iscsi target (server) at the other end!
# Note that we assume that the passwords for discovery (if needed)
# are in /etc/iscsid.conf
# and the node session passwords (if required) in the
# open-iscsi database below /var/lib/open-iscsi/node.db
#
# (c) Kurt Garloff <kurt@xxxxxxxxxx>, 2006-09-04, GNU GPL
# Contributors: Jim Fehlig <jfehlig@xxxxxxxxxx>
# Stefan de Konink <skinkie@xxxxxxxxx>
dir=$(dirname "$0")
. "$dir/block-common.sh"
# echo "DBG:xen/scripts/block-iscsi $1 $2 XENBUS_PATH=$XENBUS_PATH $par $node"
find_sdev()
{
unset dev
for session in /sys/class/iscsi_session/session*; do
if [ "$1" = "`cat $session/targetname 2>/dev/null`" ]; then
dev=`basename $session/device/target*/*:0:*/block*/*`
return
fi
done
}
find_sdev_rev()
{
unset tgt
for session in /sys/class/iscsi_session/session*; do
dev=`basename $session/device/target*/*:0:*/block*/*`
if [ "$dev" = "$1" ]; then
tgt=`cat $session/targetname 2>/dev/null`
return
fi
done
}
case "$command" in
add)
# load modules and start iscsid
/etc/init.d/open-iscsi status >/dev/null 2>&1 ||
{ /etc/init.d/open-iscsi start >/dev/null 2>&1; sleep 1; }
par=`xenstore-read $XENBUS_PATH/params` || true
TGTID=`echo $par | sed "s/\/\///g"`
while read rec uuid; do
if [ "$uuid" = "$TGTID" ]; then
find_sdev $TGTID
if [ -z "$dev" ]; then
/sbin/iscsiadm -m node -T $uuid -p $rec --login || exit 2
sleep 4
find_sdev $TGTID
fi
xenstore-write $XENBUS_PATH/node /dev/$dev
write_dev /dev/$dev
exit 0
fi
done < <(/sbin/iscsiadm -m node)
exit 1
;;
remove)
node=`xenstore-read $XENBUS_PATH/node` || true
dev=$node; dev=${dev#/dev/}
find_sdev_rev $dev
if [ -x /sbin/blockdev -a -n "$node" ]; then blockdev --flushbufs "$node";
fi
test -z "$tgt" && exit 2
/sbin/iscsiadm -m node -T $tgt --logout
exit 1
;;
esac
++++++ block-nbd ++++++
#!/bin/sh
# Usage: block-nbd [bind server ctl_port |unbind node]
#
# The node argument to unbind is the name of the device node we are to
# unbind.
#
# This assumes you're running a correctly configured server at the other end!
dir=$(dirname "$0")
. "$dir/block-common.sh"
#set -x
par=`xenstore-read $XENBUS_PATH/params` || true
#echo $par
case "$command" in
add)
modprobe nbd
for dev in /dev/nbd*; do
if nbd-client $par $dev; then
xenstore-write $XENBUS_PATH/node $dev
write_dev $dev
exit 0
fi
done
exit 1
;;
remove)
node=`xenstore-read $XENBUS_PATH/node` || true
nbd-client -d $node
exit 0
;;
esac
++++++ block-npiv ++++++
#!/bin/bash
# Usage: block-npiv [add npiv | remove dev]
dir=$(dirname "$0")
. "$dir/block-npiv-common.sh"
. "$dir/block-common.sh"
#set -x
#command=$1
case "$command" in
add)
# Params is one big arg, with fields separated by hyphens:
# single path:
# FABRIC-VPWWPN-VPWWNN-TGTWWPN-LUN#
# multipath:
# {FABRIC1.FABRIC2}-{VPWWPN1.VPWWPN2.VPWWPN3}-VPWWNN-TGTWWPN-LUN#
# arg 2 - Fabric Name
# arg 3 - VPORT's WWPN
# arg 4 - VPORT's WWNN
# arg 5 - Target's WWPN
# arg 6 - LUN # on Target
# no wwn contains a leading 0x - it is a 16 character hex value
# You may want to optionally pick a specific adapter ?
par=`xenstore-read $XENBUS_PATH/params` || true
#par=$2
NPIVARGS=$par;
LUN=${NPIVARGS##*-*-*-*-}; NPIVARGS=${NPIVARGS%-*}
if test $LUN = $NPIVARGS ; then exit 1; fi
TGTWWPN=${NPIVARGS##*-*-*-}; NPIVARGS=${NPIVARGS%-*}
if test $TGTWWPN = $NPIVARGS ; then exit 1; fi
VPORTWWNN=${NPIVARGS##*-*-}; NPIVARGS=${NPIVARGS%-*}
if test $VPORTWWNN = $NPIVARGS ; then exit 1; fi
VPORTWWPNS=${NPIVARGS##*-}; NPIVARGS=${NPIVARGS%-*}
if test $VPORTWWPNS = $NPIVARGS ; then exit 1; fi
FABRICNMS=$NPIVARGS
# Ensure we compare everything using lower-case hex characters
TGTWWPN=`echo $TGTWWPN | tr A-Z a-z`
VPORTWWPNS=`echo $VPORTWWPNS | tr A-Z a-z |sed 's/[{.}]/ /g'`
VPORTWWNN=`echo $VPORTWWNN | tr A-Z a-z`
FABRICNMS=`echo $FABRICNMS | tr A-Z a-z |sed 's/[{.}]/ /g'`
claim_lock "npiv"
paths=0
for FABRICNM in $FABRICNMS; do
for VPORTWWPN in $VPORTWWPNS; do
find_vhost $VPORTWWPN $FABRICNM
if test -z "$vhost" ; then
create_vport $FABRICNM $VPORTWWPN $VPORTWWNN
if [ $? -ne 0 ] ; then exit 2; fi
sleep 8
find_vhost $VPORTWWPN $FABRICNM
if test -z "$vhost" ; then exit 3; fi
fi
find_sdev $vhost $TGTWWPN $LUN
if test -z "$dev"; then
echo "- - -" > /sys/class/scsi_host/$vhost/scan
sleep 2
find_sdev $vhost $TGTWWPN $LUN
fi
if test -z "$dev"; then
exit 4
fi
paths=$(($paths+1))
done
done
release_lock "npiv"
if test $paths -gt 1; then
xenstore-write $XENBUS_PATH/multipath 1
/etc/init.d/multipathd start
if test $? -ne 0 ; then exit 4; fi
dm=`multipath -l /dev/$dev | grep dm | cut -f2 -d' '`
else
xenstore-write $XENBUS_PATH/multipath 0
dm=$dev
fi
if test ! -z "$dm"; then
xenstore-write $XENBUS_PATH/node /dev/$dm
write_dev /dev/$dm
exit 0
fi
exit 4
;;
remove)
node=`xenstore-read $XENBUS_PATH/node` || true
multipath=`xenstore-read $XENBUS_PATH/multipath` || true
# this is really screwy. the first delete of a lun will
# terminate the entire vport (all luns)
if test $multipath = 1; then
par=`xenstore-read $XENBUS_PATH/params` || true
NPIVARGS=$par;
FABRICNMS=${NPIVARGS%%-*}; NPIVARGS=${NPIVARGS#*-}
VPORTWWPNS=${NPIVARGS%%-*}
VPORTWWPNS=`echo $VPORTWWPNS | tr A-Z a-z |sed 's/[{.}]/ /g'`
FABRICNMS=`echo $FABRICNMS | tr A-Z a-z |sed 's/[{.}]/ /g'`
for FABRICNM in $FABRICNMS; do
for VPORTWWPN in $VPORTWWPNS; do
find_vhost $VPORTWWPN $FABRICNM
if test -z "$vhost" ; then exit 5; fi
flush_nodes_on_vhost $vhost
delete_vhost $vhost
done
done
else
dev=$node; dev=${dev#/dev/}
find_vhost_from_dev $dev
if test -z "$vhost" ; then exit 5; fi
flush_nodes_on_vhost $vhost
delete_vhost $vhost
fi
exit 0
;;
esac
++++++ block-npiv-common.sh ++++++
# Look for the NPIV vport with the WWPN
# $1 contains the WWPN (assumes it does not contain a leading "0x")
# $2 contains the FABRICNM (assumes it does not contain "0x")
find_vhost()
{
unset vhost
# look in upstream locations
for fchost in /sys/class/fc_vports/* ; do
if test -e $fchost/port_name ; then
wwpn=`cat $fchost/port_name | sed -e s/^0x//`
if test $wwpn = $1 ; then
# Note: makes the assumption the vport will always have an scsi_host
child
vhost=`ls -d $fchost/device/host*`
vhost=`basename $vhost`
fname=`cat /sys/class/fc_host/$vhost/fabric_name | sed -e s/^0x//`
if test $fname = $2 ; then
return
fi
fi
fi
done
# look in vendor-specific locations
# Emulex - just looks like another scsi_host - so look at fc_hosts...
for fchost in /sys/class/fc_host/* ; do
if test -e $fchost/port_name ; then
wwpn=`cat $fchost/port_name | sed -e s/^0x//`
if test $wwpn = $1 ; then
# Note: makes the assumption the vport will always have an scsi_host
child
vhost=`basename $fchost`
fname=`cat $fchost/fabric_name | sed -e s/^0x//`
if test $fname = $2 ; then
return
fi
fi
fi
done
}
# Create a NPIV vport on the fabric w/ FABRICNM, with WWPN,WWNN
# $1 contains FABRICNM
# $2 contains the VPORT WWPN
# $3 contains the VPORT WWNN
# (assumes no name contains a leading "0x")
create_vport()
{
# find a base adapter with npiv support that is on the right fabric
# Look via upstream interfaces
for fchost in /sys/class/fc_host/* ; do
if test -e $fchost/vport_create ; then
# is the link up, w/ NPIV support ?
pstate=`cat $fchost/port_state`
ptype=`cat $fchost/port_type | cut -c 1-5`
fname=`cat $fchost/fabric_name | sed -e s/^0x//`
if [ $pstate = "Online" -a $ptype = "NPort" -a $fname = $1 ] ; then
vmax=`cat $fchost/max_npiv_vports`
vinuse=`cat $fchost/npiv_vports_inuse`
avail=`expr $vmax - $vinuse`
if [ $avail -gt 0 ] ; then
# create the vport
echo $2":"$3 > $fchost/vport_create
if [ $? -eq 0 ] ; then
return 0
fi
# failed - so we'll just look for the next adapter
fi
fi
fi
done
# Look in vendor-specific locations
# Emulex: interfaces mirror upstream, but are under adapter scsi_host
for shost in /sys/class/scsi_host/* ; do
if [ -e $shost/vport_create ] ; then
fchost=`ls -d $shost/device/fc_host*`
# is the link up, w/ NPIV support ?
if [ -e $fchost/port_state ] ; then
pstate=`cat $fchost/port_state`
ptype=`cat $fchost/port_type | cut -c 1-5`
fname=`cat $fchost/fabric_name | sed -e s/^0x//`
if [ $pstate = "Online" -a $ptype = "NPort" -a $fname = $1 ] ; then
vmax=`cat $shost/max_npiv_vports`
vinuse=`cat $shost/npiv_vports_inuse`
avail=`expr $vmax - $vinuse`
if [ $avail -gt 0 ] ; then
# create the vport
echo $2":"$3 > $shost/vport_create
if [ $? -eq 0 ] ; then
return 0
fi
# failed - so we'll just look for the next adapter
fi
fi
fi
fi
done
# BFA are under adapter scsi_host
for shost in /sys/class/scsi_host/* ; do
if [ -e $shost/vport_create ] ; then
fchost=`ls -d $shost/device/fc_host/*`
# is the link up, w/ NPIV support ?
if [ -e $fchost/port_state ] ; then
pstate=`cat $fchost/port_state`
ptype=`cat $fchost/port_type | cut -c 1-5`
fname=`cat $fchost/fabric_name | sed -e s/^0x//`
if [ $pstate = "Online" -a $ptype = "NPort" -a $fname = $1 ] ; then
# create the vport
echo $2":"$3 > $shost/vport_create
if [ $? -eq 0 ] ; then
return 0
fi
# failed - so we'll just look for the next adapter
fi
fi
fi
done
return 1
}
# Look for the LUN on the indicated scsi_host (which is an NPIV vport)
# $1 is the scsi_host name (normalized to simply the hostX name)
# $2 is the WWPN of the tgt port the lun is on
# Note: this implies we don't support a multipath'd lun, or we
# are explicitly identifying a "path"
# $3 is the LUN number of the scsi device
find_sdev()
{
unset dev
hostno=${1/*host/}
for sdev in /sys/class/scsi_device/${hostno}:*:$3 ; do
if test -e $sdev/device/../fc_trans*/target${hostno}*/port_name ; then
tgtwwpn=`cat $sdev/device/../fc_trans*/target${hostno}*/port_name | sed
-e s/^0x//`
if test $tgtwwpn = $2 ; then
if test -e $sdev/device/block* ; then
dev=`ls $sdev/device/block*`
dev=${dev##*/}
return
fi
fi
fi
done
}
# Look for the NPIV vhost based on a scsi "sdX" name
# $1 is the "sdX" name
find_vhost_from_dev()
{
unset vhost
hostno=`readlink /sys/block/$1/device`
hostno=${hostno##*/}
hostno=${hostno%%:*}
if test -z "$hostno" ; then return; fi
vhost="host"$hostno
}
# We're about to terminate a vhost based on a scsi device
# Flush all nodes on that vhost as they are about to go away
# $1 is the vhost
flush_nodes_on_vhost()
{
if test ! -x /sbin/blockdev ; then return; fi
hostno=${1/*host/}
for sdev in /sys/class/scsi_device/${hostno}:* ; do
if test -e $sdev/device/block* ; then
dev=`ls $sdev/device/block*`
dev="/dev/"$dev
if test -n "$dev"; then
blockdev --flushbufs $dev
fi
fi
done
}
# Terminate a NPIV vhost
# $1 is vhost
delete_vhost()
{
# use upstream interface
for vport in /sys/class/fc_vports/* ; do
if test -e $vport/device/$1 ; then
if test -e $vport/vport_delete ; then
echo "1" > $vport/vport_delete
if test $? -ne 0 ; then exit 6; fi
sleep 4
return
fi
fi
done
# use vendor specific interface
# Emulex
if test -e /sys/class/fc_host/$1/device/../scsi_host*/lpfc_drvr_version ; then
shost=`ls -1d /sys/class/fc_host/$1/device/../scsi_host* | sed
s/.*scsi_host://`
vportwwpn=`cat /sys/class/fc_host/$1/port_name | sed s/^0x//`
vportwwnn=`cat /sys/class/fc_host/$1/node_name | sed s/^0x//`
echo "$vportwwpn:$vportwwnn" > /sys/class/scsi_host/$shost/vport_delete
if test $? -ne 0 ; then exit 6; fi
sleep 4
return
fi
# Qlogic
if test -e /sys/class/fc_host/$1/device/../scsi_host*/driver_version ; then
shost=`ls -1d /sys/class/fc_host/$1/device/../scsi_host* | sed
s/.*scsi_host://`
vportwwpn=`cat /sys/class/fc_host/$1/port_name | sed s/^0x//`
vportwwnn=`cat /sys/class/fc_host/$1/node_name | sed s/^0x//`
echo "$vportwwpn:$vportwwnn" > /sys/class/scsi_host/$shost/vport_delete
if test $? -ne 0 ; then exit 6; fi
sleep 4
return
fi
# BFA
if test -e /sys/class/fc_host/$1/device/../scsi_host/*/driver_name ; then
shost=`ls -1d /sys/class/fc_host/$1/device/../scsi_host/* | sed
s#.*scsi_host/##`
vportwwpn=`cat /sys/class/fc_host/$1/port_name | sed s/^0x//`
vportwwnn=`cat /sys/class/fc_host/$1/node_name | sed s/^0x//`
echo "$vportwwpn:$vportwwnn" > /sys/class/scsi_host/$shost/vport_delete
if test $? -ne 0 ; then exit 6; fi
sleep 4
return
fi
exit 6
}
vport_status()
{
# Look via upstream interfaces
for fchost in /sys/class/fc_host/* ; do
if test -e $fchost/vport_create ; then
vport_status_display $fchost $fchost
fi
done
# Look in vendor-specific locations
# Emulex: interfaces mirror upstream, but are under adapter scsi_host
for shost in /sys/class/scsi_host/* ; do
if [ -e $shost/vport_create ] ; then
fchost=`ls -d $shost/device/fc_host*`
vport_status_display $fchost $shost
fi
done
return 0
}
vport_status_display()
{
echo
echo "fc_host: " $2
echo "port_state: " `cat $1/port_state`
echo "port_type: " `cat $1/port_type`
echo "fabric_name: " `cat $1/fabric_name`
echo "max_npiv_vports: " `cat $2/max_npiv_vports`
echo "npiv_vports_inuse: " `cat $2/npiv_vports_inuse`
echo "modeldesc: " `cat $2/modeldesc`
echo "speed: " `cat $1/speed`
return 0
}
++++++ block-npiv-vport ++++++
#!/bin/bash
# Usage: block-npiv-vport [create npivargs | delete vportwwpn | status]
dir=$(dirname "$0")
. "$dir/block-npiv-common.sh"
#set -x
command=$1
params=$2
case "$command" in
create)
# Params is one big arg, with fields separated by hyphens:
# FABRIC-VPWWPN-VPWWNN-TGTWWPN-LUN#
# arg 2 - Fabric Name
# arg 3 - VPORT's WWPN
# arg 4 - VPORT's WWNN
# arg 5 - Target's WWPN
# arg 6 - LUN # on Target
# no wwn contains a leading 0x - it is a 16 character hex value
# You may want to optionally pick a specific adapter ?
NPIVARGS=$params;
LUN=${NPIVARGS##*-*-*-*-}; NPIVARGS=${NPIVARGS%-*}
if test $LUN = $NPIVARGS ; then exit 1; fi
TGTWWPN=${NPIVARGS##*-*-*-}; NPIVARGS=${NPIVARGS%-*}
if test $TGTWWPN = $NPIVARGS ; then exit 1; fi
VPORTWWNN=${NPIVARGS##*-*-}; NPIVARGS=${NPIVARGS%-*}
if test $VPORTWWNN = $NPIVARGS ; then exit 1; fi
VPORTWWPN=${NPIVARGS##*-}; NPIVARGS=${NPIVARGS%-*}
if test $VPORTWWPN = $NPIVARGS ; then exit 1; fi
FABRICNM=$NPIVARGS
# Ensure we compare everything using lower-case hex characters
TGTWWPN=`echo $TGTWWPN | tr A-Z a-z`
VPORTWWPN=`echo $VPORTWWPN | tr A-Z a-z`
VPORTWWNN=`echo $VPORTWWNN | tr A-Z a-z`
FABRICNM=`echo $FABRICNM | tr A-Z a-z`
find_vhost $VPORTWWPN $FABRICNM
if test -z "$vhost" ; then
create_vport $FABRICNM $VPORTWWPN $VPORTWWNN
if [ $? -ne 0 ] ; then exit 2; fi
sleep 8
find_vhost $VPORTWWPN $FABRICNM
if test -z "$vhost" ; then exit 3; fi
fi
exit 0
;;
delete)
# Params is VPORT's WWPN
# no wwn contains a leading 0x - it is a 16 character hex value
VPORTWWPN=$params
# Ensure we compare everything using lower-case hex characters
VPORTWWPN=`echo $VPORTWWPN | tr A-Z a-z`
find_vhost $VPORTWWPN $FABRICNM
if test -z "$vhost" ; then exit 4; fi
delete_vhost $vhost
exit 0
;;
status)
vport_status
exit 0
;;
*)
echo "Usage: block-npiv-vport [create npivargs | delete vportwwpn | status]"
exit 1
;;
esac
++++++ boot.local.xenU ++++++
#! /bin/sh
#
# Copyright (c) 1996 SuSE GmbH Nuernberg, Germany. All rights reserved.
#
# Author: Werner Fink <werner@xxxxxxx>, 1996
# Burchard Steinbild <bs@xxxxxxx>, 1996
#
# /etc/init.d/boot.local
#
# script with local commands to be executed from init on system startup
#
#
# Here you should add things, that should happen directly after booting
# before we're going to the first run level.
#
date
# echo "$MACHINE: running $0 $*"
my_REDIRECT="$(echo $REDIRECT | sed 's#^/dev/##')"
my_DEVICE="$(echo $my_REDIRECT | sed 's#^tty##')"
my_SPEED="$(stty speed)"
# echo REDIRECT $REDIRECT $my_REDIRECT
# echo my_DEVICE $my_DEVICE
# echo my_SPEED $my_SPEED
# compose a line like that for inittab
# S0:12345:respawn:/sbin/agetty -L 9600 ttyS0 vt102
case $my_REDIRECT in
ttyS*)
echo adding this line to inittab
echo "$my_DEVICE:12345:respawn:/sbin/agetty -L $my_SPEED $my_REDIRECT
vt102"
echo "$my_DEVICE:12345:respawn:/sbin/agetty -L $my_SPEED $my_REDIRECT
vt102" >> /etc/inittab
echo $my_REDIRECT >> /etc/securetty
;;
hvc*)
echo adding this line to inittab
echo "$my_DEVICE:12345:respawn:/sbin/agetty -L $my_SPEED $my_REDIRECT
vt320"
echo "$my_DEVICE:12345:respawn:/sbin/agetty -L $my_SPEED $my_REDIRECT
vt320" >> /etc/inittab
echo $my_REDIRECT >> /etc/securetty
;;
*)
echo "no modification in inittab needed for: $my_REDIRECT"
;;
esac
telinit q
# Changes for Xen
test -f /lib/modules/`uname -r`/modules.dep || depmod -ae
CMDLINE=`cat /proc/cmdline | grep 'ip='`
if test ! -z "$CMDLINE"; then
OLDIFS=$IFS
IFS=":"
read ip oth mask gw hostname dev dhcp rest < /proc/cmdline
IFS=$OLDIFS
hostname $hostname
ip=`echo $ip | sed 's/ip= *//'`
if test ! -z "$ip"; then
if test -z "$mask"; then
if [ ${ip%/*} = $ip ]; then
ip="$ip/27"
fi
echo "ip addr add $ip dev $dev"
ip addr add $ip dev $dev
ip link set $dev up
else
ifconfig add $ip netmask $mask $dev
fi
fi
if test "${dhcp#dhcp}" != "$dhcp"; then
ifup-dhcp $dev
fi
fi
++++++ boot.xen ++++++
#! /bin/sh
# Copyright (c) 2005-2006 SUSE Linux AG, Nuernberg, Germany.
# All rights reserved.
#
# /etc/init.d/boot.xen
#
# LSB compatible service control script; see http://www.linuxbase.org/spec/
#
### BEGIN INIT INFO
# Provides: Xen
# Required-Start: boot.localfs
# Should-Start: boot.localnet
# Required-Stop: boot.localfs
# Should-Stop:
# Default-Start: B
# Default-Stop:
# Short-Description: Switch on and off TLS depending on whether Xen is running
# Description: Xen gets a major performance hit by the way
# recent glibc (& gcc) set up the TLS offset, as it needs to
# play segmentation tricks. This can be avoided by moving away
# the tls libs.
### END INIT INFO
. /etc/rc.status
# Reset status of this service
rc_reset
case "$1" in
start)
echo -n "Starting Xen setup "
if test -d /proc/xen; then
export LD_ASSUME_KERNEL=2.4.21
echo -n "Xen running "
fi
if test -d /proc/xen -a -d /lib/tls; then
echo -n "move /lib/tls away "
mv /lib/tls /lib/tls.save
elif test ! -d /proc/xen -a -d /lib/tls.save; then
echo -n "move back /lib/tls "
mv /lib/tls.save /lib/tls
fi
rc_status -v
;;
stop)
# rc_status -v
;;
try-restart|condrestart)
$0 restart
# Remember status and be quiet
rc_status
;;
restart)
## Stop the service and regardless of whether it was
## running or not, start it again.
$0 start
# Remember status and be quiet
rc_status
;;
force-reload)
$0 try-restart
rc_status
;;
reload)
rc_failed 3
rc_status -v
;;
status)
echo -n "Checking for Xen "
# Return value is slightly different for the status command:
# 0 - service up and running
# 1 - service dead, but /var/run/ pid file exists
# 2 - service dead, but /var/lock/ lock file exists
# 3 - service not running (unused)
# 4 - service status unknown :-(
# 5--199 reserved (5--99 LSB, 100--149 distro, 150--199 appl.)
if test -d /proc/xen; then
if test -d /lib/tls; then
echo -n "Xen running, /lib/tls existing "
rc_failed 1
else
echo -n "Xen running, /lib/tls not existing "
fi
else
if test -d /lib/tls.save; then
echo -n "Xen not running, /lib/tls existing "
rc_failed 2
else
echo -n "Xen not running, /lib/tls not existing "
rc_failed 3
fi
fi
rc_status -v
;;
*)
echo "Usage: $0
{start|stop|status|try-restart|restart|force-reload|reload}"
exit 1
;;
esac
rc_exit
++++++ bridge-bonding.diff ++++++
Index: xen-4.1.2-testing/tools/hotplug/Linux/network-bridge
===================================================================
--- xen-4.1.2-testing.orig/tools/hotplug/Linux/network-bridge
+++ xen-4.1.2-testing/tools/hotplug/Linux/network-bridge
@@ -249,6 +249,9 @@ op_start () {
claim_lock "network-bridge"
+ local bonded=""
+ [ -e /sys/class/net/${netdev}/bonding ] && bonded="yes"
+
vlans=$(find_active_vlans "${netdev}")
for vlan in $vlans ; do ifdown $vlan ; done
@@ -266,18 +269,32 @@ op_start () {
ip link set ${netdev} down
ip addr flush ${netdev}
fi
- ip link set ${netdev} name ${pdev}
- ip link set ${tdev} name ${bridge}
-
- setup_physical_bridge_port ${pdev}
- # Restore slaves
- if [ -n "${slaves}" ]; then
- ip link set ${pdev} up
- ifenslave ${pdev} ${slaves}
+ if [ "x${bonded}" = "xyes" ]
+ then
+ ip link set ${tdev} name ${bridge}
+ ln -sf /etc/sysconfig/network/ifcfg-${netdev}
/etc/sysconfig/network/ifcfg-${pdev}
+ ifup ${pdev}
+ local gw=`ip route show dev ${pdev} | fgrep default | sed 's/default
via //'`
+ ip addr flush ${pdev}
+ rm -f /etc/sysconfig/network/ifcfg-${pdev}
+ brctl addif ${bridge} ${pdev}
+ ip link set ${bridge} up
+ [ -n "$gw" ] && ip route add default via ${gw}
+ else
+ ip link set ${netdev} name ${pdev}
+ ip link set ${tdev} name ${bridge}
+
+ setup_bridge_port ${pdev}
+
+ # Restore slaves
+ if [ -n "${slaves}" ]; then
+ ip link set ${pdev} up
+ ifenslave ${pdev} ${slaves}
+ fi
+ add_to_bridge2 ${bridge} ${pdev}
+ do_ifup ${bridge}
fi
- add_to_bridge2 ${bridge} ${pdev}
- do_ifup ${bridge}
for vlan in $vlans ; do ifup $vlan ; done
++++++ bridge-opensuse.patch ++++++
Index: xen-4.1.2-testing/tools/hotplug/Linux/network-bridge
===================================================================
--- xen-4.1.2-testing.orig/tools/hotplug/Linux/network-bridge
+++ xen-4.1.2-testing/tools/hotplug/Linux/network-bridge
@@ -278,19 +278,19 @@ op_stop () {
transfer_addrs ${bridge} ${pdev}
if ! ifdown ${bridge}; then
get_ip_info ${bridge}
- fi
- ip link set ${pdev} down
- ip addr flush ${bridge}
+ ip link set ${pdev} down
+ ip addr flush ${bridge}
- brctl delif ${bridge} ${pdev}
- ip link set ${bridge} down
+ brctl delif ${bridge} ${pdev}
+ ip link set ${bridge} down
- ip link set ${bridge} name ${tdev}
+ ip link set ${bridge} name ${tdev}
+ brctl delbr ${tdev}
+ fi
+ ip link set ${pdev} down
ip link set ${pdev} name ${netdev}
do_ifup ${netdev}
- brctl delbr ${tdev}
-
release_lock "network-bridge"
}
++++++ bridge-record-creation.patch ++++++
Index: xen-4.1.2-testing/tools/hotplug/Linux/network-bridge
===================================================================
--- xen-4.1.2-testing.orig/tools/hotplug/Linux/network-bridge
+++ xen-4.1.2-testing/tools/hotplug/Linux/network-bridge
@@ -257,6 +257,11 @@ op_start () {
create_bridge ${tdev}
+ # Record creation of bridge in /dev/.sysconfig/network/xenbridges so other
+ # tools, e.g. yast2 lan, know that Xen bridging is active.
+ [ -d /dev/.sysconfig/network/xenbridges ] || mkdir
/dev/.sysconfig/network/xenbridges
+ touch /dev/.sysconfig/network/xenbridges/${bridge}
+
preiftransfer ${netdev}
transfer_addrs ${netdev} ${tdev}
# Remember slaves for bonding interface.
@@ -338,6 +343,13 @@ op_stop () {
ip link set ${pdev} name ${netdev}
do_ifup ${netdev}
+ # Remove record of bridge from /dev/.sysconfig/network/xenbridges ...
+ rm -f /dev/.sysconfig/network/xenbridges/${bridge}
+ # ... and directory itself if empty
+ if [ -z "$(ls -A /dev/.sysconfig/network/xenbridges 2>/dev/null)" ]; then
+ rmdir /dev/.sysconfig/network/xenbridges
+ fi
+
for vlan in $vlans ; do ifup $vlan ; done
release_lock "network-bridge"
++++++ bridge-vlan.diff ++++++
Index: xen-4.1.2-testing/tools/hotplug/Linux/network-bridge
===================================================================
--- xen-4.1.2-testing.orig/tools/hotplug/Linux/network-bridge
+++ xen-4.1.2-testing/tools/hotplug/Linux/network-bridge
@@ -193,6 +193,28 @@ antispoofing () {
iptables -A FORWARD -m physdev --physdev-in ${pdev} -j ACCEPT
}
+find_active_vlans() {
+ local netdev=$1
+ local vlan
+ local vlans
+ vlans=""
+ for vifcfg in /etc/sysconfig/network/ifcfg-vlan* ; do
+ vlan=${vifcfg/*\/ifcfg-}
+ if [ "$vlan" = "vlan*" ]; then
+ continue
+ fi
+ . $vifcfg
+ etherdevice="$ETHERDEVICE"
+ if [ -x /sbin/getcfg-interface ]; then
+ etherdevice=$(/sbin/getcfg-interface "$ETHERDEVICE")
+ fi
+ if [ "$ETHERDEVICE" = "$netdev" ] || [ "$etherdevice" = "$netdev" ] ;
then
+ link_exists "$vlan" && vlans="$vlans $vlan"
+ fi
+ done
+ echo "$vlans"
+}
+
# Usage: show_status dev bridge
# Print ifconfig and routes.
show_status () {
@@ -227,6 +249,9 @@ op_start () {
claim_lock "network-bridge"
+ vlans=$(find_active_vlans "${netdev}")
+ for vlan in $vlans ; do ifdown $vlan ; done
+
create_bridge ${tdev}
preiftransfer ${netdev}
@@ -254,6 +279,8 @@ op_start () {
add_to_bridge2 ${bridge} ${pdev}
do_ifup ${bridge}
+ for vlan in $vlans ; do ifup $vlan ; done
+
if [ ${antispoof} = 'yes' ] ; then
antispoofing
fi
@@ -275,6 +302,9 @@ op_stop () {
claim_lock "network-bridge"
+ vlans=$(find_active_vlans "${netdev}")
+ for vlan in $vlans ; do ifdown $vlan ; done
+
transfer_addrs ${bridge} ${pdev}
if ! ifdown ${bridge}; then
get_ip_info ${bridge}
@@ -291,6 +321,8 @@ op_stop () {
ip link set ${pdev} name ${netdev}
do_ifup ${netdev}
+ for vlan in $vlans ; do ifup $vlan ; done
+
release_lock "network-bridge"
}
++++++ build-tapdisk-ioemu.patch ++++++
Date: Tue, 10 Mar 2009 16:32:40 +0100
Subject: [PATCH 3/6] ioemu: Build tapdisk-ioemu binary
When changing away from the old ioemu, changes in the Makefiles
resulted in tapdisk-ioemu appearing there, but actually not
being built. This patch re-enables the build of tapdisk-ioemu.
Signed-off-by: Kevin Wolf <kwolf@xxxxxxx>
---
Makefile | 22 +++++++++++++++-------
configure | 2 +-
qemu-tool.c | 2 +-
tapdisk-ioemu.c | 17 -----------------
4 files changed, 17 insertions(+), 26 deletions(-)
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/Makefile
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/Makefile
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/Makefile
@@ -46,14 +46,6 @@ $(filter %-user,$(SUBDIR_RULES)): libqem
recurse-all: $(SUBDIR_RULES)
-CPPFLAGS += -I$(XEN_ROOT)/tools/libxc
-CPPFLAGS += -I$(XEN_ROOT)/tools/blktap/lib
-CPPFLAGS += -I$(XEN_ROOT)/tools/xenstore
-CPPFLAGS += -I$(XEN_ROOT)/tools/include
-
-tapdisk-ioemu: tapdisk-ioemu.c cutils.c block.c block-raw.c block-cow.c
block-qcow.c aes.c block-vmdk.c block-cloop.c block-dmg.c block-bochs.c
block-vpc.c block-vvfat.c block-qcow2.c hw/xen_blktap.c osdep.c
- $(CC) -DQEMU_TOOL $(CFLAGS) $(CPPFLAGS) $(BASE_CFLAGS) $(LDFLAGS)
$(BASE_LDFLAGS) -o $@ $^ -lz $(LIBS)
-
#######################################################################
# BLOCK_OBJS is code used by both qemu system emulation and qemu-img
@@ -72,6 +64,21 @@ endif
BLOCK_OBJS += block-raw-posix.o
endif
+#######################################################################
+# tapdisk-ioemu
+
+hw/tapdisk-xen_blktap.o: hw/xen_blktap.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) -DQEMU_IMG -DQEMU_TOOL -c -o $@ $<
+tapdisk-ioemu.o: tapdisk-ioemu.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) -DQEMU_IMG -DQEMU_TOOL -c -o $@ $<
+
+tapdisk-ioemu: CPPFLAGS += -I$(XEN_ROOT)/tools/libxc
+tapdisk-ioemu: CPPFLAGS += -I$(XEN_ROOT)/tools/blktap/lib
+tapdisk-ioemu: CPPFLAGS += -I$(XEN_ROOT)/tools/xenstore
+tapdisk-ioemu: CPPFLAGS += -I$(XEN_ROOT)/tools/include
+tapdisk-ioemu: tapdisk-ioemu.o $(BLOCK_OBJS) qemu-tool.o
hw/tapdisk-xen_blktap.o
+ $(CC) $(LDFLAGS) -o $@ $^ -lz $(LIBS)
+
######################################################################
# libqemu_common.a: Target independent part of system emulation. The
# long term path is to suppress *all* target specific code in case of
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/configure
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/configure
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/configure
@@ -1511,7 +1511,7 @@ bsd)
;;
esac
-tools=
+tools="tapdisk-ioemu"
if test `expr "$target_list" : ".*softmmu.*"` != 0 ; then
tools="qemu-img\$(EXESUF) $tools"
if [ "$linux" = "yes" ] ; then
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/qemu-tool.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/qemu-tool.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/qemu-tool.c
@@ -68,7 +68,7 @@ void qemu_bh_delete(QEMUBH *bh)
qemu_free(bh);
}
-int qemu_set_fd_handler2(int fd,
+int __attribute__((weak)) qemu_set_fd_handler2(int fd,
IOCanRWHandler *fd_read_poll,
IOHandler *fd_read,
IOHandler *fd_write,
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/tapdisk-ioemu.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/tapdisk-ioemu.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/tapdisk-ioemu.c
@@ -12,34 +12,12 @@
extern void qemu_aio_init(void);
extern void qemu_aio_poll(void);
-extern void bdrv_init(void);
-
-extern void *qemu_mallocz(size_t size);
-extern void qemu_free(void *ptr);
extern void *fd_start;
int domid = 0;
FILE* logfile;
-void term_printf(const char *fmt, ...)
-{
- va_list ap;
- va_start(ap, fmt);
- vprintf(fmt, ap);
- va_end(ap);
-}
-
-void term_print_filename(const char *filename)
-{
- term_printf(filename);
-}
-
-
-typedef void IOReadHandler(void *opaque, const uint8_t *buf, int size);
-typedef int IOCanRWHandler(void *opaque);
-typedef void IOHandler(void *opaque);
-
typedef struct IOHandlerRecord {
int fd;
IOCanRWHandler *fd_read_poll;
@@ -103,7 +81,6 @@ int main(void)
logfile = stderr;
bdrv_init();
- qemu_aio_init();
init_blktap();
/* Daemonize */
@@ -115,8 +92,6 @@ int main(void)
* completed aio operations.
*/
while (1) {
- qemu_aio_poll();
-
max_fd = -1;
FD_ZERO(&rfds);
for(ioh = first_io_handler; ioh != NULL; ioh = ioh->next)
++++++ capslock_enable.patch ++++++
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/vnc.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/vnc.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/vnc.c
@@ -1342,6 +1342,11 @@ static void do_key_event(VncState *vs, i
}
break;
case 0x3a: /* CapsLock */
+ if(!down){
+ vs->modifiers_state[keycode] ^= 1;
+ kbd_put_keycode(keycode | 0x80);
+ }
+ return;
case 0x45: /* NumLock */
if (down) {
kbd_put_keycode(keycode & 0x7f);
++++++ cdrom-removable.patch ++++++
Index: xen-4.1.2-testing/tools/python/xen/xend/server/HalDaemon.py
===================================================================
--- /dev/null
+++ xen-4.1.2-testing/tools/python/xen/xend/server/HalDaemon.py
@@ -0,0 +1,243 @@
+#!/usr/bin/env python
+# -*- mode: python; -*-
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2007 Pat Campbell <plc@xxxxxxxxxx>
+# Copyright (C) 2007 Novell Inc.
+#============================================================================
+
+"""hald (Hardware Abstraction Layer Daemon) watcher for Xen management
+ of removable block device media.
+
+"""
+
+import gobject
+import dbus
+import dbus.glib
+import os
+import types
+import sys
+import signal
+import traceback
+from xen.xend.xenstore.xstransact import xstransact, complete
+from xen.xend.xenstore.xsutil import xshandle
+from xen.xend import PrettyPrint
+from xen.xend import XendLogging
+from xen.xend.XendLogging import log
+
+DEVICE_TYPES = ['vbd', 'tap']
+
+class HalDaemon:
+ """The Hald block device watcher for XEN
+ """
+
+ """Default path to the log file. """
+ logfile_default = "/var/log/xen/hald.log"
+
+ """Default level of information to be logged."""
+ loglevel_default = 'INFO'
+
+
+ def __init__(self):
+
+ XendLogging.init(self.logfile_default, self.loglevel_default)
+ log.debug( "%s", "__init__")
+
+ self.udi_dict = {}
+ self.debug = 0
+ self.dbpath = "/local/domain/0/backend"
+ self.bus = dbus.SystemBus()
+ self.hal_manager_obj = self.bus.get_object('org.freedesktop.Hal',
'/org/freedesktop/Hal/Manager')
+ self.hal_manager = dbus.Interface( self.hal_manager_obj,
'org.freedesktop.Hal.Manager')
+ self.gatherBlockDevices()
+ self.registerDeviceCallbacks()
+
+ def run(self):
+ log.debug( "%s", "In new run" );
+ try:
+ self.mainloop = gobject.MainLoop()
+ self.mainloop.run()
+ except KeyboardInterrupt, ex:
+ log.debug('Keyboard exception handler: %s', ex )
+ self.mainloop.quit()
+ except Exception, ex:
+ log.debug('Generic exception handler: %s', ex )
+ self.mainloop.quit()
+
+ def __del__(self):
+ log.debug( "%s", "In del " );
+ self.unRegisterDeviceCallbacks()
+ self.mainloop.quit()
+
+ def shutdown(self):
+ log.debug( "%s", "In shutdown now " );
+ self.unRegisterDeviceCallbacks()
+ self.mainloop.quit()
+
+ def stop(self):
+ log.debug( "%s", "In stop now " );
+ self.unRegisterDeviceCallbacks()
+ self.mainloop.quit()
+
+ def gatherBlockDevices(self):
+
+ # Get all the current devices from hal and save in a dictionary
+ try:
+ device_names = self.hal_manager.GetAllDevices()
+ i = 0;
+ for name in device_names:
+ #log.debug("device name, device=%s",name)
+ dev_obj = self.bus.get_object ('org.freedesktop.Hal', name)
+ dev = dbus.Interface (dev_obj, 'org.freedesktop.Hal.Device')
+ dev_properties =
dev_obj.GetAllProperties(dbus_interface="org.freedesktop.Hal.Device")
+ if dev_properties.has_key('block.device'):
+ dev_str = dev_properties['block.device']
+ dev_major = dev_properties['block.major']
+ dev_minor = dev_properties['block.minor']
+ udi_info = {}
+ udi_info['device'] = dev_str
+ udi_info['major'] = dev_major
+ udi_info['minor'] = dev_minor
+ udi_info['udi'] = name
+ self.udi_dict[i] = udi_info
+ i = i + 1
+ except Exception, ex:
+ print >>sys.stderr, 'Exception gathering block devices:', ex
+ log.warn("Exception gathering block devices (%s)",ex)
+
+ #
+ def registerDeviceCallbacks(self):
+ # setup the callbacks for when the gdl changes
+ self.hal_manager.connect_to_signal('DeviceAdded',
self.device_added_callback)
+ self.hal_manager.connect_to_signal('DeviceRemoved',
self.device_removed_callback)
+
+ #
+ def unRegisterDeviceCallbacks(self):
+ # setup the callbacks for when the gdl changes
+
self.hal_manager.remove_signal_receiver(self.device_added_callback,'DeviceAdded')
+
self.hal_manager.remove_signal_receiver(self.device_removed_callback,'DeviceRemoved')
+
+ #
+ def device_removed_callback(self,udi):
+ log.debug('UDI %s was removed',udi)
+ self.show_dict(self.udi_dict)
+ for key in self.udi_dict:
+ udi_info = self.udi_dict[key]
+ if udi_info['udi'] == udi:
+ device = udi_info['device']
+ major = udi_info['major']
+ minor = udi_info['minor']
+ self.change_xenstore( "remove", device, major, minor)
+
+ # Adds device to dictionary if not already there
+ def device_added_callback(self,udi):
+ log.debug('UDI %s was added', udi)
+ self.show_dict(self.udi_dict)
+ dev_obj = self.bus.get_object ('org.freedesktop.Hal', udi)
+ dev = dbus.Interface (dev_obj, 'org.freedesktop.Hal.Device')
+ device = dev.GetProperty ('block.device')
+ major = dev.GetProperty ('block.major')
+ minor = dev.GetProperty ('block.minor')
+ udi_info = {}
+ udi_info['device'] = device
+ udi_info['major'] = major
+ udi_info['minor'] = minor
+ udi_info['udi'] = udi
+ already = 0
+ cnt = 0;
+ for key in self.udi_dict:
+ info = self.udi_dict[key]
+ if info['udi'] == udi:
+ already = 1
+ break
+ cnt = cnt + 1
+ if already == 0:
+ self.udi_dict[cnt] = udi_info;
+ log.debug('UDI %s was added, device:%s major:%s minor:%s
index:%d\n', udi, device, major, minor, cnt)
+ self.change_xenstore( "add", device, major, minor)
+
+ # Debug helper, shows dictionary contents
+ def show_dict(self,dict=None):
+ if self.debug == 0 :
+ return
+ if dict == None :
+ dict = self.udi_dict
+ for key in dict:
+ log.debug('udi_info %s udi_info:%s',key,dict[key])
+
+ # Set or clear xenstore media-present depending on the action argument
+ # for every vbd that has this block device
+ def change_xenstore(self,action, device, major, minor):
+ for type in DEVICE_TYPES:
+ path = self.dbpath + '/' + type
+ domains = xstransact.List(path)
+ log.debug('domains: %s', domains)
+ for domain in domains: # for each domain
+ devices = xstransact.List( path + '/' + domain)
+ log.debug('devices: %s',devices)
+ for device in devices: # for each vbd device
+ str = device.split('/')
+ vbd_type = None;
+ vbd_physical_device = None
+ vbd_media = None
+ vbd_device_path = path + '/' + domain + '/' + device
+ listing = xstransact.List(vbd_device_path)
+ for entry in listing: # for each entry
+ item = path + '/' + entry
+ value = xstransact.Read( vbd_device_path + '/' + entry)
+ log.debug('%s=%s',item,value)
+ if item.find('media-present') != -1:
+ vbd_media = item;
+ vbd_media_path = item
+ if item.find('physical-device') != -1:
+ vbd_physical_device = value;
+ if item.find('type') != -1:
+ vbd_type = value;
+ if vbd_type is not None and vbd_physical_device is not None
and vbd_media is not None :
+ inode = vbd_physical_device.split(':')
+ imajor = parse_hex(inode[0])
+ iminor = parse_hex(inode[1])
+ log.debug("action:%s major:%s- minor:%s- imajor:%s-
iminor:%s- inode: %s",
+ action,major,minor, imajor, iminor, inode)
+ if int(imajor) == int(major) and int(iminor) ==
int(minor):
+ if action == "add":
+ xs_dict = {'media': "1"}
+ xstransact.Write(vbd_device_path,
'media-present', "1" )
+ log.debug("wrote xenstore media-present 1
path:%s",vbd_media_path)
+ else:
+ xstransact.Write(vbd_device_path,
'media-present', "0" )
+ log.debug("wrote xenstore media 0
path:%s",vbd_media_path)
+
+def mylog( fmt, *args):
+ f = open('/tmp/haldaemon.log', 'a')
+ print >>f, "HalDaemon ", fmt % args
+ f.close()
+
+
+def parse_hex(val):
+ try:
+ if isinstance(val, types.StringTypes):
+ return int(val, 16)
+ else:
+ return val
+ except ValueError:
+ return None
+
+if __name__ == "__main__":
+ watcher = HalDaemon()
+ watcher.run()
+ print 'Falling off end'
+
+
Index: xen-4.1.2-testing/tools/python/xen/xend/server/Hald.py
===================================================================
--- /dev/null
+++ xen-4.1.2-testing/tools/python/xen/xend/server/Hald.py
@@ -0,0 +1,125 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2007 Pat Campbell <plc@xxxxxxxxxx>
+# Copyright (C) 2007 Novell Inc.
+#============================================================================
+
+import errno
+import types
+import os
+import sys
+import time
+import signal
+from traceback import print_exc
+
+from xen.xend.XendLogging import log
+
+class Hald:
+ def __init__(self):
+ self.ready = False
+ self.running = True
+
+ def run(self):
+ """Starts the HalDaemon process
+ """
+ self.ready = True
+ try:
+ myfile = self.find("xen/xend/server/HalDaemon.py")
+ args = (["python", myfile ])
+ self.pid = self.daemonize("python", args )
+ #log.debug( "%s %s pid:%d", "Hald.py starting ", args, self.pid )
+ except:
+ self.pid = -1
+ log.debug("Unable to start HalDaemon process")
+
+ def shutdown(self):
+ """Shutdown the HalDaemon process
+ """
+ log.debug("%s pid:%d", "Hald.shutdown()", self.pid)
+ self.running = False
+ self.ready = False
+ if self.pid != -1:
+ try:
+ os.kill(self.pid, signal.SIGINT)
+ except:
+ print_exc()
+
+ def daemonize(self,prog, args):
+ """Runs a program as a daemon with the list of arguments. Returns the
PID
+ of the daemonized program, or returns 0 on error.
+ Copied from xm/create.py instead of importing to reduce coupling
+ """
+ r, w = os.pipe()
+ pid = os.fork()
+
+ if pid == 0:
+ os.close(r)
+ w = os.fdopen(w, 'w')
+ os.setsid()
+ try:
+ pid2 = os.fork()
+ except:
+ pid2 = None
+ if pid2 == 0:
+ os.chdir("/")
+ env = os.environ.copy()
+ env['PYTHONPATH'] = self.getpythonpath()
+ for fd in range(0, 256):
+ try:
+ os.close(fd)
+ except:
+ pass
+ os.open("/dev/null", os.O_RDWR)
+ os.dup2(0, 1)
+ os.dup2(0, 2)
+ os.execvpe(prog, args, env)
+ os._exit(1)
+ else:
+ w.write(str(pid2 or 0))
+ w.close()
+ os._exit(0)
+ os.close(w)
+ r = os.fdopen(r)
+ daemon_pid = int(r.read())
+ r.close()
+ os.waitpid(pid, 0)
+ #log.debug( "daemon_pid: %d", daemon_pid )
+ return daemon_pid
+
+ def getpythonpath(self):
+ str = " "
+ for p in sys.path:
+ if str != " ":
+ str = str + ":" + p
+ else:
+ if str != "":
+ str = p
+ return str
+
+ def find(self,path, matchFunc=os.path.isfile):
+ """Find a module in the sys.path
+ From web page:
http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52224
+ """
+ for dirname in sys.path:
+ candidate = os.path.join(dirname, path)
+ if matchFunc(candidate):
+ return candidate
+ raise Error("Can't find file %s" % path)
+
+if __name__ == "__main__":
+ watcher = Hald()
+ watcher.run()
+ time.sleep(10)
+ watcher.shutdown()
Index: xen-4.1.2-testing/tools/python/xen/xend/server/SrvServer.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/server/SrvServer.py
+++ xen-4.1.2-testing/tools/python/xen/xend/server/SrvServer.py
@@ -57,6 +57,7 @@ from xen.web.SrvDir import SrvDir
from SrvRoot import SrvRoot
from XMLRPCServer import XMLRPCServer
+from xen.xend.server.Hald import Hald
xoptions = XendOptions.instance()
@@ -252,6 +253,8 @@ def _loadConfig(servers, root, reload):
if xoptions.get_xend_unix_xmlrpc_server():
servers.add(XMLRPCServer(XendAPI.AUTH_PAM, False))
+ servers.add(Hald())
+
def create():
root = SrvDir()
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/xenstore.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/xenstore.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/xenstore.c
@@ -18,6 +18,7 @@
#include "exec-all.h"
#include "sysemu.h"
+#include "console.h"
#include "hw.h"
#include "pci.h"
#include "qemu-timer.h"
@@ -595,6 +596,21 @@ void xenstore_parse_domain_config(int hv
#endif
bs = bdrv_new(dev);
+
+ /* if cdrom physical put a watch on media-present */
+ if (bdrv_get_type_hint(bs) == BDRV_TYPE_CDROM) {
+ if (drv && !strcmp(drv, "phy")) {
+ if (pasprintf(&buf, "%s/media-present", bpath) != -1) {
+ if (bdrv_is_inserted(bs))
+ xs_write(xsh, XBT_NULL, buf, "1", strlen("1"));
+ else {
+ xs_write(xsh, XBT_NULL, buf, "0", strlen("0"));
+ }
+ xs_watch(xsh, buf, "media-present");
+ }
+ }
+ }
+
/* check if it is a cdrom */
if (danger_type && !strcmp(danger_type, "cdrom")) {
bdrv_set_type_hint(bs, BDRV_TYPE_CDROM);
@@ -1028,6 +1044,50 @@ static void xenstore_process_vcpu_set_ev
return;
}
+static void xenstore_process_media_change_event(char **vec)
+{
+ char *media_present = NULL;
+ unsigned int len;
+
+ media_present = xs_read(xsh, XBT_NULL, vec[XS_WATCH_PATH], &len);
+
+ if (media_present) {
+ BlockDriverState *bs;
+ char *buf = NULL, *cp = NULL, *path = NULL, *dev = NULL;
+
+ path = strdup(vec[XS_WATCH_PATH]);
+ cp = strstr(path, "media-present");
+ if (cp){
+ *(cp-1) = '\0';
+ pasprintf(&buf, "%s/dev", path);
+ dev = xs_read(xsh, XBT_NULL, buf, &len);
+ if (dev) {
+ if ( !strncmp(dev, "xvd", 3)) {
+ memmove(dev, dev+1, strlen(dev));
+ dev[0] = 'h';
+ dev[1] = 'd';
+ }
+ bs = bdrv_find(dev);
+ if (!bs) {
+ term_printf("device not found\n");
+ return;
+ }
+ if (strcmp(media_present, "0") == 0 && bs) {
+ bdrv_close(bs);
+ }
+ else if (strcmp(media_present, "1") == 0 &&
+ bs != NULL && bs->drv == NULL) {
+ if (bdrv_open(bs, bs->filename, 0 /* snapshot */) < 0) {
+ fprintf(logfile, "%s() qemu: could not open cdrom disk
'%s'\n",
+ __func__, bs->filename);
+ }
+ bs->media_changed = 1;
+ }
+ }
+ }
+ }
+}
+
void xenstore_process_event(void *opaque)
{
char **vec, *offset, *bpath = NULL, *buf = NULL, *drv = NULL, *image =
NULL;
@@ -1063,6 +1123,11 @@ void xenstore_process_event(void *opaque
xenstore_watch_callbacks[i].cb(vec[XS_WATCH_TOKEN],
xenstore_watch_callbacks[i].opaque);
+ if (!strcmp(vec[XS_WATCH_TOKEN], "media-present")) {
+ xenstore_process_media_change_event(vec);
+ goto out;
+ }
+
hd_index = drive_name_to_index(vec[XS_WATCH_TOKEN]);
if (hd_index == -1) {
fprintf(stderr,"medium change watch on `%s' -"
++++++ change-vnc-passwd.patch ++++++
Add support of change-vnc-password while vm is running.
Signed-off-by: Chunyan Liu <cyliu@xxxxxxxxxx>
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/vl.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/vl.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/vl.c
@@ -200,7 +200,7 @@ DriveInfo drives_table[MAX_DRIVES+1];
int nb_drives;
enum vga_retrace_method vga_retrace_method = VGA_RETRACE_DUMB;
int vga_ram_size;
-static DisplayState *display_state;
+DisplayState *display_state;
int nographic;
static int curses;
static int sdl;
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/vnc.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/vnc.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/vnc.c
@@ -2591,6 +2591,7 @@ int vnc_display_password(DisplayState *d
if (password && password[0]) {
if (!(vs->password = qemu_strdup(password)))
return -1;
+ vs->auth = VNC_AUTH_VNC;
}
return 0;
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/xenstore.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/xenstore.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/xenstore.c
@@ -25,6 +25,7 @@
#include "qemu-timer.h"
#include "qemu-xen.h"
+extern DisplayState *display_state;
struct xs_handle *xsh = NULL;
static char *media_filename[MAX_DRIVES+1];
static QEMUTimer *insert_timer = NULL;
@@ -1006,6 +1007,19 @@ static void xenstore_process_dm_command_
} else if (!strncmp(command, "continue", len)) {
fprintf(logfile, "dm-command: continue after state save\n");
xen_pause_requested = 0;
+ } else if (!strncmp(command, "chgvncpasswd", len)) {
+ fprintf(logfile, "dm-command: change vnc passwd\n");
+ if (pasprintf(&path,
+ "/local/domain/0/backend/vfb/%u/0/vncpasswd", domid) == -1) {
+ fprintf(logfile, "out of memory reading dm command parameter\n");
+ goto out;
+ }
+ par = xs_read(xsh, XBT_NULL, path, &len);
+ if (!par)
+ goto out;
+ if (vnc_display_password(display_state, par) == 0)
+ xenstore_record_dm_state("vncpasswdchged");
+ free(par);
} else if (!strncmp(command, "usb-add", len)) {
fprintf(logfile, "dm-command: usb-add a usb device\n");
if (pasprintf(&path,
Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -1488,6 +1488,20 @@ class XendDomainInfo:
target = max_target
self.setMemoryTarget(target)
+ def chgvncpasswd(self, passwd):
+ if self._stateGet() != DOM_STATE_HALTED:
+ path = '/local/domain/0/backend/vfb/%u/0/' % self.getDomid()
+ xstransact.Write(path, 'vncpasswd', passwd)
+ self.image.signalDeviceModel("chgvncpasswd", "vncpasswdchged")
+
+ for dev_uuid, (dev_type, dev_info) in self.info['devices'].items():
+ if dev_type == 'vfb':
+ dev_info['vncpasswd'] = passwd
+ dev_info['other_config']['vncpasswd'] = passwd
+ self.info.device_update(dev_uuid, cfg_xenapi = dev_info)
+ break
+ xen.xend.XendDomain.instance().managed_config_save(self)
+
def setMemoryTarget(self, target):
"""Set the memory target of this domain.
@param target: In MiB.
Index: xen-4.1.2-testing/tools/python/xen/xend/server/XMLRPCServer.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/server/XMLRPCServer.py
+++ xen-4.1.2-testing/tools/python/xen/xend/server/XMLRPCServer.py
@@ -95,7 +95,7 @@ methods = ['device_create', 'device_conf
'destroyDevice','getDeviceSxprs',
'setMemoryTarget', 'setName', 'setVCpuCount', 'shutdown',
'send_sysrq', 'getVCPUInfo', 'waitForDevices',
- 'getRestartCount', 'getBlockDeviceClass']
+ 'getRestartCount', 'getBlockDeviceClass', 'chgvncpasswd']
exclude = ['domain_create', 'domain_restore']
Index: xen-4.1.2-testing/tools/python/xen/xm/main.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xm/main.py
+++ xen-4.1.2-testing/tools/python/xen/xm/main.py
@@ -21,6 +21,7 @@
"""Grand unified management application for Xen.
"""
+import getpass
import atexit
import cmd
import os
@@ -289,6 +290,9 @@ SUBCOMMAND_HELP = {
'getenforce' : ('', 'Returns the current enforcing mode for the Flask
XSM module (Enforcing,Permissive)'),
'setenforce' : ('[ (Enforcing|1) | (Permissive|0) ]',
'Modifies the current enforcing mode for the Flask XSM
module'),
+ #change vnc password
+ 'change-vnc-passwd' : ('<Domain>',\
+ 'Change vnc password'),
}
SUBCOMMAND_OPTIONS = {
@@ -421,6 +425,7 @@ common_commands = [
"usb-del",
"domstate",
"vcpu-set",
+ "change-vnc-passwd",
]
domain_commands = [
@@ -462,6 +467,7 @@ domain_commands = [
"vcpu-list",
"vcpu-pin",
"vcpu-set",
+ "change-vnc-passwd",
]
host_commands = [
@@ -3881,6 +3887,10 @@ def xm_cpupool_migrate(args):
else:
server.xend.cpu_pool.migrate(domname, poolname)
+def xm_chgvncpasswd(args):
+ arg_check(args, "change-vnc-passwd", 1)
+ pwd = getpass.getpass("Enter new password: ")
+ server.xend.domain.chgvncpasswd(args[0], pwd)
commands = {
"shell": xm_shell,
@@ -3993,6 +4003,8 @@ commands = {
"usb-del": xm_usb_del,
#domstate
"domstate": xm_domstate,
+ #change vnc password:
+ "change-vnc-passwd": xm_chgvncpasswd,
}
## The commands supported by a separate argument parser in xend.xm.
++++++ change_home_server.patch ++++++
Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -3132,6 +3132,11 @@ class XendDomainInfo:
self._cleanup_phantom_devs(paths)
self._cleanupVm()
+ if "change_home_server" in self.info:
+ chs = self.info["change_home_server"]
+ if (type(chs) is str and chs == "False") or \
+ (type(chs) is bool and chs is False):
+ self.setChangeHomeServer(None)
if ("transient" in self.info["other_config"] and \
bool(self.info["other_config"]["transient"])) or \
("change_home_server" in self.info and \
++++++ check_device_status.patch ++++++
Improve check_device_status to handle HA cases
In HA environment, sometimes xenstore status has changed but ev.wait() cannot
get the signal, it will wait until timeout, thus incorrect device status is
returned. To fix this problem, we do not depend on ev.wait() result, but read
xenstore directly to get correct device status.
Index: xen-4.1.2-testing/tools/python/xen/xend/server/DevController.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/server/DevController.py
+++ xen-4.1.2-testing/tools/python/xen/xend/server/DevController.py
@@ -149,7 +149,10 @@ class DevController:
(status, err) = self.waitForBackend(devid)
if status == Timeout:
- self.destroyDevice(devid, False)
+ #Clean timeout backend resource
+ dev = self.convertToDeviceNumber(devid)
+ self.writeBackend(dev, HOTPLUG_STATUS_NODE, HOTPLUG_STATUS_ERROR)
+ self.destroyDevice(devid, True)
raise VmError("Device %s (%s) could not be connected. "
"Hotplug scripts not working." %
(devid, self.deviceClass))
@@ -554,7 +557,17 @@ class DevController:
xswatch(statusPath, hotplugStatusCallback, ev, result)
- ev.wait(DEVICE_CREATE_TIMEOUT)
+ for i in range(1, 50):
+ ev.wait(DEVICE_CREATE_TIMEOUT/50)
+ status = xstransact.Read(statusPath)
+ if status is not None:
+ if status == HOTPLUG_STATUS_ERROR:
+ result['status'] = Error
+ elif status == HOTPLUG_STATUS_BUSY:
+ result['status'] = Busy
+ else:
+ result['status'] = Connected
+ break
err = xstransact.Read(backpath, HOTPLUG_ERROR_NODE)
@@ -571,7 +584,12 @@ class DevController:
xswatch(statusPath, deviceDestroyCallback, ev, result)
- ev.wait(DEVICE_DESTROY_TIMEOUT)
+ for i in range(1, 50):
+ ev.wait(DEVICE_DESTROY_TIMEOUT/50)
+ status = xstransact.Read(statusPath)
+ if status is None:
+ result['status'] = Disconnected
+ break
return result['status']
++++++ checkpoint-rename.patch ++++++
Index: xen-4.1.2-testing/tools/python/xen/xend/XendCheckpoint.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/XendCheckpoint.py
+++ xen-4.1.2-testing/tools/python/xen/xend/XendCheckpoint.py
@@ -172,7 +172,7 @@ def save(fd, dominfo, network, live, dst
dominfo.destroy()
dominfo.testDeviceComplete()
try:
- dominfo.setName(domain_name, False)
+ dominfo.setName(domain_name)
except VmError:
# Ignore this. The name conflict (hopefully) arises because we
# are doing localhost migration; if we are doing a suspend of a
++++++ del_usb_xend_entry.patch ++++++
Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -1296,8 +1296,15 @@ class XendDomainInfo:
frontpath =
self.getDeviceController(deviceClass).frontendPath(dev)
backpath = xstransact.Read(frontpath, "backend")
thread.start_new_thread(self.getDeviceController(deviceClass).finishDeviceCleanup,
(backpath, path))
-
- rc = self.getDeviceController(deviceClass).destroyDevice(devid,
force)
+ if deviceClass =='vusb':
+ dev =
self.getDeviceController(deviceClass).convertToDeviceNumber(devid)
+ state = self.getDeviceController(deviceClass).readBackend(dev,
'state')
+ if state == '1':
+ rc =
self.getDeviceController(deviceClass).destroyDevice(devid, True)
+ else:
+ rc =
self.getDeviceController(deviceClass).destroyDevice(devid, force)
+ else:
+ rc =
self.getDeviceController(deviceClass).destroyDevice(devid, force)
if not force and rm_cfg:
# The backend path, other than the device itself,
# has to be passed because its accompanied frontend
++++++ disable-xl-when-using-xend.patch ++++++
Print a warning and exit xl if xend is running. It is not
recommened to use libxenlight in conjunction with legacy xend
toolstack.
xl could be useful even when xend is running, e.g. to debug
xend itself, so add a '-f' option to override the exit.
Index: xen-4.1.2-testing/tools/libxl/xl.c
===================================================================
--- xen-4.1.2-testing.orig/tools/libxl/xl.c
+++ xen-4.1.2-testing/tools/libxl/xl.c
@@ -88,12 +88,16 @@ int main(int argc, char **argv)
char *config_file;
void *config_data = 0;
int config_len = 0;
+ int force = 0;
- while ((opt = getopt(argc, argv, "+v")) >= 0) {
+ while ((opt = getopt(argc, argv, "+vf")) >= 0) {
switch (opt) {
case 'v':
if (minmsglevel > 0) minmsglevel--;
break;
+ case 'f':
+ force = 1;
+ break;
default:
fprintf(stderr, "unknown global option\n");
exit(2);
@@ -107,6 +111,18 @@ int main(int argc, char **argv)
exit(1);
}
opterr = 0;
+ /*
+ * On SUSE, if xend is running (and user isn't asking for help),
+ * print a warning and exit unless forced.
+ */
+ if ((system("/usr/sbin/xend status") == 0) && strcmp(cmd, "help")) {
+ if (force == 0) {
+ fprintf(stderr, "WARNING: xend is running! It is not recommended "
+ "using libxenlight in\nconjunction with the legacy xend "
+ "toolstack. Use -f (force) to override\n");
+ exit(1);
+ }
+ }
logger = xtl_createlogger_stdiostream(stderr, minmsglevel, 0);
if (!logger) exit(1);
Index: xen-4.1.2-testing/tools/libxl/xl_cmdimpl.c
===================================================================
--- xen-4.1.2-testing.orig/tools/libxl/xl_cmdimpl.c
+++ xen-4.1.2-testing/tools/libxl/xl_cmdimpl.c
@@ -1725,7 +1725,7 @@ void help(const char *command)
struct cmd_spec *cmd;
if (!command || !strcmp(command, "help")) {
- printf("Usage xl [-v] <subcommand> [args]\n\n");
+ printf("Usage xl [-v] [-f] <subcommand> [args]\n\n");
printf("xl full list of subcommands:\n\n");
for (i = 0; i < cmdtable_len; i++)
printf(" %-20s%s\n",
@@ -1733,7 +1733,7 @@ void help(const char *command)
} else {
cmd = cmdtable_lookup(command);
if (cmd) {
- printf("Usage: xl [-v] %s %s\n\n%s.\n\n",
+ printf("Usage: xl [-v] [-f] %s %s\n\n%s.\n\n",
cmd->cmd_name,
cmd->cmd_usage,
cmd->cmd_desc);
++++++ disable_emulated_device.diff ++++++
Index:
xen-4.1.2-testing/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
===================================================================
---
xen-4.1.2-testing.orig/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
+++ xen-4.1.2-testing/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
@@ -426,6 +426,11 @@ static int __devinit platform_pci_init(s
platform_mmio = mmio_addr;
platform_mmiolen = mmio_len;
+ /*
+ * Disconnect the emulated devices.
+ */
+ outl(1, (ioaddr + 4));
+
ret = init_hypercall_stubs();
if (ret < 0)
goto out;
++++++ dom-print.patch ++++++
Index: xen-4.1.2-testing/xen/arch/x86/domain.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/domain.c
+++ xen-4.1.2-testing/xen/arch/x86/domain.c
@@ -150,15 +150,30 @@ void dump_pageframe_info(struct domain *
printk("Memory pages belonging to domain %u:\n", d->domain_id);
- if ( d->tot_pages >= 10 )
+ if ( d->tot_pages >= 10 && d->is_dying < DOMDYING_dead )
{
printk(" DomPage list too long to display\n");
}
else
{
+ unsigned long total[PGT_type_mask
+ / (PGT_type_mask & -PGT_type_mask) + 1] = {};
+
spin_lock(&d->page_alloc_lock);
page_list_for_each ( page, &d->page_list )
{
+ unsigned int index = (page->u.inuse.type_info & PGT_type_mask)
+ / (PGT_type_mask & -PGT_type_mask);
+
+ if ( ++total[index] > 16 )
+ {
+ switch ( page->u.inuse.type_info & PGT_type_mask )
+ {
+ case PGT_none:
+ case PGT_writable_page:
+ continue;
+ }
+ }
printk(" DomPage %p: caf=%08lx, taf=%" PRtype_info "\n",
_p(page_to_mfn(page)),
page->count_info, page->u.inuse.type_info);
++++++ domUloader.py ++++++
#!/usr/bin/env python
# domUloader.py
"""Loader for kernel and (optional) ramdisk from domU filesystem
Given a physical disk (or disk image) for a domU and the path of a kernel and
optional ramdisk, copies the kernel and ramdisk from the domU disk to a
temporary location in dom0.
The --entry parameter specifies the location of the kernel (and optional
ramdisk) within the domU filesystem. dev is the disk as seen by domU.
Filenames are relative to that filesystem.
The disk is passed as the last parameter. It must be a block device or raw
disk image. More complex disk images (QCOW, VMDK, etc) must already be
configured via blktap and presented as a block device.
The script writes an sxpr specifying the locations of the copied kernel and
ramdisk into the file specified by --output (default is stdout).
Limitations:
- It is assumed both kernel and ramdisk are on the same filesystem.
- domUs might use LVM; the script currently does not have support for setting
up LVM mappings for domUs; it's not trivial and we might risk namespace
conflicts. If you want to use LVM inside domUs, set up a small non-LVM boot
partition and specify it in bootentry.
The script uses kpartx (multipath-tools) to create mappings for devices that
are exported as whole disk devices that are partitioned.
(c) 01/2006 Novell Inc
License: GNU GPL
Author: Kurt Garloff <garloff@xxxxxxx>
"""
import os, sys, getopt
from stat import *
from xen.xend import sxp
import tempfile
import time
# Global options
quiet = False
verbose = False
dryrun = False
tmpdir = '/var/lib/xen/tmp'
in_args = ''
# kpartx, left to its own devices, does not consistently pick the
# same partition separator. Explicitly specify it.
kpartx_args = '-p -part'
# Helper functions
def kpartx_has_opt(opt):
""" Return True if kpartx supports option opt, otherwise False"""
have_opt = True
kpartx_cmd = 'kpartx -' + opt + ' 2>&1'
p = os.popen(kpartx_cmd)
for line in p.readlines():
if line.find('invalid option') >= 0:
have_opt = False
break
p.close()
return have_opt
def error(s):
print >> sys.stderr, "domUloader error: %s" % s
def verbose_print(s):
if verbose:
print >> sys.stderr, "domUloader: %s" % s
def traildigits(strg):
"""Return the trailing digits, used to split the partition number off"""
idx = len(strg)-1
while strg[idx].isdigit():
if len == 0:
return strg
idx -= 1
return strg[idx+1:]
def getWholedisk(part):
while len(part) and part[len(part)-1].isdigit():
part = part[:-1]
return part
#def isWholedisk(domUname):
# """Determines whether dev is a wholedisk dev"""
# return not domUname[-1:].isdigit()
# If available, add '-f' option (bnc#613584)
if kpartx_has_opt('f'):
kpartx_args += ' -f'
class Wholedisk:
"Class representing a whole disk that may have partitions"
def __init__(self, vdev, pdev):
"c'tor: set up"
# Initialize object; will not raise:
self.ldev = None
self.vdev = vdev
self.pdev = pdev
self.mapped = 0
self.partitions = []
self.pcount = 0
self.lvm = False
# Finish initialization; may raise:
self.is_blk = (S_ISBLK(os.stat(pdev)[ST_MODE]))
self.pcount = self.scanpartitions()
def physdev(self):
"""Gets the physical device used to access the device from dom0"""
if self.ldev:
return self.ldev
return self.pdev
def findPart(self, vdev):
"Find device dev in list of partitions"
if len(vdev) > 5 and vdev[:5] == "/dev/":
vdev = vdev[5:]
for part in self.partitions:
if vdev == part.vdev:
return part
if len(self.partitions):
return self.partitions[0]
return None
def loopsetup(self):
"""Sets up the loop mapping for a disk image.
Will raise if no loopbacks are available.
"""
if not self.is_blk and not self.ldev:
# Loops through all loopback devices, attempting to
# find a free one to set up. Don't scan for free and
# then try to set it up as a separate step - too racy!
i = 0
while True:
ldev = '/dev/loop%i' % (i)
if not os.path.exists(ldev):
break
i += 1
fd = os.popen("losetup %s '%s' 2> /dev/null" % (ldev,
self.pdev))
if not fd.close():
verbose_print("losetup %s '%s'" % (ldev, self.pdev))
self.ldev = ldev
break
if not self.ldev:
raise RuntimeError("No free loop device found")
def loopclean(self):
"""Delete the loop mapping.
Will never raise.
"""
if self.ldev:
verbose_print("losetup -d %s" % self.ldev)
# Even seemingly innocent queries like "losetup /dev/loop0"
# can temporarily block the loopback and cause transient
# failures deleting the loopback, hence the retry logic.
retries = 10
while retries:
fd = os.popen("losetup -d %s" % self.ldev)
if not fd.close():
self.ldev = None
break
else:
# Mappings may not have been deleted due to race
# between udev and dm - see bnc#379032. Causes
# loop devices to leak. Call kpartx -d again
os.system("kpartx %s -d '%s'" % (kpartx_args,
self.physdev()))
time.sleep(0.1)
retries -= 1
def scanlvmpartitions(self):
pcount = 0
verbose_print("vgchange -ay '%s'" % (self.vdev))
ret = os.system("vgchange -ay '%s' > /dev/null 2>&1" % (self.vdev)) >> 8
if not ret:
self.lvm = True
verbose_print("lvscan | grep '/dev/%s'" % (self.vdev))
fd = os.popen("lvscan | grep '/dev/%s'" % (self.vdev))
for line in fd.readlines():
line = line.strip()
(t1, lvname, t2) = line.split('\'')
pname = lvname[lvname.rfind('/')+1:]
pname = pname.strip()
pname = "/dev/mapper/" + self.vdev + "-" + pname
verbose_print("Found partition: vdev %s, pdev %s" % (self.vdev,
pname))
self.partitions.append(Partition(self, self.vdev, pname))
pcount += 1
fd.close()
verbose_print("vgchange -an '%s'" % (self.vdev))
os.system("vgchange -an '%s' > /dev/null 2>&1" % (self.vdev))
else:
verbose_print("vgchange -ay %s ... failed: -%d" % (self.vdev, ret))
return pcount
def scanpartitions(self):
"""Scan device for partitions (kpartx -l) and set up data structures,
Returns number of partitions found."""
self.loopsetup()
# TODO: We could use fdisk -l instead and look at the type of
# partitions; this way we could also detect LVM and support it.
verbose_print("kpartx %s -l '%s'" % (kpartx_args, self.physdev()))
fd = os.popen("kpartx %s -l '%s'" % (kpartx_args, self.physdev()))
pcount = 0
for line in fd.readlines():
line = line.strip()
verbose_print("kpartx -l: %s" % (line,))
(pname, params) = line.split(' : ')
pname = pname.strip()
pno = int(traildigits(pname))
#if pname.rfind('/') != -1:
# pname = pname[pname.rfind('/')+1:]
#pname = self.pdev[:self.pdev.rfind('/')] + '/' + pname
pname = "/dev/mapper/" + pname
verbose_print("Found partition: vdev %s, pdev %s" % ('%s%i' %
(self.vdev, pno), pname))
self.partitions.append(Partition(self, '%s%i' % (self.vdev, pno),
pname))
pcount += 1
fd.close()
# Try lvm
if not pcount:
pcount = self.scanlvmpartitions()
# Add self to partition table
if not pcount:
if self.ldev:
ref = self
else:
ref = None
self.partitions.append(Partition(ref, self.vdev, self.pdev))
return pcount
def activatepartitions(self):
"Set up loop mapping and device-mapper mappings"
verbose_print("activatepartitions")
if not self.mapped:
self.loopsetup()
if self.pcount:
verbose_print("kpartx %s -a '%s'" % (kpartx_args,
self.physdev()))
fd = os.popen("kpartx %s -a '%s'" % (kpartx_args,
self.physdev()))
fd.close()
if self.pcount and self.lvm:
verbose_print("vgchange -ay '%s'" % (self.vdev))
ret = os.system("vgchange -ay '%s' > /dev/null 2>&1" %
(self.vdev)) >> 8
if not ret:
verbose_print("lvchange -ay '%s'" % (self.vdev))
os.system("lvchange -ay '%s' > /dev/null 2>&1" %
(self.vdev))
self.mapped += 1
def partitionsdeactivated(self):
"Return True if partition mappings have been removed, False otherwise"
for part in self.partitions:
if os.access(part.pdev, os.F_OK):
return False
return True
def deactivatepartitions(self):
"""Remove device-mapper mappings and loop mapping.
Will never raise.
"""
verbose_print("deactivatepartitions")
if not self.mapped:
return
self.mapped -= 1
if not self.mapped:
if self.pcount:
retries = 10
while retries and not self.partitionsdeactivated():
verbose_print("kpartx %s -d '%s'" % (kpartx_args,
self.physdev()))
os.system("kpartx %s -d '%s'" % (kpartx_args,
self.physdev()))
time.sleep(0.1)
retries -= 1
if retries == 0:
error("unable to remove partition mappings with kpartx -d")
if self.pcount and self.lvm:
verbose_print("lvchange -an '%s'" % (self.vdev))
ret = os.system("lvchange -an '%s' > /dev/null 2>&1" %
(self.vdev)) >> 8
if ret:
time.sleep(0.3)
os.system("lvchange -an '/dev/%s' > /dev/null 2>&1" %
(self.vdev))
verbose_print("vgchange -an '%s'" % (self.vdev))
ret = os.system("vgchange -an '%s' > /dev/null 2>&1" %
(self.vdev)) >> 8
if ret:
time.sleep(0.3)
os.system("vgchange -an '%s' > /dev/null 2>&1" %
(self.vdev))
self.loopclean()
def __del__(self):
"d'tor: clean up"
self.deactivatepartitions()
self.loopclean()
def __repr__(self):
"string representation for debugging"
strg = "[" + self.vdev + "," + self.pdev + ","
if self.ldev:
strg += self.ldev
strg += "," + str(self.pcount) + ",mapped %ix]" % self.mapped
return strg
class Partition:
"""Class representing a domU filesystem (partition) that can be
mounted in dom0"""
def __init__(self, whole = None, vdev = None, pdev = None):
"c'tor: setup"
self.wholedisk = whole
self.vdev = vdev
self.pdev = pdev
self.mountpoint = None
def __del__(self):
"d'tor: cleanup"
if self.mountpoint:
self.umount()
# Not needed: Refcounting will take care of it.
#if self.wholedisk:
# self.wholedisk.deactivatepartitions()
def __repr__(self):
"string representation for debugging"
strg = "[" + self.vdev + "," + self.pdev + ","
if self.mountpoint:
strg += "mounted on " + self.mountpoint + ","
else:
strg += "not mounted,"
if self.wholedisk:
return strg + self.wholedisk.__repr__() + "]"
else:
return strg + "]"
def mount(self, fstype = None, options = "ro"):
"mount filesystem, sets self.mountpoint"
if self.mountpoint:
return
if self.wholedisk:
self.wholedisk.activatepartitions()
mtpt = tempfile.mkdtemp(prefix = "%s." % self.vdev, dir = tmpdir)
mopts = ""
if fstype:
mopts += " -t %s" % fstype
if options:
mopts += " -o %s" % options
verbose_print("mount %s '%s' %s" % (mopts, self.pdev, mtpt))
fd = os.popen("mount %s '%s' %s" % (mopts, self.pdev, mtpt))
err = fd.close()
if err:
try:
os.rmdir(mtpt)
except:
pass
raise RuntimeError("Error %i from mount %s '%s' on %s" % \
(err, mopts, self.pdev, mtpt))
self.mountpoint = mtpt
def umount(self):
"""umount filesystem at self.mountpoint"""
if not self.mountpoint:
return
verbose_print("umount %s" % self.mountpoint)
fd = os.popen("umount %s" % self.mountpoint)
err = fd.close()
try:
os.rmdir(self.mountpoint)
except:
pass
if err:
error("Error %i from umount %s" % (err, self.mountpoint))
else:
self.mountpoint = None
if self.wholedisk:
self.wholedisk.deactivatepartitions()
def parseEntry(entry):
"disects bootentry and returns vdev, kernel, ramdisk"
def bad():
raise RuntimeError, "Malformed --entry"
fsspl = entry.split(':')
if len(fsspl) != 2:
bad()
vdev = fsspl[0]
entry = fsspl[1]
enspl = entry.split(',')
if len(enspl) not in (1, 2):
bad()
# Prepend '/' if missing
kernel = enspl[0]
if kernel == '':
bad()
if kernel[0] != '/':
kernel = '/' + kernel
ramdisk = None
if len(enspl) > 1:
ramdisk = enspl[1]
if ramdisk != '' and ramdisk[0] != '/':
ramdisk = '/' + ramdisk
return vdev, kernel, ramdisk
def copyFile(src, dst):
"Wrapper for shutil.filecopy"
import shutil
verbose_print("cp %s %s" % (src, dst))
stat = os.stat(src)
if stat.st_size > 16*1024*1024:
raise RuntimeError("Too large file %s (%s larger than 16MB)" \
% (src, stat.st_size))
try:
shutil.copyfile(src, dst)
except:
os.unlink(dst)
raise()
def copyKernelAndRamdisk(disk, vdev, kernel, ramdisk):
"""Finds vdev in list of partitions, mounts the partition, copies
kernel [and ramdisk] off to dom0 files, umounts the parition again,
and returns sxpr pointing to these copies."""
verbose_print("copyKernelAndRamdisk(%s, %s, %s, %s)" % (disk, vdev, kernel,
ramdisk))
if dryrun:
return "linux (kernel kernel.dummy) (ramdisk ramdisk.dummy)"
part = disk.findPart(vdev)
if not part:
raise RuntimeError("Partition '%s' does not exist" % vdev)
part.mount()
try:
(fd, knm) = tempfile.mkstemp(prefix = "kernel.", dir = tmpdir)
os.close(fd)
copyFile(part.mountpoint + kernel, knm)
except:
os.unlink(knm)
part.umount()
raise
if not quiet:
print "Copy kernel %s from %s to %s for booting" % (kernel, vdev, knm)
sxpr = "linux (kernel %s)" % knm
if ramdisk:
try:
(fd, inm) = tempfile.mkstemp(prefix = "ramdisk.", dir = tmpdir)
os.close(fd)
copyFile(part.mountpoint + ramdisk, inm)
except:
os.unlink(knm)
os.unlink(inm)
part.umount()
raise
sxpr += "(ramdisk %s)" % inm
part.umount()
return sxpr
def main(argv):
"Main routine: Parses options etc."
global quiet, dryrun, verbose, tmpdir, in_args
def usage():
"Help output (usage info)"
global verbose, quiet, dryrun
print >> sys.stderr, "domUloader usage: domUloader [--output=fd]
[--quiet] [--dryrun] [--verbose]\n" +\
"[--args] [--help] --entry=dev:kernel[,ramdisk]
physdisk [virtdisk]\n" +\
"\n" +\
"dev format: hd[a-p][0-9]*, xvd[a-p][0-9]*,
LVM-vgname-lvname\n"
print >> sys.stderr, __doc__
try:
(optlist, args) = getopt.gnu_getopt(argv, 'qvh', \
('entry=', 'output=', 'tmpdir=', 'args=', 'kernel=', 'ramdisk=',
'help', 'quiet', 'dryrun', 'verbose'))
except:
usage()
sys.exit(1)
entry = None
output = None
pdisk = None
vdisk = None
for (opt, oarg) in optlist:
if opt in ('-h', '--help'):
usage()
sys.exit(1)
elif opt in ('-q', '--quiet'):
quiet = True
elif opt in ('-n', '--dryrun'):
dryrun = True
elif opt in ('-v', '--verbose'):
verbose = True
elif opt == '--output':
output = oarg
elif opt == '--entry':
entry = oarg
elif opt == '--tmpdir':
tmpdir = oarg
elif opt == '--args':
in_args = oarg
verbose_print(str(argv))
if args:
if len(args) == 2:
pdisk = args[1]
elif len(args) == 3:
pdisk = args[1]
vdisk = args[2]
if not entry or not pdisk:
usage()
sys.exit(1)
if output is None or output == "-":
fd = sys.stdout.fileno()
else:
fd = os.open(output, os.O_WRONLY)
if not os.access(tmpdir, os.X_OK):
os.mkdir(tmpdir)
os.chmod(tmpdir, 0750)
vdev, kernel, ramdisk = parseEntry(entry)
if vdev[:vdev.find('-')] == "LVM":
vdev = vdev.split('-')[1]
if not vdisk:
vdisk = getWholedisk(vdev)
verbose_print("vdisk not specified; guessing '%s' based on '%s'" %
(vdisk, vdev))
if not vdev.startswith(vdisk):
error("Virtual disk '%s' does not match entry '%s'" % (vdisk, entry))
sys.exit(1)
disk = Wholedisk(vdisk, pdisk)
r = 0
try:
sxpr = copyKernelAndRamdisk(disk, vdev, kernel, ramdisk)
if in_args:
sxpr += "(args '%s')" % in_args
os.write(fd, sxpr)
except Exception, e:
error(str(e))
r = 1
for part in disk.partitions:
part.wholedisk = None
del disk
return r
# Call main if called (and not imported)
if __name__ == "__main__":
r = 1
try:
r = main(sys.argv)
except Exception, e:
error(str(e))
sys.exit(r)
++++++ domu-usb-controller.patch ++++++
Index: xen-4.1.2-testing/tools/python/xen/xend/XendConfig.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/XendConfig.py
+++ xen-4.1.2-testing/tools/python/xen/xend/XendConfig.py
@@ -1874,7 +1874,14 @@ class XendConfig(dict):
ports = sxp.child(dev_sxp, 'port')
for port in ports[1:]:
try:
- num, bus = port
+ # When ['port' ['1','']] is saved into sxp file, it will
become (port (1 ))
+ # If using this sxp file, here variable "port" will be port=1,
+ # we should process it, otherwise, it will report error.
+ if len(port) == 1:
+ num = port[0]
+ bus = ""
+ else:
+ num, bus = port
dev_config['port-%i' % int(num)] = str(bus)
except TypeError:
pass
++++++ etc_pam.d_xen-api ++++++
#%PAM-1.0
auth required pam_listfile.so onerr=fail item=user \
sense=allow file=/etc/xen/xenapiusers
auth include common-auth
account include common-account
password include common-password
session include common-session
++++++ hibernate.patch ++++++
Index: xen-4.1.2-testing/tools/firmware/hvmloader/acpi/dsdt.asl
===================================================================
--- xen-4.1.2-testing.orig/tools/firmware/hvmloader/acpi/dsdt.asl
+++ xen-4.1.2-testing/tools/firmware/hvmloader/acpi/dsdt.asl
@@ -30,21 +30,9 @@ DefinitionBlock ("DSDT.aml", "DSDT", 2,
/*
* S3 (suspend-to-ram), S4 (suspend-to-disc) and S5 (power-off) type codes:
* must match piix4 emulation.
+ * Turn off support for s3 and s4 sleep states to deal with SVVP tests.
+ * This is what MSFT does on HyperV.
*/
- Name (\_S3, Package (0x04)
- {
- 0x01, /* PM1a_CNT.SLP_TYP */
- 0x01, /* PM1b_CNT.SLP_TYP */
- 0x0, /* reserved */
- 0x0 /* reserved */
- })
- Name (\_S4, Package (0x04)
- {
- 0x00, /* PM1a_CNT.SLP_TYP */
- 0x00, /* PM1b_CNT.SLP_TYP */
- 0x00, /* reserved */
- 0x00 /* reserved */
- })
Name (\_S5, Package (0x04)
{
0x00, /* PM1a_CNT.SLP_TYP */
++++++ hotplug.losetup.patch ++++++
Improve busy loop device detection after changeset 22773:02c0af2bf280
The intention is not to find the file to be mounted in the losetup -a
output. What matters are existing mounted files with the same dev:inode
as the new file. So the fix is to apply variable expansion which
happens only without double quotes. Otherwise $dev will contain
newlines for hardlinked files, as mentioned in the commit message from
the changeset above.
losetup -a does also truncate long filenames to 62 chars due to ioctl
limitations. This part is fixed with 2.6.37 where the filename can be
obtained from sysfs. As a result very long filenames will be missed.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
tools/hotplug/Linux/block | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
Index: xen-4.1.2-testing/tools/hotplug/Linux/block
===================================================================
--- xen-4.1.2-testing.orig/tools/hotplug/Linux/block
+++ xen-4.1.2-testing/tools/hotplug/Linux/block
@@ -280,8 +280,8 @@ mount it read-write in a guest domain."
fi
shared_list=$(losetup -a |
- sed -n -e
"s@^\([^:]\+\)\(:[[:blank:]]\[${dev}\]:${inode}[[:blank:]](${file})\)@\1@p" )
- for dev in "$shared_list"
+ sed -n -e
"s@^\([^:]\+\)\(:[[:blank:]]\[0*${dev}\]:${inode}[[:blank:]](.*)\)@\1@p" )
+ for dev in $shared_list
do
if [ -n "$dev" ]
then
++++++ hv_extid_compatibility.patch ++++++
Index: xen-4.1.2-testing/tools/python/xen/xend/XendConfig.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/XendConfig.py
+++ xen-4.1.2-testing/tools/python/xen/xend/XendConfig.py
@@ -159,6 +159,7 @@ XENAPI_PLATFORM_CFG_TYPES = {
'nographic': int,
'nomigrate': int,
'pae' : int,
+ 'extid': int,
'rtc_timeoffset': int,
'parallel': str,
'serial': str,
@@ -517,6 +518,8 @@ class XendConfig(dict):
if self.is_hvm():
if 'timer_mode' not in self['platform']:
self['platform']['timer_mode'] = 1
+ if 'extid' in self['platform'] and int(self['platform']['extid'])
== 1:
+ self['platform']['viridian'] = 1
if 'viridian' not in self['platform']:
self['platform']['viridian'] = 0
if 'rtc_timeoffset' not in self['platform']:
Index: xen-4.1.2-testing/tools/python/xen/xend/image.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/image.py
+++ xen-4.1.2-testing/tools/python/xen/xend/image.py
@@ -839,6 +839,7 @@ class HVMImageHandler(ImageHandler):
self.apic = int(vmConfig['platform'].get('apic', 0))
self.acpi = int(vmConfig['platform'].get('acpi', 0))
+ self.extid = int(vmConfig['platform'].get('extid', 0))
self.guest_os_type = vmConfig['platform'].get('guest_os_type')
self.memory_sharing = int(vmConfig['memory_sharing'])
try:
Index: xen-4.1.2-testing/tools/python/xen/xm/create.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xm/create.py
+++ xen-4.1.2-testing/tools/python/xen/xm/create.py
@@ -242,6 +242,10 @@ gopts.var('viridian', val='VIRIDIAN',
use="""Expose Viridian interface to x86 HVM guest?
(Default is 0).""")
+gopts.var('extid', val='EXTID',
+ fn=set_int, default=0,
+ use="Specify extention ID for a HVM domain.")
+
gopts.var('acpi', val='ACPI',
fn=set_int, default=1,
use="Disable or enable ACPI of HVM domain.")
@@ -1106,7 +1110,7 @@ def configure_hvm(config_image, vals):
'timer_mode',
'usb', 'usbdevice',
'vcpus', 'vnc', 'vncconsole', 'vncdisplay', 'vnclisten',
- 'vncunused', 'viridian', 'vpt_align',
+ 'vncunused', 'vpt_align',
'watchdog', 'watchdog_action',
'xauthority', 'xen_extended_power_mgmt', 'xen_platform_pci',
'memory_sharing' ]
@@ -1116,6 +1120,10 @@ def configure_hvm(config_image, vals):
config_image.append([a, vals.__dict__[a]])
if vals.vncpasswd is not None:
config_image.append(['vncpasswd', vals.vncpasswd])
+ if vals.extid and vals.extid == 1:
+ config_image.append(['viridian', vals.extid])
+ elif vals.viridian:
+ config_image.append(['viridian', vals.viridian])
def make_config(vals):
++++++ init.pciback ++++++
#!/bin/bash
#
# Copyright (c) 2001 SuSE GmbH Nuernberg, Germany. All rights reserved.
#
# /etc/init.d/pciback
#
### BEGIN INIT INFO
# Provides: pciback
# Required-Start: $syslog $network
# Should-Start: $null
# Required-Stop: $syslog $network
# Should-Stop: $null
# Default-Start: 3 5
# Default-Stop: 0 1 2 6
# Description: bind PCI devices to pciback
### END INIT INFO
. /etc/rc.status
. /etc/sysconfig/pciback
rc_reset
load_pciback() {
if ! lsmod | grep -qi "pciback"
then
echo "Loading pciback ..."
modprobe pciback
fi
}
unload_pciback() {
if lsmod | grep -qi "pciback"
then
echo "Unloading pciback ..."
modprobe -r pciback
fi
}
bind_dev_to_pciback() {
for DEVICE in ${XEN_PCI_HIDE_LIST}
do
local DRV=`echo ${DEVICE} | /usr/bin/cut -d "," -f 1`
local PCIID=`echo ${DEVICE} | /usr/bin/cut -d "," -f 2`
if ! ls /sys/bus/pci/drivers/pciback/${PCIID} > /dev/null 2>&1
then
echo "Binding ${PCIID} ..."
if ls /sys/bus/pci/drivers/${DRV}/${PCIID} > /dev/null 2>&1
then
echo -n ${PCIID} > /sys/bus/pci/drivers/${DRV}/unbind
fi
echo -n ${PCIID} > /sys/bus/pci/drivers/pciback/new_slot
echo -n ${PCIID} > /sys/bus/pci/drivers/pciback/bind
fi
done
}
unbind_dev_from_pciback() {
for DEVICE in ${XEN_PCI_HIDE_LIST}
do
local DRV=`echo ${DEVICE} | /usr/bin/cut -d "," -f 1`
local PCIID=`echo ${DEVICE} | /usr/bin/cut -d "," -f 2`
if ls /sys/bus/pci/drivers/pciback/${PCIID} > /dev/null
then
echo "Unbinding ${PCIID} ..."
echo -n ${PCIID} > /sys/bus/pci/drivers/pciback/unbind
fi
done
}
test "uname -r" | grep xen && exit 0
case $1 in
start)
echo "Starting pciback ..."
echo
load_pciback
bind_dev_to_pciback
rc_status -v -r
;;
stop)
echo "Stopping pciback ..."
echo
unbind_dev_from_pciback
unload_pciback
rc_status -v
;;
reload|restart)
echo "Stopping pciback ..."
echo
unbind_dev_from_pciback
unload_pciback
echo "Starting pciback ..."
echo
load_pciback
bind_dev_to_pciback
;;
status)
if lsmod | grep -qi pciback
then
echo
echo "pciback: loaded"
echo
echo "Currently bound devices ..."
echo "-----------------------------"
ls /sys/bus/pci/drivers/pciback | grep ^0000
echo
else
echo "pciback: not loaded"
fi
;;
*)
echo "Usage: $0 [start|stop|restart|reload|status]"
exit 1
;;
esac
++++++ init.xen_loop ++++++
# Increase the number of loopback devices available for vm creation
options loop max_loop=64
++++++ init.xend ++++++
#!/bin/bash
#
# xend Starts and stops the Xen management daemon
#
# chkconfig: 35 98 01
# description: Starts and stops the Xen management daemon
#
### BEGIN INIT INFO
# Provides: xend
# Required-Start: $syslog $remote_fs
# Should-Start: iscsi $time
# Required-Stop: $syslog $remote_fs
# Should-Stop: iscsi $time
# Default-Start: 3 5
# Default-Stop: 0 1 2 6
# Short-Description: Starts and stops the Xen management daemon
# Description: Starts and stops the Xen management daemon. xend is needed
# to create and manage VMs on Xen.
### END INIT INFO
. /etc/rc.status
rc_reset
XEND=`pidof -x /usr/sbin/xend`
await_daemons_up()
{
i=1
rets=10
xend status
while [ $? -ne 0 -a $i -lt $rets ]; do
sleep 1
echo -n .
i=$(($i + 1))
xend status
done
}
xend_abort()
{
echo -n "xend "
rc_failed $1
rc_status -v
rc_exit
}
cleanup()
{
rm -f /var/lib/xen/tmp/* 2>/dev/null
rm -f /var/lib/xen/xenbl* 2>/dev/null
}
check()
{
if [ "$1" == status ]; then
if [ ! -e /proc/xen/capabilities ]; then
xend_abort 3
fi
else
if [ `id -u` != 0 ]; then
xend_abort 4
fi
if [ ! -e /proc/xen/capabilities ] ||
! grep control_d /proc/xen/capabilities >/dev/null 2>&1;
then
if [ "$1" == stop ] ||
[ "$1" == try-restart ]; then
xend_abort 0
else
xend_abort 6
fi
fi
fi
}
case "$1" in
start)
check $1
echo -n "Starting xend "
if [ ! -z "$XEND" ]; then
echo -n "(already running pid $XEND) "
else
cleanup
fi
xend start
await_daemons_up
;;
stop)
check $1
echo -n "Stopping xend "
if [ -z "$XEND" ]; then
echo -n "(not running) "
xend stop
rc_reset
else
echo -n "(pid $XEND) "
xend stop
cleanup
rc_reset
fi
;;
status)
check $1
echo -n "Checking status of xend "
if [ ! -z "$XEND" ]; then
echo -n "(pid $XEND) "
fi
checkproc /usr/sbin/xend
;;
restart|reload)
check $1
echo -n "Restarting xend "
if [ -z "$XEND" ]; then
echo -n "(not running) "
else
echo -n "(old pid $XEND) "
fi
xend restart
await_daemons_up
;;
try-restart)
check $1
$0 status
if [ $? = 0 ]; then
$0 restart
else
rc_reset
fi
;;
*)
echo "Usage: $0 {start|stop|restart|try-restart|reload|status}"
rc_failed 2
rc_exit
esac
rc_status -v
rc_exit
++++++ init.xendomains ++++++
#!/bin/bash
#
# xendomains Starts and stops Xen VMs
#
# chkconfig: 35 99 00
# description: Starts and stops Xen VMs
#
### BEGIN INIT INFO
# Provides: xendomains
# Required-Start: $syslog $remote_fs xend
# Should-Start: iscsi o2cb ocfs2
# Required-Stop: $syslog $remote_fs xend
# Should-Stop: iscsi
# Default-Start: 3 5
# Default-Stop: 0 1 2 6
# Short-Description: Starts and stops Xen VMs
# Description: Starts and stops Xen VMs automatically when the
# host starts and stops.
### END INIT INFO
. /etc/rc.status
rc_reset
LOCKFILE=/var/lock/subsys/xendomains
XENDOM_CONFIG=/etc/sysconfig/xendomains
RETCODE_FILE=/tmp/xendomains.rc.$$
xm_cmd=echo
. "$XENDOM_CONFIG"
shopt -s dotglob nullglob
smart_term=1
if [ -z "$esc" ]; then
smart_term=0
rc_timer_on()
{
(trap "exit 0" TERM; sleep $1) & _rc_timer_pid=$!
}
rc_timer_off()
{
if [ -n "$_rc_timer_pid" ]; then
kill -TERM $_rc_timer_pid > /dev/null 2>&1
fi
unset _rc_timer_pid
}
fi
xendomains_abort()
{
echo -n "xendomains "
rc_failed $1
rc_status -v
rc_exit
}
check()
{
XEND=`pidof -x /usr/sbin/xend`
if [ -z "$XEND" ]; then
xm_cmd="xl -f"
else
xm_cmd="xm"
fi
if [ "$1" = status ]; then
if [ ! -e /proc/xen/capabilities ] || [ ! -r "$XENDOM_CONFIG" ]
|| [ -z "$XEND" ]; then
xendomains_abort 3
fi
else
if [ `id -u` != 0 ]; then
xendomains_abort 4
fi
if [ ! -e /proc/xen/capabilities ] || [ -z "$XEND" ] ||
! grep control_d /proc/xen/capabilities >/dev/null 2>&1;
then
if [ "$1" = stop ] ||
[ "$1" = restart ]; then
xendomains_abort 0
else
xendomains_abort 6
fi
fi
if [ ! -r "$XENDOM_CONFIG" ]; then
xendomains_abort 6
fi
fi
}
dir_contains_something()
{
[ -d "$1" ] || return 1
local dirfiles=( "$1"/* )
[ ${#dirfiles[@]} != 0 ]
}
get_name_from_cfg()
{
if grep -q "^name" "$1";then
NM=`grep '^name[ ]*=' "$1" | sed -e 's/^name[ ]*=[
]*['\''"]\([^'\''"]*\)['\''"].*$/\1/'`
elif grep -q "(name " "$1";then
NM=`grep '(name ' "$1" | sed -e 's/^ *//' | cut -d " " -f 2 |
sed -e 's/)//'`
fi
}
running_auto_names()
{
unset AUTONAMES[@]
if ! dir_contains_something "$XENDOMAINS_AUTO"; then
return
fi
for dom in "$XENDOMAINS_AUTO"/*; do
get_name_from_cfg "$dom"
AUTONAMES+=("$NM")
done
}
parseln()
{
name=${1:0:$((${#1}-36))}
name=${name%% *}
rest="${1: -36}"
id=${rest:0:4}
id=`echo $id`
mem=${rest:4:6}
mem=`echo $mem`
vcpu=${rest:10:6}
vcpu=`echo $vcpu`
state=${rest:16:11}
state=`echo $state`
tm=${rest:27}
tm=`echo $tm`
}
xm_list()
{
TERM=vt100 ${xm_cmd} list | grep -v '^Name *ID'
}
is_cfg_running()
{
get_name_from_cfg "$1"
while read LN; do
parseln "$LN"
[ "$id" = 0 ] && continue
if [ "$name" = "$NM" ]; then
[ -z "$state" ] && return 1
return 0
fi
done < <(xm_list)
return 1
}
start()
{
if [ -f "$LOCKFILE" ]; then
echo -n "xendomains already running (lockfile exists)"
rc_reset
rc_status -v
return 0
fi
local printed=0
if [ "$XENDOMAINS_RESTORE" = "true" ] &&
dir_contains_something "$XENDOMAINS_SAVE"; then
mkdir -p $(dirname "$LOCKFILE")
touch "$LOCKFILE"
echo "Restoring saved Xen domains"
printed=1
for dom in "$XENDOMAINS_SAVE"/*; do
echo -n " ${dom##*/}: "
${xm_cmd} restore "$dom" >/dev/null 2>&1
if [ $? -ne 0 ]; then
rc_failed
else
rc_reset
rm -f "$dom"
fi
rc_status -v
done
fi
if dir_contains_something "$XENDOMAINS_AUTO"; then
touch "$LOCKFILE"
echo "Starting auto Xen domains"
printed=1
for dom in "$XENDOMAINS_AUTO"/*; do
echo -n " ${dom##*/}: "
if is_cfg_running "$dom"; then
rc_status -s
else
if grep -q "^name" "$dom";then
${xm_cmd} create --quiet --defconfig
"$dom"
elif grep -q "(name .*" "$dom";then
${xm_cmd} create --quiet --config "$dom"
fi
if [ $? -ne 0 ]; then
rc_failed
else
usleep $XENDOMAINS_CREATE_USLEEP
rc_reset
fi
rc_status -v
fi
done
fi
if [ $printed -eq 0 ]; then
echo -n "Starting xendomains"
rc_failed 6 # not configured
rc_status -v
fi
}
is_zombie_state()
{
[ "$1" = "-b---d" ] || [ "$1" = "-----d" ]
}
any_non_zombies()
{
while read LN; do
parseln "$LN"
[ "$id" = 0 ] && continue
[ -z "$state" ] && continue
is_zombie_state "$state" || return 0
done < <(xm_list)
return 1
}
migrate_with_watchdog()
{
(${xm_cmd} migrate "$@" ; echo $? > "$RETCODE_FILE") >/dev/null 2>&1 &
watchdog_xm $!
}
save_with_watchdog()
{
(${xm_cmd} save "$@" ; echo $? > "$RETCODE_FILE") >/dev/null 2>&1 &
watchdog_xm $!
}
shutdown_with_watchdog()
{
(${xm_cmd} shutdown -w "$@" ; echo $? > "$RETCODE_FILE") >/dev/null
2>&1 &
watchdog_xm $!
}
get_return_code()
{
local RC=127
[ -r "$RETCODE_FILE" ] && RC=`head -c10 "$RETCODE_FILE"`
rm -f "$RETCODE_FILE"
return $RC
}
# $1: The PID to wait on.
watchdog_xm()
{
local col=$((COLUMNS-11))
if [ -z "$XENDOMAINS_STOP_MAXWAIT" ] || [ "$XENDOMAINS_STOP_MAXWAIT" =
"0" ]; then
wait $1 >/dev/null 2>&1
get_return_code
return
fi
rc_timer_on $XENDOMAINS_STOP_MAXWAIT $col
while true; do
# Prefer "jobs" over "ps": faster and no false positives
pid=`jobs -l | grep " $1 Running"`
if [ -z "$pid" ]; then
break
fi
pid=`jobs -l | grep " $_rc_timer_pid Running"`
if [ -z "$pid" ]; then
disown $1 # To avoid the "Terminated..." message
kill $1 >/dev/null 2>&1
fi
sleep 1
done
rc_timer_off
if [ $smart_term -ne 0 ]; then
echo -en "\015${esc}[${col}C "
fi
get_return_code
}
stop()
{
echo "Shutting down Xen domains"
if [ "$XENDOMAINS_AUTO_ONLY" = "true" ]; then
running_auto_names
fi
local printed=0
while read LN; do
parseln "$LN"
[ "$id" = 0 ] && continue
[ -z "$state" ] && continue
printed=1
if [ "$XENDOMAINS_AUTO_ONLY" = "true" ]; then
is_auto_domain=0
for n in "${AUTONAMES[@]}"; do
if [ "$name" = "$n" ]; then
is_auto_domain=1
break
fi
done
if [ $is_auto_domain -eq 0 ]; then
echo -n " $name: "
rc_status -s
continue
fi
fi
if [ -n "$XENDOMAINS_SYSRQ" ]; then
for sysrq in $XENDOMAINS_SYSRQ; do
echo -n " $name: "
echo -n "sending sysrq '$sysrq'... "
${xm_cmd} sysrq $id $sysrq
if [ $? -ne 0 ]; then
rc_failed
else
rc_reset
fi
rc_status -v
# usleep just ignores empty arg
usleep $XENDOMAINS_USLEEP
done
fi
if is_zombie_state "$state"; then
echo -n " $name: "
echo -n "destroying zombie... "
${xm_cmd} destroy $id
rc_reset
rc_status -v
continue
fi
if [ -n "$XENDOMAINS_MIGRATE" ]; then
echo -n " $name: "
echo -n "migrating... "
migrate_with_watchdog $id "$XENDOMAINS_MIGRATE"
if [ $? -ne 0 ]; then
rc_failed
rc_status -v
else
rc_reset
rc_status -v
continue
fi
fi
if [ -n "$XENDOMAINS_SAVE" ]; then
echo -n " $name: "
echo -n "saving... "
save_with_watchdog $id "$XENDOMAINS_SAVE/$name"
if [ $? -ne 0 ]; then
rm -f "$XENDOMAINS_SAVE/$name"
rc_failed
rc_status -v
else
rc_reset
rc_status -v
continue
fi
fi
if [ -n "$XENDOMAINS_SHUTDOWN" ]; then
echo -n " $name: "
echo -n "shutting down... "
shutdown_with_watchdog $id $XENDOMAINS_SHUTDOWN
if [ $? -ne 0 ]; then
rc_failed
else
rc_reset
fi
rc_status -v
fi
done < <(xm_list)
if [ -n "$XENDOMAINS_SHUTDOWN_ALL" ] && any_non_zombies ; then
echo -n " others: shutting down... "
shutdown_with_watchdog $XENDOMAINS_SHUTDOWN_ALL
if [ $? -ne 0 ]; then
rc_failed
else
rc_reset
fi
rc_status -v
fi
if [ $printed -eq 0 ]; then
echo -e "${rc_done_up}"
fi
# Unconditionally delete lock file
rm -f "$LOCKFILE"
}
check_domain_up()
{
while read LN; do
parseln "$LN"
[ "$id" = 0 ] && continue
if [ "$name" = "$1" ]; then
[ -z "$state" ] && return 1
return 0
fi
done < <(xm_list)
return 1
}
check_all_domains_up()
{
any_auto=0
any_save=0
dir_contains_something "$XENDOMAINS_AUTO" && any_auto=1
dir_contains_something "$XENDOMAINS_SAVE" && any_save=1
if [ $any_auto -eq 0 ] && [ $any_save -eq 0 ]; then
rc_reset
rc_status -v
return
fi
echo
if [ $any_auto -ne 0 ]; then
for nm in "$XENDOMAINS_AUTO"/*; do
get_name_from_cfg "$nm"
echo -n " $nm: "
if check_domain_up "$NM"; then
rc_reset
else
rc_failed 2
fi
rc_status -v
done
fi
if [ $any_save -ne 0 ]; then
for nm in "$XENDOMAINS_SAVE"/*; do
echo -n " $nm: "
rc_failed 3
here is the log from the commit of package xen for openSUSE:12.1:Update:Test
checked in at 2012-02-08 18:18:42
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:12.1:Update:Test/xen (Old)
and /work/SRC/openSUSE:12.1:Update:Test/.xen.new (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "xen", Maintainer is "CARNOLD@xxxxxxxx"
Changes:
--------
New Changes file:
--- /dev/null 2010-08-26 16:28:41.000000000 +0200
+++ /work/SRC/openSUSE:12.1:Update:Test/.xen.new/xen.changes 2012-02-08
18:18:48.000000000 +0100
@@ -0,0 +1,6025 @@
+-------------------------------------------------------------------
+Mon Jan 23 13:41:42 MST 2012 - carnold@xxxxxxxxxx
+
+- The xen kmp packages fail on the 09-check-packaged-twice script.
+ Rename xen_pvdrivers.conf to xen_pvdrivers-<kernel flavor>.conf
+
+-------------------------------------------------------------------
+Wed Jan 18 09:42:54 MST 2012 - carnold@xxxxxxxxxx
+
+- bnc#739585 - L3: Xen block-attach fails after repeated attach/detach
+ blktap-close-fifos.patch
+
+-------------------------------------------------------------------
+Fri Jan 13 10:37:53 MST 2012 - jfehlig@xxxxxxxx
+
+- bnc#741159 - Fix default setting of XENSTORED_ROOTDIR in
+ xencommons init script
+ xencommons-xenstored-root.patch
+
+-------------------------------------------------------------------
+Thu Jan 12 06:49:57 MST 2012 - carnold@xxxxxxxxxx
+
+- bnc#740625 - xen: cannot interact with xend after upgrade (SLES)
+- bnc#738694 - xen: cannot interact with xend after upgrade (os12.1)
+- Other README changes included.
+ README.SuSE
+
+-------------------------------------------------------------------
+Tue Jan 10 17:30:20 CET 2012 - ohering@xxxxxxx
+
+- bnc#694863 - kexec fails in xen
+ 24478-libxl_add_feature_flag_to_xenstore_for_XS_RESET_WATCHES.patch
+
+-------------------------------------------------------------------
+Mon Jan 9 16:10:19 CET 2012 - ohering@xxxxxxx
+
+- fate#310510 - fix xenpaging
+ xenpaging.speedup-page-out.patch
+
+-------------------------------------------------------------------
+Tue Jan 3 08:26:42 MST 2012 - carnold@xxxxxxxxxx
+
+- bnc#735806 - VF doesn't work after hot-plug for many times
+ 24448-x86-pt-irq-leak.patch
+- Upstream patches from Jan
+ 24261-x86-cpuidle-Westmere-EX.patch
+ 24417-amd-erratum-573.patch
+ 24429-mceinj-tool.patch
+ 24447-x86-TXT-INIT-SIPI-delay.patch
+ ioemu-9868-MSI-X.patch
+
+-------------------------------------------------------------------
+Mon Jan 2 10:05:57 CET 2012 - ohering@xxxxxxx
+
+- bnc#732884 - remove private runlevel 4 from init scripts
+ xen.no-default-runlevel-4.patch
+
+-------------------------------------------------------------------
+Mon Dec 19 15:22:13 MST 2011 - carnold@xxxxxxxxxx
+
+- bnc#727515 - Fragmented packets hang network boot of HVM guest
+ ipxe-gcc45-warnings.patch
+ ipxe-ipv4-fragment.patch
+ ipxe-enable-nics.patch
+
+-------------------------------------------------------------------
+Mon Dec 19 12:43:11 CET 2011 - ohering@xxxxxxx
+
+- fate#310510 - fix xenpaging
+ update xenpaging.autostart.patch, make changes with mem-swap-target
+ permanent
+ update xenpaging.doc.patch, mention issues with live migration
+
+-------------------------------------------------------------------
+Thu Dec 15 17:53:51 CET 2011 - ohering@xxxxxxx
+
+- fate#310510 - fix xenpaging
+ add xenpaging.evict_mmap_readonly.patch
+ update xenpaging.error-handling.patch, reduce debug output
+
+-------------------------------------------------------------------
+Thu Dec 15 08:35:27 MST 2011 - carnold@xxxxxxxxxx
+
+- bnc#736824 - Microcode patches for AMD's 15h processors panic the
+ system
+ 24189-x86-p2m-pod-locking.patch
+ 24412-x86-AMD-errata-model-shift.patch
+ 24411-x86-ucode-AMD-Fam15.patch
+
+-------------------------------------------------------------------
+Wed Dec 14 10:08:24 MST 2011 - carnold@xxxxxxxxxx
+
+- bnc#711219 - SR-IOV VF doesn't work in SLES11 sp2 guest
+ 24357-firmware-no-_PS0-_PS3.patch
+- Upstream patches from Jan
+ 24153-x86-emul-feature-checks.patch
+ 24275-x86-emul-lzcnt.patch
+ 24277-x86-dom0-features.patch
+ 24278-x86-dom0-no-PCID.patch
+ 24282-x86-log-dirty-bitmap-leak.patch
+ 24359-x86-domU-features.patch
+ 24360-x86-pv-domU-no-PCID.patch
+ 24389-amd-fam10-gart-tlb-walk-err.patch
+ 24391-x86-pcpu-version.patch
+
+-------------------------------------------------------------------
+Thu Dec 8 14:19:49 CET 2011 - ohering@xxxxxxx
+
+- bnc#729208 - xenpaging=-1 doesn't work
+ xenpaging.doc.patch
+
+-------------------------------------------------------------------
+Thu Dec 8 08:41:36 CET 2011 - ohering@xxxxxxx
+
+- fate#310510 - fix xenpaging
+ readd xenpaging.qemu.flush-cache.patch
+
+-------------------------------------------------------------------
+Wed Dec 7 11:01:43 MST 2011 - jfehlig@xxxxxxxx
+
+- bnc#732782 - L3: xm create hangs when maxmen value is enclosed
+ in "quotes"
+ xm-create-maxmem.patch
+
+-------------------------------------------------------------------
+Wed Dec 7 10:44:06 MST 2011 - carnold@xxxxxxxxxx
+
+- Upstream patches / changes from Jan
+ Added 24358-kexec-compat-overflow.patch
+ Removed
24341-x86-64-mmcfg_remove___initdata_annotation_overlooked_in_23749e8d1c8f074ba.patch
+ Removed
24345-tools-libxc_Fix_x86_32_build_breakage_in_previous_changeset..patch
+
+-------------------------------------------------------------------
+Wed Dec 7 16:42:44 CET 2011 - ohering@xxxxxxx
+
+- fate#310510 - fix xenpaging
+ 24178-debug_Add_domain-vcpu_pause_count_info_to_d_key..patch
+ Use wait queues for paging, improve foreign mappings.
+ xenpaging.versioned-interface.patch
+ xenpaging.mmap-before-nominate.patch
+ xenpaging.p2m_is_paged.patch
+ xenpaging.evict_fail_fast_forward.patch
+ xenpaging.error-handling.patch
+ xenpaging.mem_event-use-wait_queue.patch
+ xenpaging.waitqueue-paging.patch
+ Remove obsolete patch, not needed with wait queue usage
+ xenpaging.HVMCOPY_gfn_paged_out.patch
+
+-------------------------------------------------------------------
+Wed Dec 7 16:23:49 CET 2011 - ohering@xxxxxxx
+
+- fate#310510 - fix xenpaging
+ Fix incorrect backport, remove double memset, use xzalloc
+ 24171-x86waitqueue_Allocate_whole_page_for_shadow_stack..patch
+
+-------------------------------------------------------------------
+Wed Dec 7 12:08:31 CET 2011 - ohering@xxxxxxx
+
+- fate#310510 - fix xenpaging
+ fix typo in nominate, use lock instead of double unlock
+ 23905-xenpaging_fix_locking_in_p2m_mem_paging_functions.patch
+
+-------------------------------------------------------------------
+Wed Dec 7 11:07:23 CET 2011 - ohering@xxxxxxx
+
+- fate#310510 - fix xenpaging
+
24327-After_preparing_a_page_for_page-in_allow_immediate_fill-in_of_the_page_contents.patch
+
24328-Tools_Libxc_wrappers_to_automatically_fill_in_page_oud_page_contents_on_prepare.patch
+
24329-Teach_xenpaging_to_use_the_new_and_non-racy_xc_mem_paging_load_interface.patch
+
+-------------------------------------------------------------------
+Tue Dec 6 11:14:51 MST 2011 - jfehlig@xxxxxxxx
+
+- bnc#734826 - xm rename doesn't work anymore
+ Updated xend-migration-domname-fix.patch
+
+-------------------------------------------------------------------
+Fri Dec 2 20:35:29 CET 2011 - ohering@xxxxxxx
+
+- fate#310510 - fix xenpaging
+ 24269-mem_event_move_mem_event_domain_out_of_struct_domain.patch
+ 24270-Free_d-mem_event_on_domain_destruction..patch
+
+-------------------------------------------------------------------
+Fri Dec 2 20:25:24 CET 2011 - ohering@xxxxxxx
+
+- fate#310510 - fix xenpaging
+ 24318-x86-mm_Fix_checks_during_foreign_mapping_of_paged_pages.patch
+
+-------------------------------------------------------------------
+Fri Dec 2 20:21:48 CET 2011 - ohering@xxxxxxx
+
+- fate#310510 - fix xenpaging
+ 23949-constify_vcpu_set_affinitys_second_parameter.patch
+
+-------------------------------------------------------------------
+Fri Dec 2 19:36:33 CET 2011 - ohering@xxxxxxx
++++ 5828 more lines (skipped)
++++ between /dev/null
++++ and /work/SRC/openSUSE:12.1:Update:Test/.xen.new/xen.changes
New:
----
22998-x86-get_page_from_l1e-retcode.patch
22999-x86-mod_l1_entry-retcode.patch
23000-x86-mod_l2_entry-retcode.patch
23050-xentrace_dynamic_tracebuffer_allocation.patch
23074-pfn.h.patch
23091-xentrace_fix_t_info_pages_calculation..patch
23092-xentrace_print_calculated_numbers_in_calculate_tbuf_size.patch
23093-xentrace_remove_gdprintk_usage_since_they_are_not_in_guest_context.patch
23094-xentrace_update_comments.patch
23095-xentrace_use_consistent_printk_prefix.patch
23096-x86-hpet-no-cpumask_lock.patch
23099-x86-rwlock-scalability.patch
23103-x86-pirq-guest-eoi-check.patch
23127-vtd-bios-settings.patch
23128-xentrace_correct_formula_to_calculate_t_info_pages.patch
23129-xentrace_remove_unneeded_debug_printk.patch
23173-xentrace_Move_register_cpu_notifier_call_into_boot-time_init..patch
23199-amd-iommu-unmapped-intr-fault.patch
23233-hvm-cr-access.patch
23234-svm-decode-assist-base.patch
23235-svm-decode-assist-crs.patch
23236-svm-decode-assist-invlpg.patch
23238-svm-decode-assist-insn-fetch.patch
23239-xentrace_correct_overflow_check_for_number_of_per-cpu_trace_pages.patch
23246-x86-xsave-enable.patch
23303-cpufreq-misc.patch
23304-amd-oprofile-strings.patch
23305-amd-fam15-xenoprof.patch
23306-amd-fam15-vpmu.patch
23308-xentrace_Move_the_global_variable_t_info_first_offset_into_calculate_tbuf_size.patch
23309-xentrace_Mark_data_size___read_mostly_because_its_only_written_once.patch
23310-xentrace_Remove_unneeded_cast_when_assigning_pointer_value_to_dst.patch
23334-amd-fam12+14-vpmu.patch
23383-libxc-rm-static-vars.patch
23404-xentrace_reduce_trace_buffer_size_to_something_mfn_offset_can_reach.patch
23405-xentrace_fix_type_of_offset_to_avoid_ouf-of-bounds_access.patch
23406-xentrace_update___insert_record_to_copy_the_trace_record_to_individual_mfns.patch
23407-xentrace_allocate_non-contiguous_per-cpu_trace_buffers.patch
23437-amd-fam15-TSC-scaling.patch
23462-libxc-cpu-feature.patch
23506-x86_Disable_set_gpfn_from_mfn_until_m2p_table_is_allocated..patch
23507-xenpaging_update_machine_to_phys_mapping_during_page_deallocation.patch
23508-vmx-proc-based-ctls-probe.patch
23509-x86_32_Fix_build_Define_machine_to_phys_mapping_valid.patch
23511-amd-fam15-no-flush-for-C3.patch
23562-xenpaging_remove_unused_spinlock_in_pager.patch
23571-vtd-fault-verbosity.patch
23574-x86-dom0-compressed-ELF.patch
23575-x86-DMI.patch
23576-x86_show_page_walk_also_for_early_page_faults.patch
23577-tools_merge_several_bitop_functions_into_xc_bitops.h.patch
23578-xenpaging_add_xs_handle_to_struct_xenpaging.patch
23579-xenpaging_drop_xc.c_remove_ASSERT.patch
23580-xenpaging_drop_xc.c_remove_xc_platform_info_t.patch
23581-xenpaging_drop_xc.c_remove_xc_wait_for_event.patch
23582-xenpaging_drop_xc.c_move_xc_mem_paging_flush_ioemu_cache.patch
23583-xenpaging_drop_xc.c_move_xc_wait_for_event_or_timeout.patch
23584-xenpaging_drop_xc.c_remove_xc_files.patch
23585-xenpaging_correct_dropping_of_pages_to_avoid_full_ring_buffer.patch
23586-xenpaging_do_not_bounce_p2mt_back_to_the_hypervisor.patch
23587-xenpaging_remove_srand_call.patch
23588-xenpaging_remove_return_values_from_functions_that_can_not_fail.patch
23589-xenpaging_catch_xc_mem_paging_resume_errors.patch
23590-xenpaging_remove_local_domain_id_variable.patch
23591-xenpaging_move_num_pages_into_xenpaging_struct.patch
23592-xenpaging_start_paging_in_the_middle_of_gfn_range.patch
23593-xenpaging_pass_integer_to_xenpaging_populate_page.patch
23594-xenpaging_add_helper_function_for_unlinking_pagefile.patch
23595-xenpaging_add_watch_thread_to_catch_guest_shutdown.patch
23596-xenpaging_implement_stopping_of_pager_by_sending_SIGTERM-SIGINT.patch
23597-xenpaging_remove_private_mem_event.h.patch
23599-tools_fix_build_after_recent_xenpaging_changes.patch
23610-x86-topology-info.patch
23611-amd-fam15-topology.patch
23613-EFI-headers.patch
23614-x86_64-EFI-boot.patch
23615-x86_64-EFI-runtime.patch
23616-x86_64-EFI-MPS.patch
23643-xentrace_Allow_tracing_to_be_enabled_at_boot.patch
23676-x86_64-image-map-bounds.patch
23719-xentrace_update___trace_var_comment.patch
23723-x86-CMOS-lock.patch
23724-x86-smpboot-x2apic.patch
23726-x86-intel-flexmigration-v2.patch
23735-guest-dom0-cap.patch
23747-mmcfg-base-address.patch
23749-mmcfg-reservation.patch
23752-x86-shared-IRQ-vector-maps.patch
23754-AMD-perdev-vector-map.patch
23771-x86-ioapic-clear-pin.patch
23772-x86-trampoline.patch
23774-x86_64-EFI-EDD.patch
23781-pm-wide-ACPI-ids.patch
23782-x86-ioapic-clear-irr.patch
23783-ACPI-set-_PDC-bits.patch
23795-intel-ich10-quirk.patch
23800-x86_64-guest-addr-range.patch
23804-x86-IPI-counts.patch
23817-mem_event_add_ref_counting_for_free_requestslots.patch
23818-mem_event_use_mem_event_mark_and_pause_in_mem_event_check_ring.patch
23819-make-docs.patch
23827-xenpaging_use_batch_of_pages_during_final_page-in.patch
23841-mem_event_pass_mem_event_domain_pointer_to_mem_event_functions.patch
23842-mem_event_use_different_ringbuffers_for_share_paging_and_access.patch
23853-x86-pv-cpuid-xsave.patch
23874-xenpaging_track_number_of_paged_pages_in_struct_domain.patch
23897-x86-mce-offline-again.patch
23900-xzalloc.patch
23904-xenpaging_use_p2m-get_entry_in_p2m_mem_paging_functions.patch
23905-xenpaging_fix_locking_in_p2m_mem_paging_functions.patch
23906-xenpaging_remove_confusing_comment_from_p2m_mem_paging_populate.patch
23908-p2m_query-modify_p2mt_with_p2m_lock_held.patch
23925-x86-AMD-ARAT-Fam12.patch
23933-pt-bus2bridge-update.patch
23943-xenpaging_clear_page_content_after_evict.patch
23949-constify_vcpu_set_affinitys_second_parameter.patch
23953-xenpaging_handle_evict_failures.patch
23955-x86-pv-cpuid-xsave.patch
23957-cpufreq-error-paths.patch
23978-xenpaging_check_p2mt_in_p2m_mem_paging_functions.patch
23979-xenpaging_document_p2m_mem_paging_functions.patch
23980-xenpaging_disallow_paging_in_a_PoD_guest.patch
23993-x86-microcode-amd-fix-23871.patch
24104-waitqueue_Double_size_of_x86_shadow_stack..patch
24105-xenpaging_compare_domain_pointer_in_p2m_mem_paging_populate.patch
24106-mem_event_check_capabilities_only_once.patch
24116-x86-continuation-cancel.patch
24123-x86-cpuidle-quiesce.patch
24124-x86-microcode-amd-quiesce.patch
24137-revert-23666.patch
24138-xenpaging_munmap_all_pages_after_page-in.patch
24144-cpufreq-turbo-crash.patch
24148-shadow-pgt-dying-op-performance.patch
24153-x86-emul-feature-checks.patch
24155-x86-ioapic-EOI-after-migration.patch
24156-x86-ioapic-shared-vectors.patch
24157-x86-xstate-init.patch
24168-x86-vioapic-clear-remote_irr.patch
24171-x86waitqueue_Allocate_whole_page_for_shadow_stack..patch
24178-debug_Add_domain-vcpu_pause_count_info_to_d_key..patch
24189-x86-p2m-pod-locking.patch
24190-hap-log-dirty-disable-rc.patch
24193-hap-track-dirty-vram-rc.patch
24195-waitqueue_Detect_saved-stack_overflow_and_crash_the_guest..patch
24196-waitqueue_Reorder_prepare_to_wait_so_that_vcpu_is_definitely_on_the.patch
24197-x86-waitqueue_Because_we_have_per-cpu_stacks_we_must_wake_up_on_teh.patch
24201-x86-pcpu-platform-op.patch
24208-xenpaging_remove_filename_from_comment.patch
24209-xenpaging_remove_obsolete_comment_in_resume_path.patch
24210-xenpaging_use_PERROR_to_print_errno.patch
24211-xenpaging_simplify_file_op.patch
24212-xenpaging_print_gfn_in_failure_case.patch
24213-xenpaging_update_xenpaging_init.patch
24214-xenpaging_remove_xc_dominfo_t_from_paging_t.patch
24215-xenpaging_track_the_number_of_paged-out_pages.patch
24216-xenpaging_move_page_add-resume_loops_into_its_own_function..patch
24217-xenpaging_improve_mainloop_exit_handling.patch
24218-libxc_add_bitmap_clear_function.patch
24219-xenpaging_retry_unpageable_gfns.patch
24220-xenpaging_install_into_LIBEXEC_dir.patch
24221-xenpaging_add_XEN_PAGING_DIR_-_libxl_xenpaging_dir_path.patch
24222-xenpaging_use_guests_tot_pages_as_working_target.patch
24223-xenpaging_watch_the_guests_memory-target-tot_pages_xenstore_value.patch
24224-xenpaging_add_cmdline_interface_for_pager.patch
24225-xenpaging_improve_policy_mru_list_handling.patch
24226-xenpaging_add_debug_to_show_received_watch_event..patch
24227-xenpaging_restrict_pagefile_permissions.patch
24231-waitqueue_Implement_wake_up_nroneall..patch
24232-waitqueue_Hold_a_reference_to_a_domain_on_a_waitqueue..patch
24261-x86-cpuidle-Westmere-EX.patch
24269-mem_event_move_mem_event_domain_out_of_struct_domain.patch
24270-Free_d-mem_event_on_domain_destruction..patch
24272-xenpaging_Fix_c-s_235070a29c8c3ddf7_update_machine_to_phys_mapping_during_page_deallocation.patch
24275-x86-emul-lzcnt.patch
24277-x86-dom0-features.patch
24278-x86-dom0-no-PCID.patch
24282-x86-log-dirty-bitmap-leak.patch
24318-x86-mm_Fix_checks_during_foreign_mapping_of_paged_pages.patch
24327-After_preparing_a_page_for_page-in_allow_immediate_fill-in_of_the_page_contents.patch
24328-Tools_Libxc_wrappers_to_automatically_fill_in_page_oud_page_contents_on_prepare.patch
24329-Teach_xenpaging_to_use_the_new_and_non-racy_xc_mem_paging_load_interface.patch
24344-tools-x86_64_Fix_cpuid_inline_asm_to_not_clobber_stacks_red_zone.patch
24357-firmware-no-_PS0-_PS3.patch
24358-kexec-compat-overflow.patch
24359-x86-domU-features.patch
24360-x86-pv-domU-no-PCID.patch
24389-amd-fam10-gart-tlb-walk-err.patch
24391-x86-pcpu-version.patch
24411-x86-ucode-AMD-Fam15.patch
24412-x86-AMD-errata-model-shift.patch
24417-amd-erratum-573.patch
24429-mceinj-tool.patch
24447-x86-TXT-INIT-SIPI-delay.patch
24448-x86-pt-irq-leak.patch
24478-libxl_add_feature_flag_to_xenstore_for_XS_RESET_WATCHES.patch
2XXXX-vif-bridge.patch
32on64-extra-mem.patch
README.SuSE
_link
altgr_2.patch
baselibs.conf
bdrv_default_rwflag.patch
bdrv_open2_fix_flags.patch
bdrv_open2_flags_2.patch
blktap-close-fifos.patch
blktap-pv-cdrom.patch
blktap.patch
blktapctrl-default-to-ioemu.patch
block-dmmd
block-iscsi
block-nbd
block-npiv
block-npiv-common.sh
block-npiv-vport
boot.local.xenU
boot.xen
bridge-bonding.diff
bridge-opensuse.patch
bridge-record-creation.patch
bridge-vlan.diff
build-tapdisk-ioemu.patch
capslock_enable.patch
cdrom-removable.patch
change-vnc-passwd.patch
change_home_server.patch
check_device_status.patch
checkpoint-rename.patch
del_usb_xend_entry.patch
disable-xl-when-using-xend.patch
disable_emulated_device.diff
dom-print.patch
domUloader.py
domu-usb-controller.patch
etc_pam.d_xen-api
hibernate.patch
hotplug.losetup.patch
hv_extid_compatibility.patch
init.pciback
init.xen_loop
init.xend
init.xendomains
ioemu-7615-qcow2-fix-alloc_cluster_link_l2.patch
ioemu-9868-MSI-X.patch
ioemu-bdrv-open-CACHE_WB.patch
ioemu-blktap-barriers.patch
ioemu-blktap-fv-init.patch
ioemu-blktap-image-format.patch
ioemu-blktap-zero-size.patch
ioemu-debuginfo.patch
ioemu-disable-emulated-ide-if-pv.patch
ioemu-disable-scsi.patch
ioemu-vnc-resize.patch
ioemu-watchdog-ib700-timer.patch
ioemu-watchdog-linkage.patch
ioemu-watchdog-support.patch
ipxe-enable-nics.patch
ipxe-gcc45-warnings.patch
ipxe-ipv4-fragment.patch
kernel-boot-hvm.patch
kmp_filelist
libxen_permissive.patch
log-guest-console.patch
logrotate.conf
magic_ioport_compat.patch
minios-fixups.patch
multi-xvdp.patch
network-nat-open-SuSEfirewall2-FORWARD.patch
network-nat.patch
pv-driver-build.patch
pvdrv-import-shared-info.patch
pvdrv_emulation_control.patch
qemu-dm-segfault.patch
qemu-security-etch1.diff
serial-split.patch
snapshot-ioemu-delete.patch
snapshot-ioemu-restore.patch
snapshot-ioemu-save.patch
snapshot-without-pv-fix.patch
snapshot-xend.patch
stdvga-cache.patch
stubdom.tar.bz2
supported_module.diff
suspend_evtchn_lock.patch
sysconfig.pciback
tapdisk-ioemu-logfile.patch
tapdisk-ioemu-shutdown-fix.patch
tmp-initscript-modprobe.patch
tmp_build.patch
tools-kboot.diff
tools-watchdog-support.patch
tools-xc_kexec.diff
udev-rules.patch
usb-list.patch
vif-bridge-no-iptables.patch
vif-bridge.mtu.patch
vif-route-ifup.patch
x86-cpufreq-report.patch
x86-extra-trap-info.patch
x86-ioapic-ack-default.patch
xen-4.1.2-testing-src.tar.bz2
xen-api-auth.patch
xen-changeset.diff
xen-config.diff
xen-cpupool-xl-config-format.patch
xen-destdir.diff
xen-disable-qemu-monitor.diff
xen-domUloader.diff
xen-fixme-doc.diff
xen-hvm-default-bridge.diff
xen-hvm-default-pae.diff
xen-ioemu-hvm-pv-support.diff
xen-max-free-mem.diff
xen-minimum-restart-time.patch
xen-no-dummy-nfs-ip.diff
xen-paths.diff
xen-qemu-iscsi-fix.patch
xen-rpmoptflags.diff
xen-updown.sh
xen-utils-0.1.tar.bz2
xen-warnings-unused.diff
xen-warnings.diff
xen-xm-top-needs-root.diff
xen-xmexample-vti.diff
xen-xmexample.diff
xen.changes
xen.no-default-runlevel-4.patch
xen.sles11sp1.bug684297.xen_oldmem_pfn_is_ram.patch
xen.sles11sp1.fate311487.xen_platform_pci.dmistring.patch
xen.spec
xen_pvdrivers.conf
xenalyze.gcc46.patch
xenalyze.hg.tar.bz2
xenapi-console-protocol.patch
xenapiusers
xencommons-proc-xen.patch
xencommons-xenstored-root.patch
xenconsole-no-multiple-connections.patch
xend-config-enable-dump-comment.patch
xend-config.diff
xend-console-port-restore.patch
xend-core-dump-loc.diff
xend-devid-or-name.patch
xend-disable-internal-logrotate.patch
xend-domain-lock-sfex.patch
xend-domain-lock.patch
xend-migration-domname-fix.patch
xend-relocation-server.fw
xend-relocation.sh
xend-sysconfig.patch
xend-vcpu-affinity-fix.patch
xenpaging.autostart.patch
xenpaging.doc.patch
xenpaging.error-handling.patch
xenpaging.evict_fail_fast_forward.patch
xenpaging.evict_mmap_readonly.patch
xenpaging.guest-memusage.patch
xenpaging.mem_event-use-wait_queue.patch
xenpaging.mmap-before-nominate.patch
xenpaging.p2m_is_paged.patch
xenpaging.qemu.flush-cache.patch
xenpaging.speedup-page-out.patch
xenpaging.versioned-interface.patch
xenpaging.waitqueue-paging.patch
xenstored.XS_RESET_WATCHES.patch
xl-create-pv-with-qcow2-img.patch
xm-create-maxmem.patch
xm-create-xflag.patch
xm-save-check-file.patch
xm-test-cleanup.diff
xmclone.sh
xmexample.disks
xmexample.domUloader
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Other differences:
------------------
++++++ xen.spec ++++++
++++ 1623 lines (skipped)
++++++ 22998-x86-get_page_from_l1e-retcode.patch ++++++
References: bnc#675363
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1299687371 0
# Node ID e9fab50d7b61d151d51a4b1088930c9e1ca2da47
# Parent 5f28dcea13555f7ab948c9cb95de3e79e0fbfc4b
x86: make get_page_from_l1e() return a proper error code
... so that the guest can actually know the reason for the (hypercall)
failure.
ptwr_do_page_fault() could propagate the error indicator received from
get_page_from_l1e() back to the guest in the high half of the error
code (entry_vector), provided we're sure all existing guests can deal
with that (or indicate so by means of a to-be-added guest feature
flag). Alternatively, a second virtual status register (like CR2)
could be introduced.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/mm/shadow/multi.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/shadow/multi.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/shadow/multi.c
@@ -872,7 +872,7 @@ shadow_get_page_from_l1e(shadow_l1e_t sl
// If a privileged domain is attempting to install a map of a page it does
// not own, we let it succeed anyway.
//
- if ( unlikely(!res) &&
+ if ( unlikely(res < 0) &&
!shadow_mode_translate(d) &&
mfn_valid(mfn = shadow_l1e_get_mfn(sl1e)) &&
(owner = page_get_owner(mfn_to_page(mfn))) &&
@@ -883,11 +883,11 @@ shadow_get_page_from_l1e(shadow_l1e_t sl
SHADOW_PRINTK("privileged domain %d installs map of mfn %05lx "
"which is owned by domain %d: %s\n",
d->domain_id, mfn_x(mfn), owner->domain_id,
- res ? "success" : "failed");
+ res >= 0 ? "success" : "failed");
}
/* Okay, it might still be a grant mapping PTE. Try it. */
- if ( unlikely(!res) &&
+ if ( unlikely(res < 0) &&
(type == p2m_grant_map_rw ||
(type == p2m_grant_map_ro &&
!(shadow_l1e_get_flags(sl1e) & _PAGE_RW))) )
@@ -900,7 +900,7 @@ shadow_get_page_from_l1e(shadow_l1e_t sl
res = get_page_from_l1e(sl1e, d, page_get_owner(mfn_to_page(mfn)));
}
- if ( unlikely(!res) )
+ if ( unlikely(res < 0) )
{
perfc_incr(shadow_get_page_fail);
SHADOW_PRINTK("failed: l1e=" SH_PRI_pte "\n");
@@ -1229,15 +1229,15 @@ static int shadow_set_l1e(struct vcpu *v
TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_GET_REF);
switch ( shadow_get_page_from_l1e(new_sl1e, d, new_type) )
{
- case 0:
+ default:
/* Doesn't look like a pagetable. */
flags |= SHADOW_SET_ERROR;
new_sl1e = shadow_l1e_empty();
break;
- case -1:
+ case 1:
shadow_l1e_remove_flags(new_sl1e, _PAGE_RW);
/* fall through */
- default:
+ case 0:
shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d);
break;
}
Index: xen-4.1.2-testing/xen/arch/x86/mm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm.c
+++ xen-4.1.2-testing/xen/arch/x86/mm.c
@@ -799,12 +799,12 @@ get_page_from_l1e(
bool_t write;
if ( !(l1f & _PAGE_PRESENT) )
- return 1;
+ return 0;
if ( unlikely(l1f & l1_disallow_mask(l1e_owner)) )
{
MEM_LOG("Bad L1 flags %x", l1f & l1_disallow_mask(l1e_owner));
- return 0;
+ return -EINVAL;
}
if ( !mfn_valid(mfn) ||
@@ -821,18 +821,21 @@ get_page_from_l1e(
if ( !iomem_access_permitted(pg_owner, mfn, mfn) )
{
if ( mfn != (PADDR_MASK >> PAGE_SHIFT) ) /* INVALID_MFN? */
+ {
MEM_LOG("Non-privileged (%u) attempt to map I/O space %08lx",
pg_owner->domain_id, mfn);
- return 0;
+ return -EPERM;
+ }
+ return -EINVAL;
}
if ( !(l1f & _PAGE_RW) || IS_PRIV(pg_owner) ||
!rangeset_contains_singleton(mmio_ro_ranges, mfn) )
- return 1;
+ return 0;
dprintk(XENLOG_G_WARNING,
"d%d: Forcing read-only access to MFN %lx\n",
l1e_owner->domain_id, mfn);
- return -1;
+ return 1;
}
if ( unlikely(real_pg_owner != pg_owner) )
@@ -863,6 +866,7 @@ get_page_from_l1e(
{
unsigned long x, nx, y = page->count_info;
unsigned long cacheattr = pte_flags_to_cacheattr(l1f);
+ int err;
if ( is_xen_heap_page(page) )
{
@@ -870,7 +874,7 @@ get_page_from_l1e(
put_page_type(page);
put_page(page);
MEM_LOG("Attempt to change cache attributes of Xen heap page");
- return 0;
+ return -EACCES;
}
do {
@@ -878,7 +882,8 @@ get_page_from_l1e(
nx = (x & ~PGC_cacheattr_mask) | (cacheattr << PGC_cacheattr_base);
} while ( (y = cmpxchg(&page->count_info, x, nx)) != x );
- if ( unlikely(update_xen_mappings(mfn, cacheattr) != 0) )
+ err = update_xen_mappings(mfn, cacheattr);
+ if ( unlikely(err) )
{
cacheattr = y & PGC_cacheattr_mask;
do {
@@ -894,11 +899,11 @@ get_page_from_l1e(
" from L1 entry %" PRIpte ") for %d",
mfn, get_gpfn_from_mfn(mfn),
l1e_get_intpte(l1e), l1e_owner->domain_id);
- return 0;
+ return err;
}
}
- return 1;
+ return 0;
could_not_pin:
MEM_LOG("Error getting mfn %lx (pfn %lx) from L1 entry %" PRIpte
@@ -907,7 +912,7 @@ get_page_from_l1e(
l1e_get_intpte(l1e), l1e_owner->domain_id, pg_owner->domain_id);
if ( real_pg_owner != NULL )
put_page(page);
- return 0;
+ return -EBUSY;
}
@@ -1197,17 +1202,20 @@ static int alloc_l1_table(struct page_in
unsigned long pfn = page_to_mfn(page);
l1_pgentry_t *pl1e;
unsigned int i;
+ int ret = 0;
pl1e = map_domain_page(pfn);
for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
{
if ( is_guest_l1_slot(i) )
- switch ( get_page_from_l1e(pl1e[i], d, d) )
+ switch ( ret = get_page_from_l1e(pl1e[i], d, d) )
{
- case 0:
+ default:
goto fail;
- case -1:
+ case 0:
+ break;
+ case 1:
l1e_remove_flags(pl1e[i], _PAGE_RW);
break;
}
@@ -1225,7 +1233,7 @@ static int alloc_l1_table(struct page_in
put_page_from_l1e(pl1e[i], d);
unmap_domain_page(pl1e);
- return -EINVAL;
+ return ret;
}
static int create_pae_xen_mappings(struct domain *d, l3_pgentry_t *pl3e)
@@ -1794,11 +1802,13 @@ static int mod_l1_entry(l1_pgentry_t *pl
return rc;
}
- switch ( get_page_from_l1e(nl1e, pt_dom, pg_dom) )
+ switch ( rc = get_page_from_l1e(nl1e, pt_dom, pg_dom) )
{
- case 0:
+ default:
return 0;
- case -1:
+ case 0:
+ break;
+ case 1:
l1e_remove_flags(nl1e, _PAGE_RW);
break;
}
@@ -4948,7 +4958,7 @@ static int ptwr_emulated_update(
nl1e = l1e_from_intpte(val);
switch ( get_page_from_l1e(nl1e, d, d) )
{
- case 0:
+ default:
if ( is_pv_32bit_domain(d) && (bytes == 4) && (unaligned_addr & 4) &&
!do_cmpxchg && (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
{
@@ -4968,7 +4978,9 @@ static int ptwr_emulated_update(
return X86EMUL_UNHANDLEABLE;
}
break;
- case -1:
+ case 0:
+ break;
+ case 1:
l1e_remove_flags(nl1e, _PAGE_RW);
break;
}
++++++ 22999-x86-mod_l1_entry-retcode.patch ++++++
References: bnc#675363
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1299687409 0
# Node ID 82b5f8d12903e140f957ae8d13d66e44be076b05
# Parent e9fab50d7b61d151d51a4b1088930c9e1ca2da47
x86: make mod_l1_entry() return a proper error code
... again is so that the guest can actually know the reason for the
(hypercall) failure.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/mm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm.c
+++ xen-4.1.2-testing/xen/arch/x86/mm.c
@@ -1765,15 +1765,16 @@ static int mod_l1_entry(l1_pgentry_t *pl
struct domain *pt_dom = pt_vcpu->domain;
unsigned long mfn;
p2m_type_t p2mt;
- int rc = 1;
+ int rc = 0;
if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
- return 0;
+ return -EFAULT;
if ( unlikely(paging_mode_refcounts(pt_dom)) )
{
- rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu, preserve_ad);
- return rc;
+ if ( UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu, preserve_ad) )
+ return 0;
+ return -EBUSY;
}
if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
@@ -1782,7 +1783,7 @@ static int mod_l1_entry(l1_pgentry_t *pl
mfn = mfn_x(gfn_to_mfn(p2m_get_hostp2m(pg_dom),
l1e_get_pfn(nl1e), &p2mt));
if ( !p2m_is_ram(p2mt) || unlikely(mfn == INVALID_MFN) )
- return 0;
+ return -EINVAL;
ASSERT((mfn & ~(PADDR_MASK >> PAGE_SHIFT)) == 0);
nl1e = l1e_from_pfn(mfn, l1e_get_flags(nl1e));
@@ -1790,22 +1791,23 @@ static int mod_l1_entry(l1_pgentry_t *pl
{
MEM_LOG("Bad L1 flags %x",
l1e_get_flags(nl1e) & l1_disallow_mask(pt_dom));
- return 0;
+ return -EINVAL;
}
/* Fast path for identical mapping, r/w and presence. */
if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) )
{
adjust_guest_l1e(nl1e, pt_dom);
- rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu,
- preserve_ad);
- return rc;
+ if ( UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu,
+ preserve_ad) )
+ return 0;
+ return -EBUSY;
}
switch ( rc = get_page_from_l1e(nl1e, pt_dom, pg_dom) )
{
default:
- return 0;
+ return rc;
case 0:
break;
case 1:
@@ -1818,13 +1820,13 @@ static int mod_l1_entry(l1_pgentry_t *pl
preserve_ad)) )
{
ol1e = nl1e;
- rc = 0;
+ rc = -EBUSY;
}
}
else if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu,
preserve_ad)) )
{
- return 0;
+ return -EBUSY;
}
put_page_from_l1e(ol1e, pt_dom);
@@ -3516,9 +3518,10 @@ int do_mmu_update(
}
#endif
- okay = mod_l1_entry(va, l1e, mfn,
- cmd == MMU_PT_UPDATE_PRESERVE_AD, v,
- pg_owner);
+ rc = mod_l1_entry(va, l1e, mfn,
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, v,
+ pg_owner);
+ okay = !rc;
}
break;
case PGT_l2_page_table:
@@ -4300,7 +4303,7 @@ static int __do_update_va_mapping(
goto out;
}
- rc = mod_l1_entry(pl1e, val, gl1mfn, 0, v, pg_owner) ? 0 : -EINVAL;
+ rc = mod_l1_entry(pl1e, val, gl1mfn, 0, v, pg_owner);
page_unlock(gl1pg);
put_page(gl1pg);
++++++ 23000-x86-mod_l2_entry-retcode.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1299687446 0
# Node ID d428fa67abaa0db20b915a697f1d5ba16e554185
# Parent 82b5f8d12903e140f957ae8d13d66e44be076b05
x86: make mod_l2_entry() return a proper error code
... so that finally all mod_lN_entry() functions behave identically,
allowing some cleanup in do_mmu_update() (which no longer needs to
track both an okay status and an error code).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/mm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm.c
+++ xen-4.1.2-testing/xen/arch/x86/mm.c
@@ -1845,16 +1845,16 @@ static int mod_l2_entry(l2_pgentry_t *pl
struct domain *d = vcpu->domain;
struct page_info *l2pg = mfn_to_page(pfn);
unsigned long type = l2pg->u.inuse.type_info;
- int rc = 1;
+ int rc = 0;
if ( unlikely(!is_guest_l2_slot(d, type, pgentry_ptr_to_slot(pl2e))) )
{
MEM_LOG("Illegal L2 update attempt in Xen-private area %p", pl2e);
- return 0;
+ return -EPERM;
}
if ( unlikely(__copy_from_user(&ol2e, pl2e, sizeof(ol2e)) != 0) )
- return 0;
+ return -EFAULT;
if ( l2e_get_flags(nl2e) & _PAGE_PRESENT )
{
@@ -1862,32 +1862,33 @@ static int mod_l2_entry(l2_pgentry_t *pl
{
MEM_LOG("Bad L2 flags %x",
l2e_get_flags(nl2e) & L2_DISALLOW_MASK);
- return 0;
+ return -EINVAL;
}
/* Fast path for identical mapping and presence. */
if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT) )
{
adjust_guest_l2e(nl2e, d);
- rc = UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu, preserve_ad);
- return rc;
+ if ( UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu, preserve_ad) )
+ return 0;
+ return -EBUSY;
}
- if ( unlikely(get_page_from_l2e(nl2e, pfn, d) < 0) )
- return 0;
+ if ( unlikely((rc = get_page_from_l2e(nl2e, pfn, d)) < 0) )
+ return rc;
adjust_guest_l2e(nl2e, d);
if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu,
preserve_ad)) )
{
ol2e = nl2e;
- rc = 0;
+ rc = -EBUSY;
}
}
else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu,
preserve_ad)) )
{
- return 0;
+ return -EBUSY;
}
put_page_from_l2e(ol2e, pfn);
@@ -3367,7 +3368,7 @@ int do_mmu_update(
void *va;
unsigned long gpfn, gmfn, mfn;
struct page_info *page;
- int rc = 0, okay = 1, i = 0;
+ int rc = 0, i = 0;
unsigned int cmd, done = 0, pt_dom;
struct vcpu *v = current;
struct domain *d = v->domain, *pt_owner = d, *pg_owner;
@@ -3434,7 +3435,6 @@ int do_mmu_update(
}
cmd = req.ptr & (sizeof(l1_pgentry_t)-1);
- okay = 0;
switch ( cmd )
{
@@ -3451,6 +3451,7 @@ int do_mmu_update(
rc = xsm_mmu_normal_update(d, pg_owner, req.val);
if ( rc )
break;
+ rc = -EINVAL;
req.ptr -= cmd;
gmfn = req.ptr >> PAGE_SHIFT;
@@ -3521,7 +3522,6 @@ int do_mmu_update(
rc = mod_l1_entry(va, l1e, mfn,
cmd == MMU_PT_UPDATE_PRESERVE_AD, v,
pg_owner);
- okay = !rc;
}
break;
case PGT_l2_page_table:
@@ -3545,13 +3545,12 @@ int do_mmu_update(
else if ( p2m_ram_shared == l2e_p2mt )
{
MEM_LOG("Unexpected attempt to map shared page.\n");
- rc = -EINVAL;
break;
}
- okay = mod_l2_entry(va, l2e, mfn,
- cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
+ rc = mod_l2_entry(va, l2e, mfn,
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
}
break;
case PGT_l3_page_table:
@@ -3575,13 +3574,11 @@ int do_mmu_update(
else if ( p2m_ram_shared == l3e_p2mt )
{
MEM_LOG("Unexpected attempt to map shared page.\n");
- rc = -EINVAL;
break;
}
rc = mod_l3_entry(va, l3e, mfn,
cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, v);
- okay = !rc;
}
break;
#if CONFIG_PAGING_LEVELS >= 4
@@ -3607,20 +3604,18 @@ int do_mmu_update(
else if ( p2m_ram_shared == l4e_p2mt )
{
MEM_LOG("Unexpected attempt to map shared page.\n");
- rc = -EINVAL;
break;
}
rc = mod_l4_entry(va, l4e, mfn,
cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, v);
- okay = !rc;
}
break;
#endif
case PGT_writable_page:
perfc_incr(writable_mmu_updates);
- okay = paging_write_guest_entry(
- v, va, req.val, _mfn(mfn));
+ if ( paging_write_guest_entry(v, va, req.val, _mfn(mfn)) )
+ rc = 0;
break;
}
page_unlock(page);
@@ -3630,8 +3625,8 @@ int do_mmu_update(
else if ( get_page_type(page, PGT_writable_page) )
{
perfc_incr(writable_mmu_updates);
- okay = paging_write_guest_entry(
- v, va, req.val, _mfn(mfn));
+ if ( paging_write_guest_entry(v, va, req.val, _mfn(mfn)) )
+ rc = 0;
put_page_type(page);
}
@@ -3652,17 +3647,18 @@ int do_mmu_update(
if ( unlikely(!get_page_from_pagenr(mfn, pg_owner)) )
{
MEM_LOG("Could not get page for mach->phys update");
+ rc = -EINVAL;
break;
}
if ( unlikely(paging_mode_translate(pg_owner)) )
{
MEM_LOG("Mach-phys update on auto-translate guest");
+ rc = -EINVAL;
break;
}
set_gpfn_from_mfn(mfn, gpfn);
- okay = 1;
paging_mark_dirty(pg_owner, mfn);
@@ -3672,15 +3668,11 @@ int do_mmu_update(
default:
MEM_LOG("Invalid page update command %x", cmd);
rc = -ENOSYS;
- okay = 0;
break;
}
- if ( unlikely(!okay) )
- {
- rc = rc ? rc : -EINVAL;
+ if ( unlikely(rc) )
break;
- }
guest_handle_add_offset(ureqs, 1);
}
++++++ 23050-xentrace_dynamic_tracebuffer_allocation.patch ++++++
changeset: 23050:4ebba54b666f
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu Mar 17 13:29:01 2011 +0000
files: xen/common/trace.c
description:
xentrace: dynamic tracebuffer allocation
Allocate tracebuffers dynamically, based on the requested buffer size.
Calculate t_info_size from requested t_buf size.
Fix allocation failure path, free pages outside the spinlock.
Remove casts for rawbuf, it can be a void pointer since no math is
done.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
---
xen/common/trace.c | 249 ++++++++++++++++++++++-------------------------------
1 file changed, 104 insertions(+), 145 deletions(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -42,14 +42,14 @@ CHECK_t_buf;
#define compat_t_rec t_rec
#endif
-/* opt_tbuf_size: trace buffer size (in pages) */
-static unsigned int opt_tbuf_size = 0;
+/* opt_tbuf_size: trace buffer size (in pages) for each cpu */
+static unsigned int opt_tbuf_size;
integer_param("tbuf_size", opt_tbuf_size);
/* Pointers to the meta-data objects for all system trace buffers */
static struct t_info *t_info;
-#define T_INFO_PAGES 2 /* Size fixed at 2 pages for now. */
-#define T_INFO_SIZE ((T_INFO_PAGES)*(PAGE_SIZE))
+static unsigned int t_info_pages;
+
static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
@@ -78,6 +78,21 @@ static u32 tb_event_mask = TRC_ALL;
* i.e., sizeof(_type) * ans >= _x. */
#define fit_to_type(_type, _x) (((_x)+sizeof(_type)-1) / sizeof(_type))
+static int cpu_callback(
+ struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned long)hcpu;
+
+ if ( action == CPU_UP_PREPARE )
+ spin_lock_init(&per_cpu(t_lock, cpu));
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block cpu_nfb = {
+ .notifier_call = cpu_callback
+};
+
static void calc_tinfo_first_offset(void)
{
int offset_in_bytes = offsetof(struct t_info, mfn_offset[NR_CPUS]);
@@ -85,20 +100,34 @@ static void calc_tinfo_first_offset(void
}
/**
- * check_tbuf_size - check to make sure that the proposed size will fit
+ * calculate_tbuf_size - check to make sure that the proposed size will fit
* in the currently sized struct t_info and allows prod and cons to
* reach double the value without overflow.
+ * Initialize t_info_pages based on number of trace pages.
*/
-static int check_tbuf_size(u32 pages)
+static int calculate_tbuf_size(unsigned int pages)
{
struct t_buf dummy;
typeof(dummy.prod) size;
-
- size = ((typeof(dummy.prod))pages) * PAGE_SIZE;
-
- return (size / PAGE_SIZE != pages)
- || (size + size < size)
- || (num_online_cpus() * pages + t_info_first_offset > T_INFO_SIZE /
sizeof(uint32_t));
+
+ /* force maximum value for an unsigned type */
+ size = -1;
+
+ /* max size holds up to n pages */
+ size /= PAGE_SIZE;
+ if ( pages > size )
+ {
+ gdprintk(XENLOG_INFO, "%s: requested number of %u pages reduced to
%u\n",
+ __func__, pages, (unsigned int)size);
+ pages = size;
+ }
+
+ t_info_pages = num_online_cpus() * pages + t_info_first_offset;
+ t_info_pages *= sizeof(uint32_t);
+ t_info_pages /= PAGE_SIZE;
+ if ( t_info_pages % PAGE_SIZE )
+ t_info_pages++;
+ return pages;
}
/**
@@ -111,47 +140,28 @@ static int check_tbuf_size(u32 pages)
* This function may also be called later when enabling trace buffers
* via the SET_SIZE hypercall.
*/
-static int alloc_trace_bufs(void)
+static int alloc_trace_bufs(unsigned int pages)
{
- int i, cpu, order;
- unsigned long nr_pages;
+ int i, cpu, order;
/* Start after a fixed-size array of NR_CPUS */
uint32_t *t_info_mfn_list;
int offset;
- if ( opt_tbuf_size == 0 )
- return -EINVAL;
+ if ( t_info )
+ return -EBUSY;
- if ( check_tbuf_size(opt_tbuf_size) )
- {
- printk("Xen trace buffers: tb size %d too large. "
- "Tracing disabled.\n",
- opt_tbuf_size);
+ if ( pages == 0 )
return -EINVAL;
- }
- /* t_info size is fixed for now. Currently this works great, so there
- * seems to be no need to make it dynamic. */
- t_info = alloc_xenheap_pages(get_order_from_pages(T_INFO_PAGES), 0);
- if ( t_info == NULL )
- {
- printk("Xen trace buffers: t_info allocation failed! "
- "Tracing disabled.\n");
- return -ENOMEM;
- }
-
- for ( i = 0; i < T_INFO_PAGES; i++ )
- share_xen_page_with_privileged_guests(
- virt_to_page(t_info) + i, XENSHARE_readonly);
-
- t_info_mfn_list = (uint32_t *)t_info;
- offset = t_info_first_offset;
+ /* Calculate offset in u32 of first mfn */
+ calc_tinfo_first_offset();
- t_info->tbuf_size = opt_tbuf_size;
- printk(XENLOG_INFO "tbuf_size %d\n", t_info->tbuf_size);
+ pages = calculate_tbuf_size(pages);
+ order = get_order_from_pages(pages);
- nr_pages = opt_tbuf_size;
- order = get_order_from_pages(nr_pages);
+ t_info = alloc_xenheap_pages(get_order_from_pages(t_info_pages), 0);
+ if ( t_info == NULL )
+ goto out_dealloc;
/*
* First, allocate buffers for all of the cpus. If any
@@ -159,27 +169,29 @@ static int alloc_trace_bufs(void)
*/
for_each_online_cpu(cpu)
{
- int flags;
- char *rawbuf;
+ void *rawbuf;
struct t_buf *buf;
if ( (rawbuf = alloc_xenheap_pages(
order, MEMF_bits(32 + PAGE_SHIFT))) == NULL )
{
- printk("Xen trace buffers: memory allocation failed\n");
- opt_tbuf_size = 0;
+ printk("Xen trace buffers: memory allocation failed on cpu %d\n",
cpu);
goto out_dealloc;
}
- spin_lock_irqsave(&per_cpu(t_lock, cpu), flags);
-
- per_cpu(t_bufs, cpu) = buf = (struct t_buf *)rawbuf;
+ per_cpu(t_bufs, cpu) = buf = rawbuf;
buf->cons = buf->prod = 0;
per_cpu(t_data, cpu) = (unsigned char *)(buf + 1);
+ }
- spin_unlock_irqrestore(&per_cpu(t_lock, cpu), flags);
+ offset = t_info_first_offset;
+ t_info_mfn_list = (uint32_t *)t_info;
- }
+ for(i = 0; i < t_info_pages; i++)
+ share_xen_page_with_privileged_guests(
+ virt_to_page(t_info) + i, XENSHARE_readonly);
+
+ t_info->tbuf_size = pages;
/*
* Now share the pages to xentrace can map them, and write them in
@@ -188,89 +200,75 @@ static int alloc_trace_bufs(void)
for_each_online_cpu(cpu)
{
/* Share pages so that xentrace can map them. */
- char *rawbuf;
+ void *rawbuf = per_cpu(t_bufs, cpu);
+ struct page_info *p = virt_to_page(rawbuf);
+ uint32_t mfn = virt_to_mfn(rawbuf);
- if ( (rawbuf = (char *)per_cpu(t_bufs, cpu)) )
+ for ( i = 0; i < pages; i++ )
{
- struct page_info *p = virt_to_page(rawbuf);
- uint32_t mfn = virt_to_mfn(rawbuf);
+ share_xen_page_with_privileged_guests(p + i, XENSHARE_writable);
- for ( i = 0; i < nr_pages; i++ )
- {
- share_xen_page_with_privileged_guests(
- p + i, XENSHARE_writable);
-
- t_info_mfn_list[offset + i]=mfn + i;
- }
- /* Write list first, then write per-cpu offset. */
- wmb();
- t_info->mfn_offset[cpu]=offset;
- printk(XENLOG_INFO "p%d mfn %"PRIx32" offset %d\n",
- cpu, mfn, offset);
- offset+=i;
+ t_info_mfn_list[offset + i]=mfn + i;
}
+ t_info->mfn_offset[cpu]=offset;
+ printk(XENLOG_INFO "p%d mfn %"PRIx32" offset %d\n",
+ cpu, mfn, offset);
+ offset+=i;
+
+ spin_lock_init(&per_cpu(t_lock, cpu));
}
- data_size = (opt_tbuf_size * PAGE_SIZE - sizeof(struct t_buf));
+ data_size = (pages * PAGE_SIZE - sizeof(struct t_buf));
t_buf_highwater = data_size >> 1; /* 50% high water */
+ opt_tbuf_size = pages;
+
+ register_cpu_notifier(&cpu_nfb);
+
+ printk("Xen trace buffers: initialised\n");
+ wmb(); /* above must be visible before tb_init_done flag set */
+ tb_init_done = 1;
return 0;
+
out_dealloc:
for_each_online_cpu(cpu)
{
- int flags;
- char * rawbuf;
-
- spin_lock_irqsave(&per_cpu(t_lock, cpu), flags);
- if ( (rawbuf = (char *)per_cpu(t_bufs, cpu)) )
+ void *rawbuf = per_cpu(t_bufs, cpu);
+ per_cpu(t_bufs, cpu) = NULL;
+ printk("Xen trace buffers: cpu %d p %p\n", cpu, rawbuf);
+ if ( rawbuf )
{
- per_cpu(t_bufs, cpu) = NULL;
ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated));
free_xenheap_pages(rawbuf, order);
}
- spin_unlock_irqrestore(&per_cpu(t_lock, cpu), flags);
}
-
+ free_xenheap_pages(t_info, get_order_from_pages(t_info_pages));
+ t_info = NULL;
+ printk("Xen trace buffers: allocation failed! Tracing disabled.\n");
return -ENOMEM;
}
/**
- * tb_set_size - handle the logic involved with dynamically
- * allocating and deallocating tbufs
+ * tb_set_size - handle the logic involved with dynamically allocating tbufs
*
* This function is called when the SET_SIZE hypercall is done.
*/
-static int tb_set_size(int size)
+static int tb_set_size(unsigned int pages)
{
/*
* Setting size is a one-shot operation. It can be done either at
* boot time or via control tools, but not by both. Once buffers
* are created they cannot be destroyed.
*/
- int ret = 0;
-
- if ( opt_tbuf_size != 0 )
+ if ( opt_tbuf_size && pages != opt_tbuf_size )
{
- if ( size != opt_tbuf_size )
- gdprintk(XENLOG_INFO, "tb_set_size from %d to %d not
implemented\n",
- opt_tbuf_size, size);
+ gdprintk(XENLOG_INFO, "tb_set_size from %d to %d not implemented\n",
+ opt_tbuf_size, pages);
return -EINVAL;
}
- if ( size <= 0 )
- return -EINVAL;
-
- opt_tbuf_size = size;
-
- if ( (ret = alloc_trace_bufs()) != 0 )
- {
- opt_tbuf_size = 0;
- return ret;
- }
-
- printk("Xen trace buffers: initialized\n");
- return 0;
+ return alloc_trace_bufs(pages);
}
int trace_will_trace_event(u32 event)
@@ -299,21 +297,6 @@ int trace_will_trace_event(u32 event)
return 1;
}
-static int cpu_callback(
- struct notifier_block *nfb, unsigned long action, void *hcpu)
-{
- unsigned int cpu = (unsigned long)hcpu;
-
- if ( action == CPU_UP_PREPARE )
- spin_lock_init(&per_cpu(t_lock, cpu));
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block cpu_nfb = {
- .notifier_call = cpu_callback
-};
-
/**
* init_trace_bufs - performs initialization of the per-cpu trace buffers.
*
@@ -323,37 +306,13 @@ static struct notifier_block cpu_nfb = {
*/
void __init init_trace_bufs(void)
{
- int i;
-
- /* Calculate offset in u32 of first mfn */
- calc_tinfo_first_offset();
-
- /* Per-cpu t_lock initialisation. */
- for_each_online_cpu ( i )
- spin_lock_init(&per_cpu(t_lock, i));
- register_cpu_notifier(&cpu_nfb);
-
- if ( opt_tbuf_size == 0 )
- {
- printk("Xen trace buffers: disabled\n");
- goto fail;
- }
-
- if ( alloc_trace_bufs() != 0 )
+ if ( opt_tbuf_size && alloc_trace_bufs(opt_tbuf_size) )
{
- dprintk(XENLOG_INFO, "Xen trace buffers: "
- "allocation size %d failed, disabling\n",
- opt_tbuf_size);
- goto fail;
+ gdprintk(XENLOG_INFO, "Xen trace buffers: "
+ "allocation size %d failed, disabling\n",
+ opt_tbuf_size);
+ opt_tbuf_size = 0;
}
-
- printk("Xen trace buffers: initialised\n");
- wmb(); /* above must be visible before tb_init_done flag set */
- tb_init_done = 1;
- return;
-
- fail:
- opt_tbuf_size = 0;
}
/**
@@ -372,7 +331,7 @@ int tb_control(xen_sysctl_tbuf_op_t *tbc
case XEN_SYSCTL_TBUFOP_get_info:
tbc->evt_mask = tb_event_mask;
tbc->buffer_mfn = t_info ? virt_to_mfn(t_info) : 0;
- tbc->size = T_INFO_PAGES * PAGE_SIZE;
+ tbc->size = t_info_pages * PAGE_SIZE;
break;
case XEN_SYSCTL_TBUFOP_set_cpu_mask:
rc = xenctl_cpumap_to_cpumask(&tb_cpu_mask, &tbc->cpu_mask);
++++++ 23074-pfn.h.patch ++++++
References: fate#311376, fate#311529, bnc#578927, bnc#628554
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1300887295 0
# Node ID c80e0fb4fe932b4d8379ea5739af93ae22a30ea5
# Parent 3831bd253e02aa0536ed32e936777d026abb955e
Define new <pfn.h> header for PFN_{DOWN,UP} macros.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/domain_build.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/domain_build.c
+++ xen-4.1.2-testing/xen/arch/x86/domain_build.c
@@ -21,6 +21,7 @@
#include <xen/bitops.h>
#include <xen/compat.h>
#include <xen/libelf.h>
+#include <xen/pfn.h>
#include <asm/regs.h>
#include <asm/system.h>
#include <asm/io.h>
Index: xen-4.1.2-testing/xen/arch/x86/e820.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/e820.c
+++ xen-4.1.2-testing/xen/arch/x86/e820.c
@@ -4,6 +4,7 @@
#include <xen/mm.h>
#include <xen/compat.h>
#include <xen/dmi.h>
+#include <xen/pfn.h>
#include <asm/e820.h>
#include <asm/page.h>
#include <asm/processor.h>
Index: xen-4.1.2-testing/xen/arch/x86/mm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm.c
+++ xen-4.1.2-testing/xen/arch/x86/mm.c
@@ -99,6 +99,7 @@
#include <xen/event.h>
#include <xen/iocap.h>
#include <xen/guest_access.h>
+#include <xen/pfn.h>
#include <asm/paging.h>
#include <asm/shadow.h>
#include <asm/page.h>
Index: xen-4.1.2-testing/xen/arch/x86/msi.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/msi.c
+++ xen-4.1.2-testing/xen/arch/x86/msi.c
@@ -18,6 +18,7 @@
#include <xen/pci_regs.h>
#include <xen/iocap.h>
#include <xen/keyhandler.h>
+#include <xen/pfn.h>
#include <asm/io.h>
#include <asm/smp.h>
#include <asm/desc.h>
Index: xen-4.1.2-testing/xen/arch/x86/numa.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/numa.c
+++ xen-4.1.2-testing/xen/arch/x86/numa.c
@@ -13,6 +13,7 @@
#include <xen/keyhandler.h>
#include <xen/time.h>
#include <xen/smp.h>
+#include <xen/pfn.h>
#include <asm/acpi.h>
#include <xen/sched.h>
Index: xen-4.1.2-testing/xen/arch/x86/setup.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/setup.c
+++ xen-4.1.2-testing/xen/arch/x86/setup.c
@@ -21,6 +21,7 @@
#include <xen/rcupdate.h>
#include <xen/vga.h>
#include <xen/dmi.h>
+#include <xen/pfn.h>
#include <xen/nodemask.h>
#include <public/version.h>
#ifdef CONFIG_COMPAT
Index: xen-4.1.2-testing/xen/arch/x86/srat.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/srat.c
+++ xen-4.1.2-testing/xen/arch/x86/srat.c
@@ -17,6 +17,7 @@
#include <xen/nodemask.h>
#include <xen/acpi.h>
#include <xen/numa.h>
+#include <xen/pfn.h>
#include <asm/e820.h>
#include <asm/page.h>
Index: xen-4.1.2-testing/xen/arch/x86/tboot.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/tboot.c
+++ xen-4.1.2-testing/xen/arch/x86/tboot.c
@@ -6,6 +6,7 @@
#include <xen/domain_page.h>
#include <xen/iommu.h>
#include <xen/acpi.h>
+#include <xen/pfn.h>
#include <asm/fixmap.h>
#include <asm/page.h>
#include <asm/processor.h>
Index: xen-4.1.2-testing/xen/include/asm-x86/page.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/page.h
+++ xen-4.1.2-testing/xen/include/asm-x86/page.h
@@ -396,8 +396,6 @@ static inline uint32_t cacheattr_to_pte_
#endif /* !__ASSEMBLY__ */
-#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
-#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & PAGE_MASK)
#endif /* __X86_PAGE_H__ */
Index: xen-4.1.2-testing/xen/include/xen/pfn.h
===================================================================
--- /dev/null
+++ xen-4.1.2-testing/xen/include/xen/pfn.h
@@ -0,0 +1,9 @@
+#ifndef __XEN_PFN_H__
+#define __XEN_PFN_H__
+
+#include <asm/page.h>
+
+#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
+#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
+
+#endif /* __XEN_PFN_H__ */
++++++ 23091-xentrace_fix_t_info_pages_calculation..patch ++++++
changeset: 23091:67632e5cf652
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Mar 25 08:56:33 2011 +0000
files: xen/common/trace.c
description:
xentrace: fix t_info_pages calculation.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
xen/common/trace.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -29,6 +29,7 @@
#include <xen/init.h>
#include <xen/mm.h>
#include <xen/percpu.h>
+#include <xen/pfn.h>
#include <xen/cpu.h>
#include <asm/atomic.h>
#include <public/sysctl.h>
@@ -109,6 +110,7 @@ static int calculate_tbuf_size(unsigned
{
struct t_buf dummy;
typeof(dummy.prod) size;
+ unsigned int t_info_words, t_info_bytes;
/* force maximum value for an unsigned type */
size = -1;
@@ -122,11 +124,9 @@ static int calculate_tbuf_size(unsigned
pages = size;
}
- t_info_pages = num_online_cpus() * pages + t_info_first_offset;
- t_info_pages *= sizeof(uint32_t);
- t_info_pages /= PAGE_SIZE;
- if ( t_info_pages % PAGE_SIZE )
- t_info_pages++;
+ t_info_words = num_online_cpus() * pages + t_info_first_offset;
+ t_info_bytes = t_info_words * sizeof(uint32_t);
+ t_info_pages = PFN_UP(t_info_bytes);
return pages;
}
++++++ 23092-xentrace_print_calculated_numbers_in_calculate_tbuf_size.patch
++++++
changeset: 23092:45dafa422812
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Mar 25 08:57:28 2011 +0000
files: xen/common/trace.c
description:
xentrace: print calculated numbers in calculate_tbuf_size()
Print number of pages to allocate for per-cpu tracebuffer and metadata
to ease debugging when allocation fails.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
xen/common/trace.c | 2 ++
1 file changed, 2 insertions(+)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -127,6 +127,8 @@ static int calculate_tbuf_size(unsigned
t_info_words = num_online_cpus() * pages + t_info_first_offset;
t_info_bytes = t_info_words * sizeof(uint32_t);
t_info_pages = PFN_UP(t_info_bytes);
+ printk(XENLOG_INFO "xentrace: requesting %u t_info pages for %u trace
pages on %u cpus\n",
+ t_info_pages, pages, num_online_cpus());
return pages;
}
++++++
23093-xentrace_remove_gdprintk_usage_since_they_are_not_in_guest_context.patch
++++++
changeset: 23093:4b784605b089
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Mar 25 08:57:47 2011 +0000
files: xen/common/trace.c
description:
xentrace: remove gdprintk usage since they are not in guest context
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
xen/common/trace.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -119,7 +119,7 @@ static int calculate_tbuf_size(unsigned
size /= PAGE_SIZE;
if ( pages > size )
{
- gdprintk(XENLOG_INFO, "%s: requested number of %u pages reduced to
%u\n",
+ printk(XENLOG_INFO "%s: requested number of %u pages reduced to %u\n",
__func__, pages, (unsigned int)size);
pages = size;
}
@@ -265,7 +265,7 @@ static int tb_set_size(unsigned int page
*/
if ( opt_tbuf_size && pages != opt_tbuf_size )
{
- gdprintk(XENLOG_INFO, "tb_set_size from %d to %d not implemented\n",
+ printk(XENLOG_INFO "tb_set_size from %d to %d not implemented\n",
opt_tbuf_size, pages);
return -EINVAL;
}
@@ -310,7 +310,7 @@ void __init init_trace_bufs(void)
{
if ( opt_tbuf_size && alloc_trace_bufs(opt_tbuf_size) )
{
- gdprintk(XENLOG_INFO, "Xen trace buffers: "
+ printk(XENLOG_INFO "Xen trace buffers: "
"allocation size %d failed, disabling\n",
opt_tbuf_size);
opt_tbuf_size = 0;
++++++ 23094-xentrace_update_comments.patch ++++++
changeset: 23094:d09e8885bc82
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Mar 25 08:58:04 2011 +0000
files: xen/common/trace.c
description:
xentrace: update comments
Fix a typo, remove redundant comment.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
xen/common/trace.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -196,12 +196,11 @@ static int alloc_trace_bufs(unsigned int
t_info->tbuf_size = pages;
/*
- * Now share the pages to xentrace can map them, and write them in
+ * Now share the pages so xentrace can map them, and write them in
* the global t_info structure.
*/
for_each_online_cpu(cpu)
{
- /* Share pages so that xentrace can map them. */
void *rawbuf = per_cpu(t_bufs, cpu);
struct page_info *p = virt_to_page(rawbuf);
uint32_t mfn = virt_to_mfn(rawbuf);
++++++ 23095-xentrace_use_consistent_printk_prefix.patch ++++++
changeset: 23095:941119d58655
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Mar 25 09:01:37 2011 +0000
files: xen/common/trace.c
description:
xentrace: use consistent printk prefix
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
xen/common/trace.c | 31 +++++++++++++++++--------------
1 file changed, 17 insertions(+), 14 deletions(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -119,16 +119,18 @@ static int calculate_tbuf_size(unsigned
size /= PAGE_SIZE;
if ( pages > size )
{
- printk(XENLOG_INFO "%s: requested number of %u pages reduced to %u\n",
- __func__, pages, (unsigned int)size);
+ printk(XENLOG_INFO "xentrace: requested number of %u pages "
+ "reduced to %u\n",
+ pages, (unsigned int)size);
pages = size;
}
t_info_words = num_online_cpus() * pages + t_info_first_offset;
t_info_bytes = t_info_words * sizeof(uint32_t);
t_info_pages = PFN_UP(t_info_bytes);
- printk(XENLOG_INFO "xentrace: requesting %u t_info pages for %u trace
pages on %u cpus\n",
- t_info_pages, pages, num_online_cpus());
+ printk(XENLOG_INFO "xentrace: requesting %u t_info pages "
+ "for %u trace pages on %u cpus\n",
+ t_info_pages, pages, num_online_cpus());
return pages;
}
@@ -177,7 +179,8 @@ static int alloc_trace_bufs(unsigned int
if ( (rawbuf = alloc_xenheap_pages(
order, MEMF_bits(32 + PAGE_SHIFT))) == NULL )
{
- printk("Xen trace buffers: memory allocation failed on cpu %d\n",
cpu);
+ printk(XENLOG_INFO "xentrace: memory allocation failed "
+ "on cpu %d\n", cpu);
goto out_dealloc;
}
@@ -212,7 +215,7 @@ static int alloc_trace_bufs(unsigned int
t_info_mfn_list[offset + i]=mfn + i;
}
t_info->mfn_offset[cpu]=offset;
- printk(XENLOG_INFO "p%d mfn %"PRIx32" offset %d\n",
+ printk(XENLOG_INFO "xentrace: p%d mfn %"PRIx32" offset %d\n",
cpu, mfn, offset);
offset+=i;
@@ -225,7 +228,7 @@ static int alloc_trace_bufs(unsigned int
register_cpu_notifier(&cpu_nfb);
- printk("Xen trace buffers: initialised\n");
+ printk("xentrace: initialised\n");
wmb(); /* above must be visible before tb_init_done flag set */
tb_init_done = 1;
@@ -236,7 +239,7 @@ out_dealloc:
{
void *rawbuf = per_cpu(t_bufs, cpu);
per_cpu(t_bufs, cpu) = NULL;
- printk("Xen trace buffers: cpu %d p %p\n", cpu, rawbuf);
+ printk(XENLOG_DEBUG "xentrace: cpu %d p %p\n", cpu, rawbuf);
if ( rawbuf )
{
ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated));
@@ -245,7 +248,7 @@ out_dealloc:
}
free_xenheap_pages(t_info, get_order_from_pages(t_info_pages));
t_info = NULL;
- printk("Xen trace buffers: allocation failed! Tracing disabled.\n");
+ printk(XENLOG_WARNING "xentrace: allocation failed! Tracing disabled.\n");
return -ENOMEM;
}
@@ -264,8 +267,9 @@ static int tb_set_size(unsigned int page
*/
if ( opt_tbuf_size && pages != opt_tbuf_size )
{
- printk(XENLOG_INFO "tb_set_size from %d to %d not implemented\n",
- opt_tbuf_size, pages);
+ printk(XENLOG_INFO "xentrace: tb_set_size from %d to %d "
+ "not implemented\n",
+ opt_tbuf_size, pages);
return -EINVAL;
}
@@ -309,9 +313,8 @@ void __init init_trace_bufs(void)
{
if ( opt_tbuf_size && alloc_trace_bufs(opt_tbuf_size) )
{
- printk(XENLOG_INFO "Xen trace buffers: "
- "allocation size %d failed, disabling\n",
- opt_tbuf_size);
+ printk(XENLOG_INFO "xentrace: allocation size %d failed, disabling\n",
+ opt_tbuf_size);
opt_tbuf_size = 0;
}
}
++++++ 23096-x86-hpet-no-cpumask_lock.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1301043797 0
# Node ID a65612bcbb921e98a8843157bf365e4ab16e8144
# Parent 941119d58655f2b2df86d9ecc4cb502bbc5e783c
x86/hpet: eliminate cpumask_lock
According to the (now getting removed) comment in struct
hpet_event_channel, this was to prevent accessing a CPU's
timer_deadline after it got cleared from cpumask. This can be done
without a lock altogether - hpet_broadcast_exit() can simply clear
the bit, and handle_hpet_broadcast() can read timer_deadline before
looking at the mask a second time (the cpumask bit was already
found set by the surrounding loop).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Acked-by: Gang Wei <gang.wei@xxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/hpet.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hpet.c
+++ xen-4.1.2-testing/xen/arch/x86/hpet.c
@@ -34,18 +34,6 @@ struct hpet_event_channel
int shift;
s_time_t next_event;
cpumask_t cpumask;
- /*
- * cpumask_lock is used to prevent hpet intr handler from accessing other
- * cpu's timer_deadline after the other cpu's mask was cleared --
- * mask cleared means cpu waken up, then accessing timer_deadline from
- * other cpu is not safe.
- * It is not used for protecting cpumask, so set ops needn't take it.
- * Multiple cpus clear cpumask simultaneously is ok due to the atomic
- * feature of cpu_clear, so hpet_broadcast_exit() can take read lock for
- * clearing cpumask, and handle_hpet_broadcast() have to take write lock
- * for read cpumask & access timer_deadline.
- */
- rwlock_t cpumask_lock;
spinlock_t lock;
void (*event_handler)(struct hpet_event_channel *);
@@ -208,17 +196,18 @@ again:
/* find all expired events */
for_each_cpu_mask(cpu, ch->cpumask)
{
- write_lock_irq(&ch->cpumask_lock);
+ s_time_t deadline;
- if ( cpu_isset(cpu, ch->cpumask) )
- {
- if ( per_cpu(timer_deadline, cpu) <= now )
- cpu_set(cpu, mask);
- else if ( per_cpu(timer_deadline, cpu) < next_event )
- next_event = per_cpu(timer_deadline, cpu);
- }
+ rmb();
+ deadline = per_cpu(timer_deadline, cpu);
+ rmb();
+ if ( !cpu_isset(cpu, ch->cpumask) )
+ continue;
- write_unlock_irq(&ch->cpumask_lock);
+ if ( deadline <= now )
+ cpu_set(cpu, mask);
+ else if ( deadline < next_event )
+ next_event = deadline;
}
/* wakeup the cpus which have an expired event. */
@@ -598,7 +587,6 @@ void hpet_broadcast_init(void)
hpet_events[i].shift = 32;
hpet_events[i].next_event = STIME_MAX;
spin_lock_init(&hpet_events[i].lock);
- rwlock_init(&hpet_events[i].cpumask_lock);
wmb();
hpet_events[i].event_handler = handle_hpet_broadcast;
}
@@ -634,7 +622,6 @@ void hpet_broadcast_init(void)
legacy_hpet_event.idx = 0;
legacy_hpet_event.flags = 0;
spin_lock_init(&legacy_hpet_event.lock);
- rwlock_init(&legacy_hpet_event.cpumask_lock);
wmb();
legacy_hpet_event.event_handler = handle_hpet_broadcast;
@@ -716,9 +703,7 @@ void hpet_broadcast_exit(void)
if ( !reprogram_timer(this_cpu(timer_deadline)) )
raise_softirq(TIMER_SOFTIRQ);
- read_lock_irq(&ch->cpumask_lock);
cpu_clear(cpu, ch->cpumask);
- read_unlock_irq(&ch->cpumask_lock);
if ( ch != &legacy_hpet_event )
{
++++++ 23099-x86-rwlock-scalability.patch ++++++
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1301126601 0
# Node ID 612171ff82ea51aaf65d98fd1a551eb8d50fb481
# Parent c9f745c153ec8c3775e2ee03adc3cb30370b84f6
rwlock: Allow to scale to 2^31-1 readers on x86.
Also rework to match the 'trylock' style of raw function used for
spinlocks.
Inspired by Jan Beulich's patch to do similar improved scaling.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1301214635 -3600
# Node ID 0bc1c4746c8939337f693a513fd837fc03477db1
# Parent 48dac730a93b27ff60a340564e9a7afd7f9385f4
x86_32: Fix _raw_read_trylock() build on some gcc versions.
Was broken by 23099:612171ff82ea.
A bool_t is a single byte, and needs a 'q' register constraint. Avoid
the whole issue by changing the variable to an int, and explicitly
specify the operand suffix as 'l' for good measure.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
Index: xen-4.1.2-testing/xen/common/spinlock.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/spinlock.c
+++ xen-4.1.2-testing/xen/common/spinlock.c
@@ -234,7 +234,11 @@ void _spin_unlock_recursive(spinlock_t *
void _read_lock(rwlock_t *lock)
{
check_lock(&lock->debug);
- _raw_read_lock(&lock->raw);
+ while ( unlikely(!_raw_read_trylock(&lock->raw)) )
+ {
+ while ( likely(_raw_rw_is_write_locked(&lock->raw)) )
+ cpu_relax();
+ }
preempt_disable();
}
@@ -243,7 +247,13 @@ void _read_lock_irq(rwlock_t *lock)
ASSERT(local_irq_is_enabled());
local_irq_disable();
check_lock(&lock->debug);
- _raw_read_lock(&lock->raw);
+ while ( unlikely(!_raw_read_trylock(&lock->raw)) )
+ {
+ local_irq_enable();
+ while ( likely(_raw_rw_is_write_locked(&lock->raw)) )
+ cpu_relax();
+ local_irq_disable();
+ }
preempt_disable();
}
@@ -252,11 +262,26 @@ unsigned long _read_lock_irqsave(rwlock_
unsigned long flags;
local_irq_save(flags);
check_lock(&lock->debug);
- _raw_read_lock(&lock->raw);
+ while ( unlikely(!_raw_read_trylock(&lock->raw)) )
+ {
+ local_irq_restore(flags);
+ while ( likely(_raw_rw_is_write_locked(&lock->raw)) )
+ cpu_relax();
+ local_irq_save(flags);
+ }
preempt_disable();
return flags;
}
+int _read_trylock(rwlock_t *lock)
+{
+ check_lock(&lock->debug);
+ if ( !_raw_read_trylock(&lock->raw) )
+ return 0;
+ preempt_disable();
+ return 1;
+}
+
void _read_unlock(rwlock_t *lock)
{
preempt_enable();
@@ -280,7 +305,11 @@ void _read_unlock_irqrestore(rwlock_t *l
void _write_lock(rwlock_t *lock)
{
check_lock(&lock->debug);
- _raw_write_lock(&lock->raw);
+ while ( unlikely(!_raw_write_trylock(&lock->raw)) )
+ {
+ while ( likely(_raw_rw_is_locked(&lock->raw)) )
+ cpu_relax();
+ }
preempt_disable();
}
@@ -289,7 +318,13 @@ void _write_lock_irq(rwlock_t *lock)
ASSERT(local_irq_is_enabled());
local_irq_disable();
check_lock(&lock->debug);
- _raw_write_lock(&lock->raw);
+ while ( unlikely(!_raw_write_trylock(&lock->raw)) )
+ {
+ local_irq_enable();
+ while ( likely(_raw_rw_is_locked(&lock->raw)) )
+ cpu_relax();
+ local_irq_disable();
+ }
preempt_disable();
}
@@ -298,7 +333,13 @@ unsigned long _write_lock_irqsave(rwlock
unsigned long flags;
local_irq_save(flags);
check_lock(&lock->debug);
- _raw_write_lock(&lock->raw);
+ while ( unlikely(!_raw_write_trylock(&lock->raw)) )
+ {
+ local_irq_restore(flags);
+ while ( likely(_raw_rw_is_locked(&lock->raw)) )
+ cpu_relax();
+ local_irq_save(flags);
+ }
preempt_disable();
return flags;
}
Index: xen-4.1.2-testing/xen/include/asm-ia64/linux-xen/asm/spinlock.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-ia64/linux-xen/asm/spinlock.h
+++ xen-4.1.2-testing/xen/include/asm-ia64/linux-xen/asm/spinlock.h
@@ -35,17 +35,6 @@ typedef struct {
} raw_rwlock_t;
#define _RAW_RW_LOCK_UNLOCKED /*(raw_rwlock_t)*/ { 0, 0 }
-#define _raw_read_lock(rw)
\
-do {
\
- raw_rwlock_t *__read_lock_ptr = (rw);
\
-
\
- while (unlikely(ia64_fetchadd(1, (int *) __read_lock_ptr, acq) < 0)) {
\
- ia64_fetchadd(-1, (int *) __read_lock_ptr, rel);
\
- while (*(volatile int *)__read_lock_ptr < 0)
\
- cpu_relax();
\
- }
\
-} while (0)
-
#define _raw_read_unlock(rw) \
do { \
raw_rwlock_t *__read_lock_ptr = (rw); \
@@ -53,20 +42,6 @@ do {
\
} while (0)
#ifdef ASM_SUPPORTED
-#define _raw_write_lock(rw)
\
-do {
\
- __asm__ __volatile__ (
\
- "mov ar.ccv = r0\n"
\
- "dep r29 = -1, r0, 31, 1;;\n"
\
- "1:\n"
\
- "ld4 r2 = [%0];;\n"
\
- "cmp4.eq p0,p7 = r0,r2\n"
\
- "(p7) br.cond.spnt.few 1b \n"
\
- "cmpxchg4.acq r2 = [%0], r29, ar.ccv;;\n"
\
- "cmp4.eq p0,p7 = r0, r2\n"
\
- "(p7) br.cond.spnt.few 1b;;\n"
\
- :: "r"(rw) : "ar.ccv", "p7", "r2", "r29", "memory");
\
-} while(0)
#define _raw_write_trylock(rw)
\
({
\
@@ -82,16 +57,6 @@ do {
\
#else /* !ASM_SUPPORTED */
-#define _raw_write_lock(l)
\
-({
\
- __u64 ia64_val, ia64_set_val = ia64_dep_mi(-1, 0, 31, 1);
\
- __u32 *ia64_write_lock_ptr = (__u32 *) (l);
\
- do {
\
- while (*ia64_write_lock_ptr)
\
- ia64_barrier();
\
- ia64_val = ia64_cmpxchg4_acq(ia64_write_lock_ptr, ia64_set_val,
0); \
- } while (ia64_val);
\
-})
#define _raw_write_trylock(rw) \
({ \
Index: xen-4.1.2-testing/xen/include/asm-x86/spinlock.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/spinlock.h
+++ xen-4.1.2-testing/xen/include/asm-x86/spinlock.h
@@ -35,51 +35,29 @@ typedef struct {
volatile int lock;
} raw_rwlock_t;
-#define RW_LOCK_BIAS 0x01000000
-#define _RAW_RW_LOCK_UNLOCKED /*(raw_rwlock_t)*/ { RW_LOCK_BIAS }
+#define RW_WRITE_BIAS 0x7fffffff
+#define _RAW_RW_LOCK_UNLOCKED /*(raw_rwlock_t)*/ { 0 }
-static always_inline void _raw_read_lock(raw_rwlock_t *rw)
+static always_inline int _raw_read_trylock(raw_rwlock_t *rw)
{
- asm volatile (
- "1: lock; decl %0 \n"
- " jns 3f \n"
- " lock; incl %0 \n"
- "2: rep; nop \n"
- " cmpl $1,%0 \n"
- " js 2b \n"
- " jmp 1b \n"
- "3:"
- : "=m" (rw->lock) : : "memory" );
-}
+ int acquired;
-static always_inline void _raw_write_lock(raw_rwlock_t *rw)
-{
asm volatile (
- "1: lock; subl %1,%0 \n"
- " jz 3f \n"
- " lock; addl %1,%0 \n"
- "2: rep; nop \n"
- " cmpl %1,%0 \n"
- " jne 2b \n"
+ " lock; decl %0 \n"
+ " jns 2f \n"
+ "1: .subsection 1 \n"
+ "2: lock; incl %0 \n"
+ " decl %1 \n"
" jmp 1b \n"
- "3:"
- : "=m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory" );
+ " .subsection 0 \n"
+ : "=m" (rw->lock), "=r" (acquired) : "1" (1) : "memory" );
+
+ return acquired;
}
static always_inline int _raw_write_trylock(raw_rwlock_t *rw)
{
- int rc;
-
- asm volatile (
- " lock; subl %2,%0 \n"
- " jz 1f \n"
- " lock; addl %2,%0 \n"
- " dec %1 \n"
- "1:"
- : "=m" (rw->lock), "=r" (rc) : "i" (RW_LOCK_BIAS), "1" (1)
- : "memory" );
-
- return rc;
+ return (cmpxchg(&rw->lock, 0, RW_WRITE_BIAS) == 0);
}
static always_inline void _raw_read_unlock(raw_rwlock_t *rw)
@@ -92,11 +70,11 @@ static always_inline void _raw_read_unlo
static always_inline void _raw_write_unlock(raw_rwlock_t *rw)
{
asm volatile (
- "lock ; addl %1,%0"
- : "=m" ((rw)->lock) : "i" (RW_LOCK_BIAS) : "memory" );
+ "lock ; subl %1,%0"
+ : "=m" ((rw)->lock) : "i" (RW_WRITE_BIAS) : "memory" );
}
-#define _raw_rw_is_locked(x) ((x)->lock < RW_LOCK_BIAS)
-#define _raw_rw_is_write_locked(x) ((x)->lock <= 0)
+#define _raw_rw_is_locked(x) ((x)->lock != 0)
+#define _raw_rw_is_write_locked(x) ((x)->lock > 0)
#endif /* __ASM_SPINLOCK_H */
Index: xen-4.1.2-testing/xen/include/xen/spinlock.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/xen/spinlock.h
+++ xen-4.1.2-testing/xen/include/xen/spinlock.h
@@ -157,6 +157,7 @@ unsigned long _read_lock_irqsave(rwlock_
void _read_unlock(rwlock_t *lock);
void _read_unlock_irq(rwlock_t *lock);
void _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags);
+int _read_trylock(rwlock_t *lock);
void _write_lock(rwlock_t *lock);
void _write_lock_irq(rwlock_t *lock);
@@ -210,6 +211,7 @@ int _rw_is_write_locked(rwlock_t *lock);
#define read_unlock(l) _read_unlock(l)
#define read_unlock_irq(l) _read_unlock_irq(l)
#define read_unlock_irqrestore(l, f) _read_unlock_irqrestore(l, f)
+#define read_trylock(l) _read_trylock(l)
#define write_lock(l) _write_lock(l)
#define write_lock_irq(l) _write_lock_irq(l)
++++++ 23103-x86-pirq-guest-eoi-check.patch ++++++
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1301132521 0
# Node ID 48dac730a93b27ff60a340564e9a7afd7f9385f4
# Parent 8f001d864fefac689b7662bc9979eaddf4fd6e9c
x86: __pirq_guest_eoi() must check it is called for a fully
guest-bound irq before accessing desc->action.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/irq.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/irq.c
+++ xen-4.1.2-testing/xen/arch/x86/irq.c
@@ -1033,6 +1033,12 @@ static void __pirq_guest_eoi(struct doma
return;
}
+ if ( !(desc->status & IRQ_GUEST) )
+ {
+ spin_unlock_irq(&desc->lock);
+ return;
+ }
+
action = (irq_guest_action_t *)desc->action;
irq = desc - irq_desc;
++++++ 23127-vtd-bios-settings.patch ++++++
# HG changeset patch
# User Allen Kay <allen.m.kay@xxxxxxxxx>
# Date 1301755765 -3600
# Node ID 1046830079376a4b29fcad0cd037a834e808ed06
# Parent 89c23f58aa986092da0c9a7dfac1c41befbe1f3f
[VTD] check BIOS settings before enabling interrupt remapping or x2apic
Check flags field in ACPI DMAR structure before enabling interrupt
remapping or x2apic. This allows platform vendors to disable
interrupt remapping or x2apic features if on board BIOS does not
support them.
Signed-off-by: Allen Kay <allen.m.kay@xxxxxxxxx>
# HG changeset patch
# User Allen Kay <allen.m.kay@xxxxxxxxx>
# Date 1302077462 -3600
# Node ID c7916d6f4dfba9d6c7eeb0fc2796068d75e2fb4a
# Parent 42fa70e0761bbb0596618ca5323664f31a2faa76
[VTD] Fixes to ACPI DMAR flag checks.
* platform_supports_{intremap,x2apic} should not be marked __init as
they are used during S3 resume.
* DMAR flags should be taken from the table passed to
acpi_parse_dmar() -- this is the trusted copy of the DMAR, when
running in TXT mode.
Signed-off-by: Allen Kay <allen.m.kay@xxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/apic.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/apic.c
+++ xen-4.1.2-testing/xen/arch/x86/apic.c
@@ -566,7 +566,7 @@ static void resume_x2apic(void)
mask_8259A();
mask_IO_APIC_setup(ioapic_entries);
- iommu_enable_IR();
+ iommu_enable_x2apic_IR();
__enable_x2apic();
restore_IO_APIC_setup(ioapic_entries);
@@ -783,7 +783,7 @@ int lapic_suspend(void)
local_irq_save(flags);
disable_local_APIC();
- iommu_disable_IR();
+ iommu_disable_x2apic_IR();
local_irq_restore(flags);
return 0;
}
@@ -1029,7 +1029,7 @@ void __init x2apic_bsp_setup(void)
mask_8259A();
mask_IO_APIC_setup(ioapic_entries);
- if ( iommu_enable_IR() )
+ if ( iommu_enable_x2apic_IR() )
{
if ( x2apic_enabled )
panic("Interrupt remapping could not be enabled while "
Index: xen-4.1.2-testing/xen/drivers/passthrough/vtd/dmar.c
===================================================================
--- xen-4.1.2-testing.orig/xen/drivers/passthrough/vtd/dmar.c
+++ xen-4.1.2-testing/xen/drivers/passthrough/vtd/dmar.c
@@ -46,6 +46,7 @@ LIST_HEAD(acpi_rmrr_units);
LIST_HEAD(acpi_atsr_units);
LIST_HEAD(acpi_rhsa_units);
+static int __read_mostly dmar_flags;
static u64 igd_drhd_address;
u8 dmar_host_address_width;
@@ -684,6 +685,7 @@ static int __init acpi_parse_dmar(struct
int ret = 0;
dmar = (struct acpi_table_dmar *)table;
+ dmar_flags = dmar->flags;
if ( !iommu_enabled )
{
@@ -804,3 +806,22 @@ void acpi_dmar_zap(void)
dmar_table->signature[0] = 'X';
dmar_table->checksum -= 'X'-'D';
}
+
+int platform_supports_intremap(void)
+{
+ unsigned int flags = 0;
+
+ flags = DMAR_INTR_REMAP;
+ return ((dmar_flags & flags) == DMAR_INTR_REMAP);
+}
+
+int platform_supports_x2apic(void)
+{
+ unsigned int flags = 0;
+
+ if (!cpu_has_x2apic)
+ return 0;
+
+ flags = DMAR_INTR_REMAP | DMAR_X2APIC_OPT_OUT;
+ return ((dmar_flags & flags) == DMAR_INTR_REMAP);
+}
Index: xen-4.1.2-testing/xen/drivers/passthrough/vtd/extern.h
===================================================================
--- xen-4.1.2-testing.orig/xen/drivers/passthrough/vtd/extern.h
+++ xen-4.1.2-testing/xen/drivers/passthrough/vtd/extern.h
@@ -87,5 +87,7 @@ void vtd_ops_preamble_quirk(struct iommu
void vtd_ops_postamble_quirk(struct iommu* iommu);
void me_wifi_quirk(struct domain *domain, u8 bus, u8 devfn, int map);
void pci_vtd_quirk(struct pci_dev *pdev);
+int platform_supports_intremap(void);
+int platform_supports_x2apic(void);
#endif // _VTD_EXTERN_H_
Index: xen-4.1.2-testing/xen/drivers/passthrough/vtd/intremap.c
===================================================================
--- xen-4.1.2-testing.orig/xen/drivers/passthrough/vtd/intremap.c
+++ xen-4.1.2-testing/xen/drivers/passthrough/vtd/intremap.c
@@ -741,6 +741,13 @@ int enable_intremap(struct iommu *iommu,
ASSERT(ecap_intr_remap(iommu->ecap) && iommu_intremap);
+ if ( !platform_supports_intremap() )
+ {
+ dprintk(XENLOG_ERR VTDPREFIX,
+ "Platform firmware does not support interrupt remapping\n");
+ return -EINVAL;
+ }
+
ir_ctrl = iommu_ir_ctrl(iommu);
sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
@@ -847,10 +854,10 @@ out:
}
/*
- * This function is used to enable Interrutp remapping when
+ * This function is used to enable Interrupt remapping when
* enable x2apic
*/
-int iommu_enable_IR(void)
+int iommu_enable_x2apic_IR(void)
{
struct acpi_drhd_unit *drhd;
struct iommu *iommu;
@@ -858,6 +865,9 @@ int iommu_enable_IR(void)
if ( !iommu_supports_eim() )
return -1;
+ if ( !platform_supports_x2apic() )
+ return -1;
+
for_each_drhd_unit ( drhd )
{
struct qi_ctrl *qi_ctrl = NULL;
@@ -907,7 +917,7 @@ int iommu_enable_IR(void)
* This function is used to disable Interrutp remapping when
* suspend local apic
*/
-void iommu_disable_IR(void)
+void iommu_disable_x2apic_IR(void)
{
struct acpi_drhd_unit *drhd;
Index: xen-4.1.2-testing/xen/drivers/passthrough/vtd/iommu.c
===================================================================
--- xen-4.1.2-testing.orig/xen/drivers/passthrough/vtd/iommu.c
+++ xen-4.1.2-testing/xen/drivers/passthrough/vtd/iommu.c
@@ -2006,7 +2006,7 @@ static int init_vtd_hw(void)
{
iommu_intremap = 0;
dprintk(XENLOG_WARNING VTDPREFIX,
- "Failed to enable Interrupt Remapping!\n");
+ "Interrupt Remapping not enabled\n");
break;
}
}
Index: xen-4.1.2-testing/xen/drivers/passthrough/vtd/iommu.h
===================================================================
--- xen-4.1.2-testing.orig/xen/drivers/passthrough/vtd/iommu.h
+++ xen-4.1.2-testing/xen/drivers/passthrough/vtd/iommu.h
@@ -22,6 +22,10 @@
#include <xen/types.h>
+/* DMAR Flags bits */
+#define DMAR_INTR_REMAP 0x1
+#define DMAR_X2APIC_OPT_OUT 0x2
+
/*
* Intel IOMMU register specification per version 1.0 public spec.
*/
Index: xen-4.1.2-testing/xen/include/xen/iommu.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/xen/iommu.h
+++ xen-4.1.2-testing/xen/include/xen/iommu.h
@@ -66,8 +66,8 @@ struct iommu {
int iommu_setup(void);
int iommu_supports_eim(void);
-int iommu_enable_IR(void);
-void iommu_disable_IR(void);
+int iommu_enable_x2apic_IR(void);
+void iommu_disable_x2apic_IR(void);
int iommu_add_device(struct pci_dev *pdev);
int iommu_remove_device(struct pci_dev *pdev);
++++++ 23128-xentrace_correct_formula_to_calculate_t_info_pages.patch ++++++
changeset: 23128:4a335f1000ea
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sat Apr 02 15:50:19 2011 +0100
files: xen/common/trace.c
description:
xentrace: correct formula to calculate t_info_pages
The current formula to calculate t_info_pages, based on the initial
code, is slightly incorrect. It may allocate more than needed.
Each cpu has some pages/mfns stored as uint32_t.
That list is stored with an offset at tinfo.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
---
xen/common/trace.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -110,7 +110,7 @@ static int calculate_tbuf_size(unsigned
{
struct t_buf dummy;
typeof(dummy.prod) size;
- unsigned int t_info_words, t_info_bytes;
+ unsigned int t_info_words;
/* force maximum value for an unsigned type */
size = -1;
@@ -125,9 +125,8 @@ static int calculate_tbuf_size(unsigned
pages = size;
}
- t_info_words = num_online_cpus() * pages + t_info_first_offset;
- t_info_bytes = t_info_words * sizeof(uint32_t);
- t_info_pages = PFN_UP(t_info_bytes);
+ t_info_words = num_online_cpus() * pages * sizeof(uint32_t);
+ t_info_pages = PFN_UP(t_info_first_offset + t_info_words);
printk(XENLOG_INFO "xentrace: requesting %u t_info pages "
"for %u trace pages on %u cpus\n",
t_info_pages, pages, num_online_cpus());
++++++ 23129-xentrace_remove_unneeded_debug_printk.patch ++++++
changeset: 23129:219ba19aedeb
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sat Apr 02 15:50:47 2011 +0100
files: xen/common/trace.c
description:
xentrace: remove unneeded debug printk
The pointer value in case of an allocation failure is rather
uninteresting.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
---
xen/common/trace.c | 1 -
1 file changed, 1 deletion(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -238,7 +238,6 @@ out_dealloc:
{
void *rawbuf = per_cpu(t_bufs, cpu);
per_cpu(t_bufs, cpu) = NULL;
- printk(XENLOG_DEBUG "xentrace: cpu %d p %p\n", cpu, rawbuf);
if ( rawbuf )
{
ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated));
++++++
23173-xentrace_Move_register_cpu_notifier_call_into_boot-time_init..patch ++++++
changeset: 23173:94cef9aaf0cd
user: Keir Fraser <keir@xxxxxxx>
date: Wed Apr 06 15:52:50 2011 +0100
files: xen/common/trace.c
description:
xentrace: Move register_cpu_notifier() call into boot-time init.
We can't do it lazily from alloc_trace_bufs() as that gets called
later if tracing is enabled later by dom0.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
---
xen/common/trace.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -225,8 +225,6 @@ static int alloc_trace_bufs(unsigned int
t_buf_highwater = data_size >> 1; /* 50% high water */
opt_tbuf_size = pages;
- register_cpu_notifier(&cpu_nfb);
-
printk("xentrace: initialised\n");
wmb(); /* above must be visible before tb_init_done flag set */
tb_init_done = 1;
@@ -309,6 +307,8 @@ int trace_will_trace_event(u32 event)
*/
void __init init_trace_bufs(void)
{
+ register_cpu_notifier(&cpu_nfb);
+
if ( opt_tbuf_size && alloc_trace_bufs(opt_tbuf_size) )
{
printk(XENLOG_INFO "xentrace: allocation size %d failed, disabling\n",
++++++ 23199-amd-iommu-unmapped-intr-fault.patch ++++++
# HG changeset patch
# User Wei Wang <wei.wang2@xxxxxxx>
# Date 1302610857 -3600
# Node ID dbd98ab2f87facba8117bb881fa2ea5dfdb92960
# Parent 697ac895c11c6d5d82524de56796cee98fded2a5
amd iommu: Unmapped interrupt should generate IO page faults.
This helps us to debug interrupt issues.
Signed-off-by: Wei Wang <wei.wang2@xxxxxxx>
Index: xen-4.1.2-testing/xen/drivers/passthrough/amd/iommu_map.c
===================================================================
--- xen-4.1.2-testing.orig/xen/drivers/passthrough/amd/iommu_map.c
+++ xen-4.1.2-testing/xen/drivers/passthrough/amd/iommu_map.c
@@ -327,8 +327,9 @@ void amd_iommu_set_intremap_table(u32 *d
set_field_in_reg_u32(0xB, entry,
IOMMU_DEV_TABLE_INT_TABLE_LENGTH_MASK,
IOMMU_DEV_TABLE_INT_TABLE_LENGTH_SHIFT, &entry);
- /* ignore unmapped interrupts */
- set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
+
+ /* unmapped interrupt results io page faults*/
+ set_field_in_reg_u32(IOMMU_CONTROL_DISABLED, entry,
IOMMU_DEV_TABLE_INT_TABLE_IGN_UNMAPPED_MASK,
IOMMU_DEV_TABLE_INT_TABLE_IGN_UNMAPPED_SHIFT, &entry);
set_field_in_reg_u32(int_valid ? IOMMU_CONTROL_ENABLED :
++++++ 23233-hvm-cr-access.patch ++++++
References: FATE#309900
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1303116432 -3600
# Node ID 1276926e3795b11ef6ac2f59df900d8e0ba9f54b
# Parent 07d832ad23021445bc56fafaeb2843c94d868005
vmx/hvm: move mov-cr handling functions to generic HVM code
Currently the handling of CR accesses intercepts is done much
differently in SVM and VMX. For future usage move the VMX part
into the generic HVM path and use the exported functions.
Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx>
Signed-off-by: Keir Fraser <keir@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/hvm/hvm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/hvm.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/hvm.c
@@ -1298,6 +1298,86 @@ static void hvm_set_uc_mode(struct vcpu
return hvm_funcs.set_uc_mode(v);
}
+int hvm_mov_to_cr(unsigned int cr, unsigned int gpr)
+{
+ struct vcpu *curr = current;
+ unsigned long val, *reg;
+
+ if ( (reg = get_x86_gpr(guest_cpu_user_regs(), gpr)) == NULL )
+ {
+ gdprintk(XENLOG_ERR, "invalid gpr: %u\n", gpr);
+ goto exit_and_crash;
+ }
+
+ val = *reg;
+ HVMTRACE_LONG_2D(CR_WRITE, cr, TRC_PAR_LONG(val));
+ HVM_DBG_LOG(DBG_LEVEL_1, "CR%u, value = %lx", cr, val);
+
+ switch ( cr )
+ {
+ case 0:
+ return hvm_set_cr0(val);
+
+ case 3:
+ return hvm_set_cr3(val);
+
+ case 4:
+ return hvm_set_cr4(val);
+
+ case 8:
+ vlapic_set_reg(vcpu_vlapic(curr), APIC_TASKPRI, ((val & 0x0f) << 4));
+ break;
+
+ default:
+ gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
+ goto exit_and_crash;
+ }
+
+ return X86EMUL_OKAY;
+
+ exit_and_crash:
+ domain_crash(curr->domain);
+ return X86EMUL_UNHANDLEABLE;
+}
+
+int hvm_mov_from_cr(unsigned int cr, unsigned int gpr)
+{
+ struct vcpu *curr = current;
+ unsigned long val = 0, *reg;
+
+ if ( (reg = get_x86_gpr(guest_cpu_user_regs(), gpr)) == NULL )
+ {
+ gdprintk(XENLOG_ERR, "invalid gpr: %u\n", gpr);
+ goto exit_and_crash;
+ }
+
+ switch ( cr )
+ {
+ case 0:
+ case 2:
+ case 3:
+ case 4:
+ val = curr->arch.hvm_vcpu.guest_cr[cr];
+ break;
+ case 8:
+ val = (vlapic_get_reg(vcpu_vlapic(curr), APIC_TASKPRI) & 0xf0) >> 4;
+ break;
+ default:
+ gdprintk(XENLOG_ERR, "invalid cr: %u\n", cr);
+ goto exit_and_crash;
+ }
+
+ *reg = val;
+ HVMTRACE_LONG_2D(CR_READ, cr, TRC_PAR_LONG(val));
+ HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR%u, value = %lx", cr, val);
+
+ return X86EMUL_OKAY;
+
+ exit_and_crash:
+ domain_crash(curr->domain);
+ return X86EMUL_UNHANDLEABLE;
+}
+
int hvm_set_cr0(unsigned long value)
{
struct vcpu *v = current;
Index: xen-4.1.2-testing/xen/arch/x86/hvm/vmx/vmx.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/vmx/vmx.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/vmx/vmx.c
@@ -1545,182 +1545,42 @@ static void vmx_invlpg_intercept(unsigne
vpid_sync_vcpu_gva(curr, vaddr);
}
-#define CASE_SET_REG(REG, reg) \
- case VMX_CONTROL_REG_ACCESS_GPR_ ## REG: regs->reg = value; break
-#define CASE_GET_REG(REG, reg) \
- case VMX_CONTROL_REG_ACCESS_GPR_ ## REG: value = regs->reg; break
-
-#define CASE_EXTEND_SET_REG \
- CASE_EXTEND_REG(S)
-#define CASE_EXTEND_GET_REG \
- CASE_EXTEND_REG(G)
-
-#ifdef __i386__
-#define CASE_EXTEND_REG(T)
-#else
-#define CASE_EXTEND_REG(T) \
- CASE_ ## T ## ET_REG(R8, r8); \
- CASE_ ## T ## ET_REG(R9, r9); \
- CASE_ ## T ## ET_REG(R10, r10); \
- CASE_ ## T ## ET_REG(R11, r11); \
- CASE_ ## T ## ET_REG(R12, r12); \
- CASE_ ## T ## ET_REG(R13, r13); \
- CASE_ ## T ## ET_REG(R14, r14); \
- CASE_ ## T ## ET_REG(R15, r15)
-#endif
-
-static int mov_to_cr(int gp, int cr, struct cpu_user_regs *regs)
-{
- unsigned long value;
- struct vcpu *v = current;
- struct vlapic *vlapic = vcpu_vlapic(v);
- int rc = 0;
- unsigned long old;
-
- switch ( gp )
- {
- CASE_GET_REG(EAX, eax);
- CASE_GET_REG(ECX, ecx);
- CASE_GET_REG(EDX, edx);
- CASE_GET_REG(EBX, ebx);
- CASE_GET_REG(EBP, ebp);
- CASE_GET_REG(ESI, esi);
- CASE_GET_REG(EDI, edi);
- CASE_GET_REG(ESP, esp);
- CASE_EXTEND_GET_REG;
- default:
- gdprintk(XENLOG_ERR, "invalid gp: %d\n", gp);
- goto exit_and_crash;
- }
-
- HVMTRACE_LONG_2D(CR_WRITE, cr, TRC_PAR_LONG(value));
-
- HVM_DBG_LOG(DBG_LEVEL_1, "CR%d, value = %lx", cr, value);
-
- switch ( cr )
- {
- case 0:
- old = v->arch.hvm_vcpu.guest_cr[0];
- rc = !hvm_set_cr0(value);
- if (rc)
- hvm_memory_event_cr0(value, old);
- return rc;
-
- case 3:
- old = v->arch.hvm_vcpu.guest_cr[3];
- rc = !hvm_set_cr3(value);
- if (rc)
- hvm_memory_event_cr3(value, old);
- return rc;
-
- case 4:
- old = v->arch.hvm_vcpu.guest_cr[4];
- rc = !hvm_set_cr4(value);
- if (rc)
- hvm_memory_event_cr4(value, old);
- return rc;
-
- case 8:
- vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4));
- break;
-
- default:
- gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
- goto exit_and_crash;
- }
-
- return 1;
-
- exit_and_crash:
- domain_crash(v->domain);
- return 0;
-}
-
-/*
- * Read from control registers. CR0 and CR4 are read from the shadow.
- */
-static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
+static int vmx_cr_access(unsigned long exit_qualification)
{
- unsigned long value = 0;
- struct vcpu *v = current;
- struct vlapic *vlapic = vcpu_vlapic(v);
-
- switch ( cr )
- {
- case 3:
- value = (unsigned long)v->arch.hvm_vcpu.guest_cr[3];
- break;
- case 8:
- value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI);
- value = (value & 0xF0) >> 4;
- break;
- default:
- gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
- domain_crash(v->domain);
- break;
- }
-
- switch ( gp ) {
- CASE_SET_REG(EAX, eax);
- CASE_SET_REG(ECX, ecx);
- CASE_SET_REG(EDX, edx);
- CASE_SET_REG(EBX, ebx);
- CASE_SET_REG(EBP, ebp);
- CASE_SET_REG(ESI, esi);
- CASE_SET_REG(EDI, edi);
- CASE_SET_REG(ESP, esp);
- CASE_EXTEND_SET_REG;
- default:
- printk("invalid gp: %d\n", gp);
- domain_crash(v->domain);
- break;
- }
-
- HVMTRACE_LONG_2D(CR_READ, cr, TRC_PAR_LONG(value));
-
- HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR%d, value = %lx", cr, value);
-}
-
-static int vmx_cr_access(unsigned long exit_qualification,
- struct cpu_user_regs *regs)
-{
- unsigned int gp, cr;
- unsigned long value;
- struct vcpu *v = current;
+ struct vcpu *curr = current;
- switch ( exit_qualification & VMX_CONTROL_REG_ACCESS_TYPE )
+ switch ( VMX_CONTROL_REG_ACCESS_TYPE(exit_qualification) )
{
- case VMX_CONTROL_REG_ACCESS_TYPE_MOV_TO_CR:
- gp = exit_qualification & VMX_CONTROL_REG_ACCESS_GPR;
- cr = exit_qualification & VMX_CONTROL_REG_ACCESS_NUM;
- return mov_to_cr(gp, cr, regs);
- case VMX_CONTROL_REG_ACCESS_TYPE_MOV_FROM_CR:
- gp = exit_qualification & VMX_CONTROL_REG_ACCESS_GPR;
- cr = exit_qualification & VMX_CONTROL_REG_ACCESS_NUM;
- mov_from_cr(cr, gp, regs);
- break;
- case VMX_CONTROL_REG_ACCESS_TYPE_CLTS:
- {
- unsigned long old = v->arch.hvm_vcpu.guest_cr[0];
- v->arch.hvm_vcpu.guest_cr[0] &= ~X86_CR0_TS;
- vmx_update_guest_cr(v, 0);
-
- hvm_memory_event_cr0(v->arch.hvm_vcpu.guest_cr[0], old);
-
+ case VMX_CONTROL_REG_ACCESS_TYPE_MOV_TO_CR: {
+ unsigned long gp = VMX_CONTROL_REG_ACCESS_GPR(exit_qualification);
+ unsigned long cr = VMX_CONTROL_REG_ACCESS_NUM(exit_qualification);
+ return hvm_mov_to_cr(cr, gp);
+ }
+ case VMX_CONTROL_REG_ACCESS_TYPE_MOV_FROM_CR: {
+ unsigned long gp = VMX_CONTROL_REG_ACCESS_GPR(exit_qualification);
+ unsigned long cr = VMX_CONTROL_REG_ACCESS_NUM(exit_qualification);
+ return hvm_mov_from_cr(cr, gp);
+ }
+ case VMX_CONTROL_REG_ACCESS_TYPE_CLTS: {
+ unsigned long old = curr->arch.hvm_vcpu.guest_cr[0];
+ curr->arch.hvm_vcpu.guest_cr[0] &= ~X86_CR0_TS;
+ vmx_update_guest_cr(curr, 0);
+ hvm_memory_event_cr0(curr->arch.hvm_vcpu.guest_cr[0], old);
HVMTRACE_0D(CLTS);
break;
}
- case VMX_CONTROL_REG_ACCESS_TYPE_LMSW:
- value = v->arch.hvm_vcpu.guest_cr[0];
+ case VMX_CONTROL_REG_ACCESS_TYPE_LMSW: {
+ unsigned long value = curr->arch.hvm_vcpu.guest_cr[0];
/* LMSW can: (1) set bits 0-3; (2) clear bits 1-3. */
value = (value & ~0xe) | ((exit_qualification >> 16) & 0xf);
HVMTRACE_LONG_1D(LMSW, value);
- return !hvm_set_cr0(value);
+ return hvm_set_cr0(value);
+ }
default:
BUG();
}
- return 1;
+ return X86EMUL_OKAY;
}
static const struct lbr_info {
@@ -2525,7 +2385,7 @@ asmlinkage void vmx_vmexit_handler(struc
case EXIT_REASON_CR_ACCESS:
{
exit_qualification = __vmread(EXIT_QUALIFICATION);
- if ( vmx_cr_access(exit_qualification, regs) )
+ if ( vmx_cr_access(exit_qualification) == X86EMUL_OKAY )
update_guest_eip(); /* Safe: MOV Cn, LMSW, CLTS */
break;
}
Index: xen-4.1.2-testing/xen/arch/x86/traps.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/traps.c
+++ xen-4.1.2-testing/xen/arch/x86/traps.c
@@ -368,6 +368,36 @@ void vcpu_show_execution_state(struct vc
vcpu_unpause(v);
}
+unsigned long *get_x86_gpr(struct cpu_user_regs *regs, unsigned int modrm_reg)
+{
+ void *p;
+
+ switch ( modrm_reg )
+ {
+ case 0: p = ®s->eax; break;
+ case 1: p = ®s->ecx; break;
+ case 2: p = ®s->edx; break;
+ case 3: p = ®s->ebx; break;
+ case 4: p = ®s->esp; break;
+ case 5: p = ®s->ebp; break;
+ case 6: p = ®s->esi; break;
+ case 7: p = ®s->edi; break;
+#if defined(__x86_64__)
+ case 8: p = ®s->r8; break;
+ case 9: p = ®s->r9; break;
+ case 10: p = ®s->r10; break;
+ case 11: p = ®s->r11; break;
+ case 12: p = ®s->r12; break;
+ case 13: p = ®s->r13; break;
+ case 14: p = ®s->r14; break;
+ case 15: p = ®s->r15; break;
+#endif
+ default: p = NULL; break;
+ }
+
+ return p;
+}
+
static char *trapstr(int trapnr)
{
static char *strings[] = {
Index: xen-4.1.2-testing/xen/include/asm-x86/hvm/support.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/hvm/support.h
+++ xen-4.1.2-testing/xen/include/asm-x86/hvm/support.h
@@ -137,5 +137,7 @@ int hvm_set_cr3(unsigned long value);
int hvm_set_cr4(unsigned long value);
int hvm_msr_read_intercept(unsigned int msr, uint64_t *msr_content);
int hvm_msr_write_intercept(unsigned int msr, uint64_t msr_content);
+int hvm_mov_to_cr(unsigned int cr, unsigned int gpr);
+int hvm_mov_from_cr(unsigned int cr, unsigned int gpr);
#endif /* __ASM_X86_HVM_SUPPORT_H__ */
Index: xen-4.1.2-testing/xen/include/asm-x86/hvm/vmx/vmx.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/hvm/vmx/vmx.h
+++ xen-4.1.2-testing/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -144,31 +144,15 @@ void vmx_update_cpu_exec_control(struct
* Exit Qualifications for MOV for Control Register Access
*/
/* 3:0 - control register number (CRn) */
-#define VMX_CONTROL_REG_ACCESS_NUM 0xf
+#define VMX_CONTROL_REG_ACCESS_NUM(eq) ((eq) & 0xf)
/* 5:4 - access type (CR write, CR read, CLTS, LMSW) */
-#define VMX_CONTROL_REG_ACCESS_TYPE 0x30
+#define VMX_CONTROL_REG_ACCESS_TYPE(eq) (((eq) >> 4) & 0x3)
+# define VMX_CONTROL_REG_ACCESS_TYPE_MOV_TO_CR 0
+# define VMX_CONTROL_REG_ACCESS_TYPE_MOV_FROM_CR 1
+# define VMX_CONTROL_REG_ACCESS_TYPE_CLTS 2
+# define VMX_CONTROL_REG_ACCESS_TYPE_LMSW 3
/* 10:8 - general purpose register operand */
-#define VMX_CONTROL_REG_ACCESS_GPR 0xf00
-#define VMX_CONTROL_REG_ACCESS_TYPE_MOV_TO_CR (0 << 4)
-#define VMX_CONTROL_REG_ACCESS_TYPE_MOV_FROM_CR (1 << 4)
-#define VMX_CONTROL_REG_ACCESS_TYPE_CLTS (2 << 4)
-#define VMX_CONTROL_REG_ACCESS_TYPE_LMSW (3 << 4)
-#define VMX_CONTROL_REG_ACCESS_GPR_EAX (0 << 8)
-#define VMX_CONTROL_REG_ACCESS_GPR_ECX (1 << 8)
-#define VMX_CONTROL_REG_ACCESS_GPR_EDX (2 << 8)
-#define VMX_CONTROL_REG_ACCESS_GPR_EBX (3 << 8)
-#define VMX_CONTROL_REG_ACCESS_GPR_ESP (4 << 8)
-#define VMX_CONTROL_REG_ACCESS_GPR_EBP (5 << 8)
-#define VMX_CONTROL_REG_ACCESS_GPR_ESI (6 << 8)
-#define VMX_CONTROL_REG_ACCESS_GPR_EDI (7 << 8)
-#define VMX_CONTROL_REG_ACCESS_GPR_R8 (8 << 8)
-#define VMX_CONTROL_REG_ACCESS_GPR_R9 (9 << 8)
-#define VMX_CONTROL_REG_ACCESS_GPR_R10 (10 << 8)
-#define VMX_CONTROL_REG_ACCESS_GPR_R11 (11 << 8)
-#define VMX_CONTROL_REG_ACCESS_GPR_R12 (12 << 8)
-#define VMX_CONTROL_REG_ACCESS_GPR_R13 (13 << 8)
-#define VMX_CONTROL_REG_ACCESS_GPR_R14 (14 << 8)
-#define VMX_CONTROL_REG_ACCESS_GPR_R15 (15 << 8)
+#define VMX_CONTROL_REG_ACCESS_GPR(eq) (((eq) >> 8) & 0xf)
/*
* Access Rights
Index: xen-4.1.2-testing/xen/include/asm-x86/processor.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/processor.h
+++ xen-4.1.2-testing/xen/include/asm-x86/processor.h
@@ -592,6 +592,8 @@ int wrmsr_hypervisor_regs(uint32_t idx,
int microcode_update(XEN_GUEST_HANDLE(const_void), unsigned long len);
int microcode_resume_cpu(int cpu);
+unsigned long *get_x86_gpr(struct cpu_user_regs *regs, unsigned int modrm_reg);
+
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_X86_PROCESSOR_H */
++++++ 23234-svm-decode-assist-base.patch ++++++
References: FATE#309900
# HG changeset patch
# User Andre Przywara <andre.przywara@xxxxxxx>
# Date 1303116553 -3600
# Node ID bf7afd48339a18cd86d89337f3c055045fb78d3b
# Parent 1276926e3795b11ef6ac2f59df900d8e0ba9f54b
svm: add bit definitions for SVM DecodeAssist
Chapter 15.33 of recent APM Vol.2 manuals describe some additions
to SVM called DecodeAssist. Add the newly added fields to the VMCB
structure and name the associated CPUID bit.
Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx>
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1303117802 -3600
# Node ID 381ab77db71a4739b8a4f4fdad4ef3504999f998
# Parent e324c4d1dd6eeb9417fec513640ca795bd0f5dd4
svm: decode-assists feature must depend on nextrip feature.
...since the decode-assist fast paths assume nextrip vmcb field is
valid.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/hvm/svm/svm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/svm/svm.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/svm/svm.c
@@ -928,11 +928,16 @@ struct hvm_function_table * __init start
printk("SVM: Supported advanced features:\n");
+ /* DecodeAssists fast paths assume nextrip is valid for fast rIP update. */
+ if ( !cpu_has_svm_nrips )
+ clear_bit(SVM_FEATURE_DECODEASSISTS, &svm_feature_flags);
+
#define P(p,s) if ( p ) { printk(" - %s\n", s); printed = 1; }
P(cpu_has_svm_npt, "Nested Page Tables (NPT)");
P(cpu_has_svm_lbrv, "Last Branch Record (LBR) Virtualisation");
P(cpu_has_svm_nrips, "Next-RIP Saved on #VMEXIT");
P(cpu_has_svm_cleanbits, "VMCB Clean Bits");
+ P(cpu_has_svm_decode, "DecodeAssists");
P(cpu_has_pause_filter, "Pause-Intercept Filter");
#undef P
Index: xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/svm.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/hvm/svm/svm.h
+++ xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/svm.h
@@ -80,6 +80,7 @@ extern u32 svm_feature_flags;
#define cpu_has_svm_svml cpu_has_svm_feature(SVM_FEATURE_SVML)
#define cpu_has_svm_nrips cpu_has_svm_feature(SVM_FEATURE_NRIPS)
#define cpu_has_svm_cleanbits cpu_has_svm_feature(SVM_FEATURE_VMCBCLEAN)
+#define cpu_has_svm_decode cpu_has_svm_feature(SVM_FEATURE_DECODEASSISTS)
#define cpu_has_pause_filter cpu_has_svm_feature(SVM_FEATURE_PAUSEFILTER)
#endif /* __ASM_X86_HVM_SVM_H__ */
Index: xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/vmcb.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/hvm/svm/vmcb.h
+++ xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/vmcb.h
@@ -432,7 +432,9 @@ struct vmcb_struct {
vmcbcleanbits_t cleanbits; /* offset 0xC0 */
u32 res09; /* offset 0xC4 */
u64 nextrip; /* offset 0xC8 */
- u64 res10a[102]; /* offset 0xD0 pad to save area */
+ u8 guest_ins_len; /* offset 0xD0 */
+ u8 guest_ins[15]; /* offset 0xD1 */
+ u64 res10a[100]; /* offset 0xE0 pad to save area */
svm_segment_register_t es; /* offset 1024 - cleanbit 8 */
svm_segment_register_t cs; /* cleanbit 8 */
++++++ 23235-svm-decode-assist-crs.patch ++++++
References: FATE#309900
# HG changeset patch
# User Andre Przywara <andre.przywara@xxxxxxx>
# Date 1303117266 -3600
# Node ID 2c8ad607ece18b4740b9fc4ffe267a0e0893c141
# Parent bf7afd48339a18cd86d89337f3c055045fb78d3b
svm: implement CR access part of DecodeAssist
Newer SVM implementations (Bulldozer) now give the used general
purpose register on a MOV-CR intercept explictly. This avoids
fetching and decoding the instruction from guest's memory and speeds
up some Windows guest, which exercise CR8 quite often.
Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx>
Signed-off-by: Keir Fraser <keir@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/hvm/svm/svm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/svm/svm.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/svm/svm.c
@@ -1039,6 +1039,22 @@ static void svm_vmexit_do_cpuid(struct c
__update_guest_eip(regs, inst_len);
}
+static void svm_vmexit_do_cr_access(
+ struct vmcb_struct *vmcb, struct cpu_user_regs *regs)
+{
+ int gp, cr, dir, rc;
+
+ cr = vmcb->exitcode - VMEXIT_CR0_READ;
+ dir = (cr > 15);
+ cr &= 0xf;
+ gp = vmcb->exitinfo1 & 0xf;
+
+ rc = dir ? hvm_mov_to_cr(cr, gp) : hvm_mov_from_cr(cr, gp);
+
+ if ( rc == X86EMUL_OKAY )
+ __update_guest_eip(regs, vmcb->nextrip - vmcb->rip);
+}
+
static void svm_dr_access(struct vcpu *v, struct cpu_user_regs *regs)
{
HVMTRACE_0D(DR_WRITE);
@@ -1620,11 +1636,19 @@ asmlinkage void svm_vmexit_handler(struc
int dir = (vmcb->exitinfo1 & 1) ? IOREQ_READ : IOREQ_WRITE;
if ( handle_pio(port, bytes, dir) )
__update_guest_eip(regs, vmcb->exitinfo2 - vmcb->rip);
- break;
}
- /* fallthrough to emulation if a string instruction */
+ else if ( !handle_mmio() )
+ hvm_inject_exception(TRAP_gp_fault, 0, 0);
+ break;
+
case VMEXIT_CR0_READ ... VMEXIT_CR15_READ:
case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE:
+ if ( cpu_has_svm_decode && (vmcb->exitinfo1 & (1ULL << 63)) )
+ svm_vmexit_do_cr_access(vmcb, regs);
+ else if ( !handle_mmio() )
+ hvm_inject_exception(TRAP_gp_fault, 0, 0);
+ break;
+
case VMEXIT_INVLPG:
case VMEXIT_INVLPGA:
if ( !handle_mmio() )
++++++ 23236-svm-decode-assist-invlpg.patch ++++++
References: FATE#309900
# HG changeset patch
# User Christoph Egger <Christoph.Egger@xxxxxxx>
# Date 1302700499 -3600
# Node ID 3b2182100ba2fa5c4a3a450e473717e2300aa8f1
# Parent 2284c79b606ac14ef5c5bc2c1cce62188b5bd9ee
x86/svm/asid: Introduce svm_invlpga()
Signed-off-by: Christoph Egger <Christoph.Egger@xxxxxxx>
# HG changeset patch
# User Andre Przywara <andre.przywara@xxxxxxx>
# Date 1303117597 -3600
# Node ID e324c4d1dd6eeb9417fec513640ca795bd0f5dd4
# Parent 2c8ad607ece18b4740b9fc4ffe267a0e0893c141
svm: implement INVLPG part of DecodeAssist
Newer SVM implementations (Bulldozer) give the desired address on
a INVLPG intercept explicitly in the EXITINFO1 field of the VMCB.
Use this address to avoid a costly instruction fetch and decode
cycle.
Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx>
# HG changeset patch
# User Christoph Egger <Christoph.Egger@xxxxxxx>
# Date 1305187246 -3600
# Node ID 19d6541c4abec3486c83de76102ec46d7fe22a16
# Parent b6e8e916ed2827fb1329de0de2e23ee5b6b78662
nestedsvm: update rip on invlpga intercept
Fixes endless loop.
Signed-off-by: Christoph Egger <Christoph.Egger@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/hvm/svm/emulate.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/svm/emulate.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/svm/emulate.c
@@ -102,6 +102,7 @@ MAKE_INSTR(INT3, 1, 0xcc);
MAKE_INSTR(RDTSC, 2, 0x0f, 0x31);
MAKE_INSTR(PAUSE, 1, 0x90);
MAKE_INSTR(XSETBV, 3, 0x0f, 0x01, 0xd1);
+MAKE_INSTR(INVLPGA,3, 0x0f, 0x01, 0xdf);
static const u8 *opc_bytes[INSTR_MAX_COUNT] =
{
@@ -116,6 +117,7 @@ static const u8 *opc_bytes[INSTR_MAX_COU
[INSTR_RDTSC] = OPCODE_RDTSC,
[INSTR_PAUSE] = OPCODE_PAUSE,
[INSTR_XSETBV] = OPCODE_XSETBV,
+ [INSTR_INVLPGA] = OPCODE_INVLPGA,
};
static int fetch(struct vcpu *v, u8 *buf, unsigned long addr, int len)
Index: xen-4.1.2-testing/xen/arch/x86/hvm/svm/svm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/svm/svm.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/svm/svm.c
@@ -1650,11 +1650,22 @@ asmlinkage void svm_vmexit_handler(struc
break;
case VMEXIT_INVLPG:
- case VMEXIT_INVLPGA:
- if ( !handle_mmio() )
+ if ( cpu_has_svm_decode )
+ {
+ svm_invlpg_intercept(vmcb->exitinfo1);
+ __update_guest_eip(regs, vmcb->nextrip - vmcb->rip);
+ }
+ else if ( !handle_mmio() )
hvm_inject_exception(TRAP_gp_fault, 0, 0);
break;
+ case VMEXIT_INVLPGA:
+ if ( (inst_len = __get_instruction_length(v, INSTR_INVLPGA)) == 0 )
+ break;
+ svm_invlpga(regs->eax, v->arch.hvm_vcpu.asid);
+ __update_guest_eip(regs, inst_len);
+ break;
+
case VMEXIT_VMMCALL:
if ( (inst_len = __get_instruction_length(v, INSTR_VMCALL)) == 0 )
break;
Index: xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/asid.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/hvm/svm/asid.h
+++ xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/asid.h
@@ -34,10 +34,7 @@ static inline void svm_asid_g_invlpg(str
{
#if 0
/* Optimization? */
- asm volatile (".byte 0x0F,0x01,0xDF \n"
- : /* output */
- : /* input */
- "a" (g_vaddr), "c"(v->arch.hvm_svm.vmcb->guest_asid) );
+ svm_invlpga(g_vaddr, v->arch.hvm_svm.vmcb->guest_asid);
#endif
/* Safe fallback. Take a new ASID. */
Index: xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/emulate.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/hvm/svm/emulate.h
+++ xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/emulate.h
@@ -33,6 +33,7 @@ enum instruction_index {
INSTR_RDTSC,
INSTR_PAUSE,
INSTR_XSETBV,
+ INSTR_INVLPGA,
INSTR_MAX_COUNT /* Must be last - Number of instructions supported */
};
Index: xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/svm.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/hvm/svm/svm.h
+++ xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/svm.h
@@ -62,6 +62,15 @@ static inline void svm_vmsave(void *vmcb
: : "a" (__pa(vmcb)) : "memory" );
}
+static inline void svm_invlpga(unsigned long vaddr, uint32_t asid)
+{
+ asm volatile (
+ ".byte 0x0f,0x01,0xdf"
+ : /* output */
+ : /* input */
+ "a" (vaddr), "c" (asid));
+}
+
extern u32 svm_feature_flags;
#define SVM_FEATURE_NPT 0 /* Nested page table support */
++++++ 23238-svm-decode-assist-insn-fetch.patch ++++++
References: FATE#309900
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1303130170 -3600
# Node ID 60f5df2afcbbe1e8d8438c2b7b8223d9d2102e06
# Parent 381ab77db71a4739b8a4f4fdad4ef3504999f998
svm: implement instruction fetch part of DecodeAssist (on #PF/#NPF)
Newer SVM implementations (Bulldozer) copy up to 15 bytes from the
instruction stream into the VMCB when a #PF or #NPF exception is
intercepted. This patch makes use of this information if available.
This saves us from a) traversing the guest's page tables, b) mapping
the guest's memory and c) copy the instructions from there into the
hypervisor's address space.
This speeds up #NPF intercepts quite a lot and avoids cache and TLB
trashing.
Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx>
Signed-off-by: Keir Fraser <keir@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/hvm/emulate.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/emulate.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/emulate.c
@@ -996,6 +996,8 @@ int hvm_emulate_one(
hvmemul_ctxt->insn_buf_eip = regs->eip;
hvmemul_ctxt->insn_buf_bytes =
+ hvm_get_insn_bytes(curr, hvmemul_ctxt->insn_buf)
+ ? :
(hvm_virtual_to_linear_addr(
x86_seg_cs, &hvmemul_ctxt->seg_reg[x86_seg_cs],
regs->eip, sizeof(hvmemul_ctxt->insn_buf),
Index: xen-4.1.2-testing/xen/arch/x86/hvm/svm/svm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/svm/svm.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/svm/svm.c
@@ -603,6 +603,21 @@ static void svm_set_rdtsc_exiting(struct
vmcb_set_general1_intercepts(vmcb, general1_intercepts);
}
+static unsigned int svm_get_insn_bytes(struct vcpu *v, uint8_t *buf)
+{
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+ unsigned int len = v->arch.hvm_svm.cached_insn_len;
+
+ if ( len != 0 )
+ {
+ /* Latch and clear the cached instruction. */
+ memcpy(buf, vmcb->guest_ins, 15);
+ v->arch.hvm_svm.cached_insn_len = 0;
+ }
+
+ return len;
+}
+
static void svm_init_hypercall_page(struct domain *d, void *hypercall_page)
{
char *p;
@@ -1448,7 +1463,8 @@ static struct hvm_function_table __read_
.msr_read_intercept = svm_msr_read_intercept,
.msr_write_intercept = svm_msr_write_intercept,
.invlpg_intercept = svm_invlpg_intercept,
- .set_rdtsc_exiting = svm_set_rdtsc_exiting
+ .set_rdtsc_exiting = svm_set_rdtsc_exiting,
+ .get_insn_bytes = svm_get_insn_bytes
};
asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
@@ -1554,7 +1570,12 @@ asmlinkage void svm_vmexit_handler(struc
(unsigned long)regs->ecx, (unsigned long)regs->edx,
(unsigned long)regs->esi, (unsigned long)regs->edi);
- if ( paging_fault(va, regs) )
+ if ( cpu_has_svm_decode )
+ v->arch.hvm_svm.cached_insn_len = vmcb->guest_ins_len & 0xf;
+ rc = paging_fault(va, regs);
+ v->arch.hvm_svm.cached_insn_len = 0;
+
+ if ( rc )
{
if ( trace_will_trace_event(TRC_SHADOW) )
break;
@@ -1720,7 +1741,10 @@ asmlinkage void svm_vmexit_handler(struc
case VMEXIT_NPF:
perfc_incra(svmexits, VMEXIT_NPF_PERFC);
regs->error_code = vmcb->exitinfo1;
+ if ( cpu_has_svm_decode )
+ v->arch.hvm_svm.cached_insn_len = vmcb->guest_ins_len & 0xf;
svm_do_nested_pgfault(vmcb->exitinfo2);
+ v->arch.hvm_svm.cached_insn_len = 0;
break;
case VMEXIT_IRET: {
Index: xen-4.1.2-testing/xen/include/asm-x86/hvm/hvm.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/hvm/hvm.h
+++ xen-4.1.2-testing/xen/include/asm-x86/hvm/hvm.h
@@ -132,6 +132,9 @@ struct hvm_function_table {
int (*cpu_up)(void);
void (*cpu_down)(void);
+ /* Copy up to 15 bytes from cached instruction bytes at current rIP. */
+ unsigned int (*get_insn_bytes)(struct vcpu *v, uint8_t *buf);
+
/* Instruction intercepts: non-void return values are X86EMUL codes. */
void (*cpuid_intercept)(
unsigned int *eax, unsigned int *ebx,
@@ -328,6 +331,11 @@ static inline void hvm_cpu_down(void)
hvm_funcs.cpu_down();
}
+static inline unsigned int hvm_get_insn_bytes(struct vcpu *v, uint8_t *buf)
+{
+ return (hvm_funcs.get_insn_bytes ? hvm_funcs.get_insn_bytes(v, buf) : 0);
+}
+
enum hvm_task_switch_reason { TSW_jmp, TSW_iret, TSW_call_or_int };
void hvm_task_switch(
uint16_t tss_sel, enum hvm_task_switch_reason taskswitch_reason,
Index: xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/vmcb.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/hvm/svm/vmcb.h
+++ xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/vmcb.h
@@ -498,6 +498,9 @@ struct arch_svm_struct {
int launch_core;
bool_t vmcb_in_sync; /* VMCB sync'ed with VMSAVE? */
+ /* VMCB has a cached instruction from #PF/#NPF Decode Assist? */
+ uint8_t cached_insn_len; /* Zero if no cached instruction. */
+
/* Upper four bytes are undefined in the VMCB, therefore we can't
* use the fields in the VMCB. Write a 64bit value and then read a 64bit
* value is fine unless there's a VMRUN/VMEXIT in between which clears
++++++
23239-xentrace_correct_overflow_check_for_number_of_per-cpu_trace_pages.patch
++++++
changeset: 23239:51d89366c859
user: Olaf Hering <olaf@xxxxxxxxx>
date: Mon Apr 18 15:12:04 2011 +0100
files: xen/common/trace.c
description:
xentrace: correct overflow check for number of per-cpu trace pages
The calculated number of per-cpu trace pages is stored in t_info and
shared with tools like xentrace. Since its an u16 the value may
overflow because the current check is based on u32. Using the u16
means each cpu could in theory use up to 256MB as trace
buffer. However such a large allocation will currently fail on x86 due
to the MAX_ORDER limit. Check both max theoretical number of pages
per cpu and max number of pages reachable by struct t_buf->prod/cons
variables with requested number of pages.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
---
xen/common/trace.c | 22 +++++++++++++++-------
1 file changed, 15 insertions(+), 7 deletions(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -104,25 +104,33 @@ static void calc_tinfo_first_offset(void
* calculate_tbuf_size - check to make sure that the proposed size will fit
* in the currently sized struct t_info and allows prod and cons to
* reach double the value without overflow.
+ * The t_info layout is fixed and cant be changed without breaking xentrace.
* Initialize t_info_pages based on number of trace pages.
*/
static int calculate_tbuf_size(unsigned int pages)
{
- struct t_buf dummy;
- typeof(dummy.prod) size;
+ struct t_buf dummy_size;
+ typeof(dummy_size.prod) max_size;
+ struct t_info dummy_pages;
+ typeof(dummy_pages.tbuf_size) max_pages;
unsigned int t_info_words;
/* force maximum value for an unsigned type */
- size = -1;
+ max_size = -1;
+ max_pages = -1;
/* max size holds up to n pages */
- size /= PAGE_SIZE;
- if ( pages > size )
+ max_size /= PAGE_SIZE;
+
+ if ( max_size < max_pages )
+ max_pages = max_size;
+
+ if ( pages > max_pages )
{
printk(XENLOG_INFO "xentrace: requested number of %u pages "
"reduced to %u\n",
- pages, (unsigned int)size);
- pages = size;
+ pages, max_pages);
+ pages = max_pages;
}
t_info_words = num_online_cpus() * pages * sizeof(uint32_t);
++++++ 23246-x86-xsave-enable.patch ++++++
References: bnc#718668
# HG changeset patch
# User Tim Deegan <Tim.Deegan@xxxxxxxxxx>
# Date 1303297371 -3600
# Node ID eb4505f8dd97f894ee4b4e1b55ea1272c05e6759
# Parent 3539ef956a378ad7fe39654ff9aca5b0e7bf8843
xen/x86: re-enable xsave by default now that it supports live migration.
Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxx>
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -25,7 +25,7 @@ boolean_param("nofxsr", disable_x86_fxsr
static bool_t __cpuinitdata disable_x86_serial_nr;
boolean_param("noserialnumber", disable_x86_serial_nr);
-static bool_t __cpuinitdata use_xsave;
+static bool_t __cpuinitdata use_xsave = 1;
boolean_param("xsave", use_xsave);
unsigned int __devinitdata opt_cpuid_mask_ecx = ~0u;
integer_param("cpuid_mask_ecx", opt_cpuid_mask_ecx);
++++++ 23303-cpufreq-misc.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1304930762 -3600
# Node ID 82180954eda9cfe279e7ecf8c9ed4ffa29796bfb
# Parent c822888f36568f26e95f9844c7f0c5e06df7aa20
misc cpufreq cleanup
- proper handling of governor command line options when using the
default governor
- warning message for unrecognized command line options
- replacing a NR_CPUS sized array with per-CPU data
- a couple of __read_mostly annotations
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/drivers/cpufreq/cpufreq.c
===================================================================
--- xen-4.1.2-testing.orig/xen/drivers/cpufreq/cpufreq.c
+++ xen-4.1.2-testing/xen/drivers/cpufreq/cpufreq.c
@@ -47,7 +47,8 @@
#include <acpi/acpi.h>
#include <acpi/cpufreq/cpufreq.h>
-static unsigned int usr_max_freq, usr_min_freq;
+static unsigned int __read_mostly usr_min_freq;
+static unsigned int __read_mostly usr_max_freq;
static void cpufreq_cmdline_common_para(struct cpufreq_policy *new_policy);
struct cpufreq_dom {
@@ -57,7 +58,7 @@ struct cpufreq_dom {
};
static LIST_HEAD(cpufreq_dom_list_head);
-struct cpufreq_governor *cpufreq_opt_governor;
+struct cpufreq_governor *__read_mostly cpufreq_opt_governor;
LIST_HEAD(cpufreq_governor_list);
bool_t __read_mostly cpufreq_verbose;
@@ -543,6 +544,7 @@ void __init cpufreq_cmdline_parse(char *
{
static struct cpufreq_governor *__initdata cpufreq_governors[] =
{
+ CPUFREQ_DEFAULT_GOVERNOR,
&cpufreq_gov_userspace,
&cpufreq_gov_dbs,
&cpufreq_gov_performance,
@@ -576,8 +578,10 @@ void __init cpufreq_cmdline_parse(char *
}
if (str && !cpufreq_handle_common_option(str, val) &&
- cpufreq_governors[gov_index]->handle_option)
- cpufreq_governors[gov_index]->handle_option(str, val);
+ (!cpufreq_governors[gov_index]->handle_option ||
+ !cpufreq_governors[gov_index]->handle_option(str, val)))
+ printk(XENLOG_WARNING "cpufreq/%s: option '%s' not recognized\n",
+ cpufreq_governors[gov_index]->name, str);
str = end;
} while (str);
Index: xen-4.1.2-testing/xen/drivers/cpufreq/cpufreq_misc_governors.c
===================================================================
--- xen-4.1.2-testing.orig/xen/drivers/cpufreq/cpufreq_misc_governors.c
+++ xen-4.1.2-testing/xen/drivers/cpufreq/cpufreq_misc_governors.c
@@ -14,14 +14,17 @@
*
*/
+#include <xen/cpu.h>
#include <xen/init.h>
+#include <xen/percpu.h>
#include <xen/sched.h>
#include <acpi/cpufreq/cpufreq.h>
/*
* cpufreq userspace governor
*/
-static unsigned int cpu_set_freq[NR_CPUS];
+static unsigned int __read_mostly userspace_cmdline_freq;
+static DEFINE_PER_CPU(unsigned int, cpu_set_freq);
static int cpufreq_governor_userspace(struct cpufreq_policy *policy,
unsigned int event)
@@ -35,21 +38,21 @@ static int cpufreq_governor_userspace(st
switch (event) {
case CPUFREQ_GOV_START:
- if (!cpu_set_freq[cpu])
- cpu_set_freq[cpu] = policy->cur;
+ if (!per_cpu(cpu_set_freq, cpu))
+ per_cpu(cpu_set_freq, cpu) = policy->cur;
break;
case CPUFREQ_GOV_STOP:
- cpu_set_freq[cpu] = 0;
+ per_cpu(cpu_set_freq, cpu) = 0;
break;
case CPUFREQ_GOV_LIMITS:
- if (policy->max < cpu_set_freq[cpu])
+ if (policy->max < per_cpu(cpu_set_freq, cpu))
ret = __cpufreq_driver_target(policy, policy->max,
CPUFREQ_RELATION_H);
- else if (policy->min > cpu_set_freq[cpu])
+ else if (policy->min > per_cpu(cpu_set_freq, cpu))
ret = __cpufreq_driver_target(policy, policy->min,
CPUFREQ_RELATION_L);
else
- ret = __cpufreq_driver_target(policy, cpu_set_freq[cpu],
+ ret = __cpufreq_driver_target(policy, per_cpu(cpu_set_freq, cpu),
CPUFREQ_RELATION_L);
break;
@@ -68,7 +71,7 @@ int write_userspace_scaling_setspeed(uns
if (!cpu_online(cpu) || !(policy = per_cpu(cpufreq_cpu_policy, cpu)))
return -EINVAL;
- cpu_set_freq[cpu] = freq;
+ per_cpu(cpu_set_freq, cpu) = freq;
if (freq < policy->min)
freq = policy->min;
@@ -78,19 +81,35 @@ int write_userspace_scaling_setspeed(uns
return __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);
}
-static void __init
+static bool_t __init
cpufreq_userspace_handle_option(const char *name, const char *val)
{
if (!strcmp(name, "speed") && val) {
- unsigned int usr_cmdline_freq;
- unsigned int cpu;
+ userspace_cmdline_freq = simple_strtoul(val, NULL, 0);
+ return 1;
+ }
+ return 0;
+}
- usr_cmdline_freq = simple_strtoul(val, NULL, 0);
- for (cpu = 0; cpu < NR_CPUS; cpu++)
- cpu_set_freq[cpu] = usr_cmdline_freq;
+static int cpufreq_userspace_cpu_callback(
+ struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned long)hcpu;
+
+ switch (action)
+ {
+ case CPU_UP_PREPARE:
+ per_cpu(cpu_set_freq, cpu) = userspace_cmdline_freq;
+ break;
}
+
+ return NOTIFY_DONE;
}
+static struct notifier_block cpufreq_userspace_cpu_nfb = {
+ .notifier_call = cpufreq_userspace_cpu_callback
+};
+
struct cpufreq_governor cpufreq_gov_userspace = {
.name = "userspace",
.governor = cpufreq_governor_userspace,
@@ -99,6 +118,11 @@ struct cpufreq_governor cpufreq_gov_user
static int __init cpufreq_gov_userspace_init(void)
{
+ unsigned int cpu;
+
+ for_each_online_cpu(cpu)
+ per_cpu(cpu_set_freq, cpu) = userspace_cmdline_freq;
+ register_cpu_notifier(&cpufreq_userspace_cpu_nfb);
return cpufreq_register_governor(&cpufreq_gov_userspace);
}
__initcall(cpufreq_gov_userspace_init);
Index: xen-4.1.2-testing/xen/drivers/cpufreq/cpufreq_ondemand.c
===================================================================
--- xen-4.1.2-testing.orig/xen/drivers/cpufreq/cpufreq_ondemand.c
+++ xen-4.1.2-testing/xen/drivers/cpufreq/cpufreq_ondemand.c
@@ -296,7 +296,7 @@ int cpufreq_governor_dbs(struct cpufreq_
return 0;
}
-static void __init cpufreq_dbs_handle_option(const char *name, const char *val)
+static bool_t __init cpufreq_dbs_handle_option(const char *name, const char
*val)
{
if ( !strcmp(name, "rate") && val )
{
@@ -334,6 +334,9 @@ static void __init cpufreq_dbs_handle_op
}
dbs_tuners_ins.powersave_bias = tmp;
}
+ else
+ return 0;
+ return 1;
}
struct cpufreq_governor cpufreq_gov_dbs = {
Index: xen-4.1.2-testing/xen/include/acpi/cpufreq/cpufreq.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/acpi/cpufreq/cpufreq.h
+++ xen-4.1.2-testing/xen/include/acpi/cpufreq/cpufreq.h
@@ -93,7 +93,7 @@ struct cpufreq_governor {
char name[CPUFREQ_NAME_LEN];
int (*governor)(struct cpufreq_policy *policy,
unsigned int event);
- void (*handle_option)(const char *name, const char *value);
+ bool_t (*handle_option)(const char *name, const char *value);
struct list_head governor_list;
};
++++++ 23304-amd-oprofile-strings.patch ++++++
References: FATE#309893, FATE#309902, FATE#309903, FATE#309906
# HG changeset patch
# User Jacob Shin <jacob.shin@xxxxxxx>
# Date 1304930954 -3600
# Node ID 8981b582be3e2f6647ef5ff3d93e167436ed357a
# Parent 82180954eda9cfe279e7ecf8c9ed4ffa29796bfb
xenoprof: Update cpu_type to sync with upstream oprofile
Update xenoprof's cpu_type to match upstream oprofile. Currently AMD
Family 11h ~ Family 15h are broken due to string mismatches.
Signed-off-by: Jacob Shin <jacob.shin@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/oprofile/nmi_int.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/oprofile/nmi_int.c
+++ xen-4.1.2-testing/xen/arch/x86/oprofile/nmi_int.c
@@ -435,19 +435,19 @@ static int __init nmi_init(void)
break;
case 0x11:
model = &op_athlon_spec;
- cpu_type = "x86-64/family11";
+ cpu_type = "x86-64/family11h";
break;
case 0x12:
model = &op_athlon_spec;
- cpu_type = "x86-64/family12";
+ cpu_type = "x86-64/family12h";
break;
case 0x14:
model = &op_athlon_spec;
- cpu_type = "x86-64/family14";
+ cpu_type = "x86-64/family14h";
break;
case 0x15:
model = &op_athlon_spec;
- cpu_type = "x86-64/family15";
+ cpu_type = "x86-64/family15h";
break;
}
break;
++++++ 23305-amd-fam15-xenoprof.patch ++++++
References: FATE#309893, FATE#309906
# HG changeset patch
# User Jacob Shin <jacob.shin@xxxxxxx>
# Date 1304931187 -3600
# Node ID 014ee4e09644bd3ae55919d267f742c1d60c337a
# Parent 8981b582be3e2f6647ef5ff3d93e167436ed357a
xenoprof: Add support for AMD Family 15h processors
AMD Family 15h CPU mirrors legacy K7 performance monitor counters to
a new location, and adds 2 new counters. This patch updates xenoprof
to take advantage of the new counters.
Signed-off-by: Jacob Shin <jacob.shin@xxxxxxx>
Rename fam15h -> amd_fam15h in a few places, as suggested by Jan
Beulich.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/oprofile/nmi_int.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/oprofile/nmi_int.c
+++ xen-4.1.2-testing/xen/arch/x86/oprofile/nmi_int.c
@@ -30,7 +30,7 @@
struct op_counter_config counter_config[OP_MAX_COUNTER];
struct op_ibs_config ibs_config;
-static struct op_x86_model_spec const *__read_mostly model;
+struct op_x86_model_spec const *__read_mostly model;
static struct op_msrs cpu_msrs[NR_CPUS];
static unsigned long saved_lvtpc[NR_CPUS];
@@ -446,7 +446,7 @@ static int __init nmi_init(void)
cpu_type = "x86-64/family14h";
break;
case 0x15:
- model = &op_athlon_spec;
+ model = &op_amd_fam15h_spec;
cpu_type = "x86-64/family15h";
break;
}
Index: xen-4.1.2-testing/xen/arch/x86/oprofile/op_model_athlon.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/oprofile/op_model_athlon.c
+++ xen-4.1.2-testing/xen/arch/x86/oprofile/op_model_athlon.c
@@ -24,8 +24,13 @@
#include "op_x86_model.h"
#include "op_counter.h"
-#define NUM_COUNTERS 4
-#define NUM_CONTROLS 4
+#define K7_NUM_COUNTERS 4
+#define K7_NUM_CONTROLS 4
+
+#define FAM15H_NUM_COUNTERS 6
+#define FAM15H_NUM_CONTROLS 6
+
+#define MAX_COUNTERS FAM15H_NUM_COUNTERS
#define CTR_READ(msr_content,msrs,c) do {rdmsrl(msrs->counters[(c)].addr,
(msr_content));} while (0)
#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned
int)(l), -1);} while (0)
@@ -44,9 +49,10 @@
#define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 0x1ULL) << 41))
#define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 0x1ULL) << 40))
-static unsigned long reset_value[NUM_COUNTERS];
+static unsigned long reset_value[MAX_COUNTERS];
extern char svm_stgi_label[];
+extern struct op_x86_model_spec const *__read_mostly model;
#ifdef CONFIG_X86_64
u32 ibs_caps = 0;
@@ -175,26 +181,44 @@ static void athlon_fill_in_addresses(str
msrs->controls[3].addr = MSR_K7_EVNTSEL3;
}
-
+static void fam15h_fill_in_addresses(struct op_msrs * const msrs)
+{
+ msrs->counters[0].addr = MSR_AMD_FAM15H_PERFCTR0;
+ msrs->counters[1].addr = MSR_AMD_FAM15H_PERFCTR1;
+ msrs->counters[2].addr = MSR_AMD_FAM15H_PERFCTR2;
+ msrs->counters[3].addr = MSR_AMD_FAM15H_PERFCTR3;
+ msrs->counters[4].addr = MSR_AMD_FAM15H_PERFCTR4;
+ msrs->counters[5].addr = MSR_AMD_FAM15H_PERFCTR5;
+
+ msrs->controls[0].addr = MSR_AMD_FAM15H_EVNTSEL0;
+ msrs->controls[1].addr = MSR_AMD_FAM15H_EVNTSEL1;
+ msrs->controls[2].addr = MSR_AMD_FAM15H_EVNTSEL2;
+ msrs->controls[3].addr = MSR_AMD_FAM15H_EVNTSEL3;
+ msrs->controls[4].addr = MSR_AMD_FAM15H_EVNTSEL4;
+ msrs->controls[5].addr = MSR_AMD_FAM15H_EVNTSEL5;
+}
+
static void athlon_setup_ctrs(struct op_msrs const * const msrs)
{
uint64_t msr_content;
int i;
+ unsigned int const nr_ctrs = model->num_counters;
+ unsigned int const nr_ctrls = model->num_controls;
/* clear all counters */
- for (i = 0 ; i < NUM_CONTROLS; ++i) {
+ for (i = 0 ; i < nr_ctrls; ++i) {
CTRL_READ(msr_content, msrs, i);
CTRL_CLEAR(msr_content);
CTRL_WRITE(msr_content, msrs, i);
}
/* avoid a false detection of ctr overflows in NMI handler */
- for (i = 0; i < NUM_COUNTERS; ++i) {
+ for (i = 0; i < nr_ctrs; ++i) {
CTR_WRITE(1, msrs, i);
}
/* enable active counters */
- for (i = 0; i < NUM_COUNTERS; ++i) {
+ for (i = 0; i < nr_ctrs; ++i) {
if (counter_config[i].enabled) {
reset_value[i] = counter_config[i].count;
@@ -300,6 +324,7 @@ static int athlon_check_ctrs(unsigned in
int mode = 0;
struct vcpu *v = current;
struct cpu_user_regs *guest_regs = guest_cpu_user_regs();
+ unsigned int const nr_ctrs = model->num_counters;
if (!guest_mode(regs) &&
(regs->eip == (unsigned long)svm_stgi_label)) {
@@ -312,7 +337,7 @@ static int athlon_check_ctrs(unsigned in
mode = xenoprofile_get_mode(v, regs);
}
- for (i = 0 ; i < NUM_COUNTERS; ++i) {
+ for (i = 0 ; i < nr_ctrs; ++i) {
CTR_READ(msr_content, msrs, i);
if (CTR_OVERFLOWED(msr_content)) {
xenoprof_log_event(current, regs, eip, mode, i);
@@ -373,7 +398,8 @@ static void athlon_start(struct op_msrs
{
uint64_t msr_content;
int i;
- for (i = 0 ; i < NUM_COUNTERS ; ++i) {
+ unsigned int const nr_ctrs = model->num_counters;
+ for (i = 0 ; i < nr_ctrs ; ++i) {
if (reset_value[i]) {
CTRL_READ(msr_content, msrs, i);
CTRL_SET_ACTIVE(msr_content);
@@ -401,10 +427,11 @@ static void athlon_stop(struct op_msrs c
{
uint64_t msr_content;
int i;
+ unsigned int const nr_ctrs = model->num_counters;
/* Subtle: stop on all counters to avoid race with
* setting our pm callback */
- for (i = 0 ; i < NUM_COUNTERS ; ++i) {
+ for (i = 0 ; i < nr_ctrs ; ++i) {
CTRL_READ(msr_content, msrs, i);
CTRL_SET_INACTIVE(msr_content);
CTRL_WRITE(msr_content, msrs, i);
@@ -512,11 +539,21 @@ void __init ibs_init(void)
#endif /* CONFIG_X86_64 */
struct op_x86_model_spec const op_athlon_spec = {
- .num_counters = NUM_COUNTERS,
- .num_controls = NUM_CONTROLS,
+ .num_counters = K7_NUM_COUNTERS,
+ .num_controls = K7_NUM_CONTROLS,
.fill_in_addresses = &athlon_fill_in_addresses,
.setup_ctrs = &athlon_setup_ctrs,
.check_ctrs = &athlon_check_ctrs,
.start = &athlon_start,
+ .stop = &athlon_stop
+};
+
+struct op_x86_model_spec const op_amd_fam15h_spec = {
+ .num_counters = FAM15H_NUM_COUNTERS,
+ .num_controls = FAM15H_NUM_CONTROLS,
+ .fill_in_addresses = &fam15h_fill_in_addresses,
+ .setup_ctrs = &athlon_setup_ctrs,
+ .check_ctrs = &athlon_check_ctrs,
+ .start = &athlon_start,
.stop = &athlon_stop
};
Index: xen-4.1.2-testing/xen/arch/x86/oprofile/op_x86_model.h
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/oprofile/op_x86_model.h
+++ xen-4.1.2-testing/xen/arch/x86/oprofile/op_x86_model.h
@@ -48,6 +48,7 @@ extern struct op_x86_model_spec op_arch_
extern struct op_x86_model_spec const op_p4_spec;
extern struct op_x86_model_spec const op_p4_ht2_spec;
extern struct op_x86_model_spec const op_athlon_spec;
+extern struct op_x86_model_spec const op_amd_fam15h_spec;
void arch_perfmon_setup_counters(void);
#endif /* OP_X86_MODEL_H */
Index: xen-4.1.2-testing/xen/include/asm-x86/msr-index.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/msr-index.h
+++ xen-4.1.2-testing/xen/include/asm-x86/msr-index.h
@@ -219,6 +219,19 @@
#define MSR_K8_VM_CR 0xc0010114
#define MSR_K8_VM_HSAVE_PA 0xc0010117
+#define MSR_AMD_FAM15H_EVNTSEL0 0xc0010200
+#define MSR_AMD_FAM15H_PERFCTR0 0xc0010201
+#define MSR_AMD_FAM15H_EVNTSEL1 0xc0010202
+#define MSR_AMD_FAM15H_PERFCTR1 0xc0010203
+#define MSR_AMD_FAM15H_EVNTSEL2 0xc0010204
+#define MSR_AMD_FAM15H_PERFCTR2 0xc0010205
+#define MSR_AMD_FAM15H_EVNTSEL3 0xc0010206
+#define MSR_AMD_FAM15H_PERFCTR3 0xc0010207
+#define MSR_AMD_FAM15H_EVNTSEL4 0xc0010208
+#define MSR_AMD_FAM15H_PERFCTR4 0xc0010209
+#define MSR_AMD_FAM15H_EVNTSEL5 0xc001020a
+#define MSR_AMD_FAM15H_PERFCTR5 0xc001020b
+
#define MSR_K8_FEATURE_MASK 0xc0011004
#define MSR_K8_EXT_FEATURE_MASK 0xc0011005
++++++ 23306-amd-fam15-vpmu.patch ++++++
References: FATE#309893, FATE#309906
# HG changeset patch
# User Jacob Shin <jacob.shin@xxxxxxx>
# Date 1304931286 -3600
# Node ID e787d4f2e5acdba48728a9390710de800315a540
# Parent 014ee4e09644bd3ae55919d267f742c1d60c337a
hvm: vpmu: Add support for AMD Family 15h processors
AMD Family 15h CPU mirrors legacy K7 performance monitor counters to
a new location, and adds 2 new counters. This patch updates HVM VPMU
to take advantage of the new counters.
Signed-off-by: Jacob Shin <jacob.shin@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/hvm/svm/svm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/svm/svm.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/svm/svm.c
@@ -1142,6 +1142,18 @@ static int svm_msr_read_intercept(unsign
case MSR_K7_EVNTSEL1:
case MSR_K7_EVNTSEL2:
case MSR_K7_EVNTSEL3:
+ case MSR_AMD_FAM15H_PERFCTR0:
+ case MSR_AMD_FAM15H_PERFCTR1:
+ case MSR_AMD_FAM15H_PERFCTR2:
+ case MSR_AMD_FAM15H_PERFCTR3:
+ case MSR_AMD_FAM15H_PERFCTR4:
+ case MSR_AMD_FAM15H_PERFCTR5:
+ case MSR_AMD_FAM15H_EVNTSEL0:
+ case MSR_AMD_FAM15H_EVNTSEL1:
+ case MSR_AMD_FAM15H_EVNTSEL2:
+ case MSR_AMD_FAM15H_EVNTSEL3:
+ case MSR_AMD_FAM15H_EVNTSEL4:
+ case MSR_AMD_FAM15H_EVNTSEL5:
vpmu_do_rdmsr(msr, msr_content);
break;
@@ -1237,6 +1249,18 @@ static int svm_msr_write_intercept(unsig
case MSR_K7_EVNTSEL1:
case MSR_K7_EVNTSEL2:
case MSR_K7_EVNTSEL3:
+ case MSR_AMD_FAM15H_PERFCTR0:
+ case MSR_AMD_FAM15H_PERFCTR1:
+ case MSR_AMD_FAM15H_PERFCTR2:
+ case MSR_AMD_FAM15H_PERFCTR3:
+ case MSR_AMD_FAM15H_PERFCTR4:
+ case MSR_AMD_FAM15H_PERFCTR5:
+ case MSR_AMD_FAM15H_EVNTSEL0:
+ case MSR_AMD_FAM15H_EVNTSEL1:
+ case MSR_AMD_FAM15H_EVNTSEL2:
+ case MSR_AMD_FAM15H_EVNTSEL3:
+ case MSR_AMD_FAM15H_EVNTSEL4:
+ case MSR_AMD_FAM15H_EVNTSEL5:
vpmu_do_wrmsr(msr, msr_content);
break;
Index: xen-4.1.2-testing/xen/arch/x86/hvm/svm/vpmu.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/svm/vpmu.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/svm/vpmu.c
@@ -36,7 +36,9 @@
#include <public/hvm/save.h>
#include <asm/hvm/vpmu.h>
-#define NUM_COUNTERS 4
+#define F10H_NUM_COUNTERS 4
+#define F15H_NUM_COUNTERS 6
+#define MAX_NUM_COUNTERS F15H_NUM_COUNTERS
#define MSR_F10H_EVNTSEL_GO_SHIFT 40
#define MSR_F10H_EVNTSEL_EN_SHIFT 22
@@ -47,6 +49,11 @@
#define set_guest_mode(msr) (msr |= (1ULL << MSR_F10H_EVNTSEL_GO_SHIFT))
#define is_overflowed(msr) (!((msr) & (1ULL << (MSR_F10H_COUNTER_LENGTH-1))))
+static int __read_mostly num_counters = 0;
+static u32 __read_mostly *counters = NULL;
+static u32 __read_mostly *ctrls = NULL;
+static bool_t __read_mostly k7_counters_mirrored = 0;
+
/* PMU Counter MSRs. */
u32 AMD_F10H_COUNTERS[] = {
MSR_K7_PERFCTR0,
@@ -63,10 +70,28 @@ u32 AMD_F10H_CTRLS[] = {
MSR_K7_EVNTSEL3
};
+u32 AMD_F15H_COUNTERS[] = {
+ MSR_AMD_FAM15H_PERFCTR0,
+ MSR_AMD_FAM15H_PERFCTR1,
+ MSR_AMD_FAM15H_PERFCTR2,
+ MSR_AMD_FAM15H_PERFCTR3,
+ MSR_AMD_FAM15H_PERFCTR4,
+ MSR_AMD_FAM15H_PERFCTR5
+};
+
+u32 AMD_F15H_CTRLS[] = {
+ MSR_AMD_FAM15H_EVNTSEL0,
+ MSR_AMD_FAM15H_EVNTSEL1,
+ MSR_AMD_FAM15H_EVNTSEL2,
+ MSR_AMD_FAM15H_EVNTSEL3,
+ MSR_AMD_FAM15H_EVNTSEL4,
+ MSR_AMD_FAM15H_EVNTSEL5
+};
+
/* storage for context switching */
struct amd_vpmu_context {
- u64 counters[NUM_COUNTERS];
- u64 ctrls[NUM_COUNTERS];
+ u64 counters[MAX_NUM_COUNTERS];
+ u64 ctrls[MAX_NUM_COUNTERS];
u32 hw_lapic_lvtpc;
};
@@ -78,10 +103,45 @@ static inline int get_pmu_reg_type(u32 a
if ( (addr >= MSR_K7_PERFCTR0) && (addr <= MSR_K7_PERFCTR3) )
return MSR_TYPE_COUNTER;
+ if ( (addr >= MSR_AMD_FAM15H_EVNTSEL0) &&
+ (addr <= MSR_AMD_FAM15H_PERFCTR5 ) )
+ {
+ if (addr & 1)
+ return MSR_TYPE_COUNTER;
+ else
+ return MSR_TYPE_CTRL;
+ }
+
/* unsupported registers */
return -1;
}
+static inline u32 get_fam15h_addr(u32 addr)
+{
+ switch ( addr )
+ {
+ case MSR_K7_PERFCTR0:
+ return MSR_AMD_FAM15H_PERFCTR0;
+ case MSR_K7_PERFCTR1:
+ return MSR_AMD_FAM15H_PERFCTR1;
+ case MSR_K7_PERFCTR2:
+ return MSR_AMD_FAM15H_PERFCTR2;
+ case MSR_K7_PERFCTR3:
+ return MSR_AMD_FAM15H_PERFCTR3;
+ case MSR_K7_EVNTSEL0:
+ return MSR_AMD_FAM15H_EVNTSEL0;
+ case MSR_K7_EVNTSEL1:
+ return MSR_AMD_FAM15H_EVNTSEL1;
+ case MSR_K7_EVNTSEL2:
+ return MSR_AMD_FAM15H_EVNTSEL2;
+ case MSR_K7_EVNTSEL3:
+ return MSR_AMD_FAM15H_EVNTSEL3;
+ default:
+ break;
+ }
+
+ return addr;
+}
static int amd_vpmu_do_interrupt(struct cpu_user_regs *regs)
{
@@ -110,12 +170,12 @@ static inline void context_restore(struc
struct vpmu_struct *vpmu = vcpu_vpmu(v);
struct amd_vpmu_context *ctxt = vpmu->context;
- for ( i = 0; i < NUM_COUNTERS; i++ )
- wrmsrl(AMD_F10H_CTRLS[i], ctxt->ctrls[i]);
+ for ( i = 0; i < num_counters; i++ )
+ wrmsrl(ctrls[i], ctxt->ctrls[i]);
- for ( i = 0; i < NUM_COUNTERS; i++ )
+ for ( i = 0; i < num_counters; i++ )
{
- wrmsrl(AMD_F10H_COUNTERS[i], ctxt->counters[i]);
+ wrmsrl(counters[i], ctxt->counters[i]);
/* Force an interrupt to allow guest reset the counter,
if the value is positive */
@@ -147,11 +207,11 @@ static inline void context_save(struct v
struct vpmu_struct *vpmu = vcpu_vpmu(v);
struct amd_vpmu_context *ctxt = vpmu->context;
- for ( i = 0; i < NUM_COUNTERS; i++ )
- rdmsrl(AMD_F10H_COUNTERS[i], ctxt->counters[i]);
+ for ( i = 0; i < num_counters; i++ )
+ rdmsrl(counters[i], ctxt->counters[i]);
- for ( i = 0; i < NUM_COUNTERS; i++ )
- rdmsrl(AMD_F10H_CTRLS[i], ctxt->ctrls[i]);
+ for ( i = 0; i < num_counters; i++ )
+ rdmsrl(ctrls[i], ctxt->ctrls[i]);
}
static void amd_vpmu_save(struct vcpu *v)
@@ -175,12 +235,18 @@ static void context_update(unsigned int
struct vpmu_struct *vpmu = vcpu_vpmu(v);
struct amd_vpmu_context *ctxt = vpmu->context;
- for ( i = 0; i < NUM_COUNTERS; i++ )
- if ( msr == AMD_F10H_COUNTERS[i] )
+ if ( k7_counters_mirrored &&
+ ((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3)) )
+ {
+ msr = get_fam15h_addr(msr);
+ }
+
+ for ( i = 0; i < num_counters; i++ )
+ if ( msr == counters[i] )
ctxt->counters[i] = msr_content;
- for ( i = 0; i < NUM_COUNTERS; i++ )
- if ( msr == AMD_F10H_CTRLS[i] )
+ for ( i = 0; i < num_counters; i++ )
+ if ( msr == ctrls[i] )
ctxt->ctrls[i] = msr_content;
ctxt->hw_lapic_lvtpc = apic_read(APIC_LVTPC);
@@ -235,10 +301,31 @@ static void amd_vpmu_initialise(struct v
{
struct amd_vpmu_context *ctxt = NULL;
struct vpmu_struct *vpmu = vcpu_vpmu(v);
+ __u8 family = current_cpu_data.x86;
if ( vpmu->flags & VPMU_CONTEXT_ALLOCATED )
return;
+ if ( counters == NULL )
+ {
+ switch ( family )
+ {
+ case 0x15:
+ num_counters = F15H_NUM_COUNTERS;
+ counters = AMD_F15H_COUNTERS;
+ ctrls = AMD_F15H_CTRLS;
+ k7_counters_mirrored = 1;
+ break;
+ case 0x10:
+ default:
+ num_counters = F10H_NUM_COUNTERS;
+ counters = AMD_F10H_COUNTERS;
+ ctrls = AMD_F10H_CTRLS;
+ k7_counters_mirrored = 0;
+ break;
+ }
+ }
+
ctxt = xmalloc_bytes(sizeof(struct amd_vpmu_context));
if ( !ctxt )
Index: xen-4.1.2-testing/xen/arch/x86/hvm/vpmu.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/vpmu.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/vpmu.c
@@ -101,6 +101,7 @@ void vpmu_initialise(struct vcpu *v)
switch ( family )
{
case 0x10:
+ case 0x15:
vpmu->arch_vpmu_ops = &amd_vpmu_ops;
break;
default:
++++++
23308-xentrace_Move_the_global_variable_t_info_first_offset_into_calculate_tbuf_size.patch
++++++
changeset: 23308:fb5313e64335
user: Olaf Hering <olaf@xxxxxxxxx>
date: Mon May 09 09:58:36 2011 +0100
files: xen/common/trace.c
description:
xentrace: Move the global variable t_info_first_offset into
calculate_tbuf_size()
Move the global variable t_info_first_offset into
calculate_tbuf_size() because it is only used there. Change the type
from u32 to uint32_t to match type in other places.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
xen/common/trace.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -55,7 +55,6 @@ static DEFINE_PER_CPU_READ_MOSTLY(struct
static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
static u32 data_size;
-static u32 t_info_first_offset __read_mostly;
/* High water mark for trace buffers; */
/* Send virtual interrupt when buffer level reaches this point */
@@ -94,10 +93,10 @@ static struct notifier_block cpu_nfb = {
.notifier_call = cpu_callback
};
-static void calc_tinfo_first_offset(void)
+static uint32_t calc_tinfo_first_offset(void)
{
int offset_in_bytes = offsetof(struct t_info, mfn_offset[NR_CPUS]);
- t_info_first_offset = fit_to_type(uint32_t, offset_in_bytes);
+ return fit_to_type(uint32_t, offset_in_bytes);
}
/**
@@ -107,7 +106,7 @@ static void calc_tinfo_first_offset(void
* The t_info layout is fixed and cant be changed without breaking xentrace.
* Initialize t_info_pages based on number of trace pages.
*/
-static int calculate_tbuf_size(unsigned int pages)
+static int calculate_tbuf_size(unsigned int pages, uint32_t
t_info_first_offset)
{
struct t_buf dummy_size;
typeof(dummy_size.prod) max_size;
@@ -156,6 +155,7 @@ static int alloc_trace_bufs(unsigned int
int i, cpu, order;
/* Start after a fixed-size array of NR_CPUS */
uint32_t *t_info_mfn_list;
+ uint32_t t_info_first_offset;
int offset;
if ( t_info )
@@ -165,9 +165,9 @@ static int alloc_trace_bufs(unsigned int
return -EINVAL;
/* Calculate offset in u32 of first mfn */
- calc_tinfo_first_offset();
+ t_info_first_offset = calc_tinfo_first_offset();
- pages = calculate_tbuf_size(pages);
+ pages = calculate_tbuf_size(pages, t_info_first_offset);
order = get_order_from_pages(pages);
t_info = alloc_xenheap_pages(get_order_from_pages(t_info_pages), 0);
++++++
23309-xentrace_Mark_data_size___read_mostly_because_its_only_written_once.patch
++++++
changeset: 23309:0ddcc8063690
user: Olaf Hering <olaf@xxxxxxxxx>
date: Mon May 09 09:59:13 2011 +0100
files: xen/common/trace.c
description:
xentrace: Mark data_size __read_mostly because it's only written once
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
xen/common/trace.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -54,7 +54,7 @@ static unsigned int t_info_pages;
static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
-static u32 data_size;
+static u32 data_size __read_mostly;
/* High water mark for trace buffers; */
/* Send virtual interrupt when buffer level reaches this point */
++++++
23310-xentrace_Remove_unneeded_cast_when_assigning_pointer_value_to_dst.patch
++++++
changeset: 23310:b7ca55907bd3
user: Olaf Hering <olaf@xxxxxxxxx>
date: Mon May 09 09:59:50 2011 +0100
files: xen/common/trace.c
description:
xentrace: Remove unneeded cast when assigning pointer value to dst
Remove unneeded cast when assigning pointer value to dst.
Both arrays are uint32_t and memcpy takes a void pointer.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
xen/common/trace.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -483,7 +483,7 @@ static inline void __insert_record(struc
const void *extra_data)
{
struct t_rec *rec;
- unsigned char *dst;
+ uint32_t *dst;
unsigned int extra_word = extra / sizeof(u32);
unsigned int local_rec_size = calc_rec_size(cycles, extra);
uint32_t next;
@@ -508,13 +508,13 @@ static inline void __insert_record(struc
rec->event = event;
rec->extra_u32 = extra_word;
- dst = (unsigned char *)rec->u.nocycles.extra_u32;
+ dst = rec->u.nocycles.extra_u32;
if ( (rec->cycles_included = cycles) != 0 )
{
u64 tsc = (u64)get_cycles();
rec->u.cycles.cycles_lo = (uint32_t)tsc;
rec->u.cycles.cycles_hi = (uint32_t)(tsc >> 32);
- dst = (unsigned char *)rec->u.cycles.extra_u32;
+ dst = rec->u.cycles.extra_u32;
}
if ( extra_data && extra )
++++++ 23334-amd-fam12+14-vpmu.patch ++++++
References: FATE#309902, FATE#309903
# HG changeset patch
# User Jacob Shin <jacob.shin@xxxxxxx>
# Date 1305188046 -3600
# Node ID 23e33ea79cac0303c729d4e82905054cded16348
# Parent fabdd682420c0c7b5e81f07f2f54211ebc11babe
hvm: vpmu: Enable HVM VPMU for AMD Family 12h and 14h processors
HVM VPMU support can be enabled for AMD Family 12h and 14h processors
by taking the same code path as 10h.
Signed-off-by: Jacob Shin <jacob.shin@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/hvm/svm/vpmu.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/svm/vpmu.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/svm/vpmu.c
@@ -317,6 +317,8 @@ static void amd_vpmu_initialise(struct v
k7_counters_mirrored = 1;
break;
case 0x10:
+ case 0x12:
+ case 0x14:
default:
num_counters = F10H_NUM_COUNTERS;
counters = AMD_F10H_COUNTERS;
Index: xen-4.1.2-testing/xen/arch/x86/hvm/vpmu.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/vpmu.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/vpmu.c
@@ -101,6 +101,8 @@ void vpmu_initialise(struct vcpu *v)
switch ( family )
{
case 0x10:
+ case 0x12:
+ case 0x14:
case 0x15:
vpmu->arch_vpmu_ops = &amd_vpmu_ops;
break;
++++++ 23383-libxc-rm-static-vars.patch ++++++
# HG changeset patch
# User Ian Campbell <ian.campbell@xxxxxxxxxx>
# Date 1306228450 -3600
# Node ID 23b423a3955785c9a8679c3a877c3472066a2e1f
# Parent ba8da39c67298b19c2c277e5794981b7f23bedf2
libxc: save/restore: remove static context variables
20544:ad9d75d74bd5 and 20545:cc7d66ba0dad seemingly intended to change these
global static variables into stack variables but didn't remove the static
qualifier.
Also zero the entire struct once with memset rather than clearing fields
piecemeal in two different places.
Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Acked-by: Ian Jackson <ian.jackson.citrix.com>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
Acked-by: Vincent Hanquez <vincent.hanquez@xxxxxxxxxxxxx>
Index: xen-4.1.2-testing/tools/libxc/xc_domain_restore.c
===================================================================
--- xen-4.1.2-testing.orig/tools/libxc/xc_domain_restore.c
+++ xen-4.1.2-testing/tools/libxc/xc_domain_restore.c
@@ -1134,23 +1134,19 @@ int xc_domain_restore(xc_interface *xch,
int orig_io_fd_flags;
- static struct restore_ctx _ctx = {
- .live_p2m = NULL,
- .p2m = NULL,
- };
- static struct restore_ctx *ctx = &_ctx;
+ struct restore_ctx _ctx;
+ struct restore_ctx *ctx = &_ctx;
struct domain_info_context *dinfo = &ctx->dinfo;
pagebuf_init(&pagebuf);
memset(&tailbuf, 0, sizeof(tailbuf));
tailbuf.ishvm = hvm;
- /* For info only */
- ctx->nr_pfns = 0;
-
if ( superpages )
return 1;
+ memset(ctx, 0, sizeof(*ctx));
+
ctxt = xc_hypercall_buffer_alloc(xch, ctxt, sizeof(*ctxt));
if ( ctxt == NULL )
Index: xen-4.1.2-testing/tools/libxc/xc_domain_save.c
===================================================================
--- xen-4.1.2-testing.orig/tools/libxc/xc_domain_save.c
+++ xen-4.1.2-testing/tools/libxc/xc_domain_save.c
@@ -958,11 +958,8 @@ int xc_domain_save(xc_interface *xch, in
unsigned long mfn;
struct outbuf ob;
- static struct save_ctx _ctx = {
- .live_p2m = NULL,
- .live_m2p = NULL,
- };
- static struct save_ctx *ctx = &_ctx;
+ struct save_ctx _ctx;
+ struct save_ctx *ctx = &_ctx;
struct domain_info_context *dinfo = &ctx->dinfo;
int completed = 0;
@@ -976,6 +973,8 @@ int xc_domain_save(xc_interface *xch, in
outbuf_init(xch, &ob, OUTBUF_SIZE);
+ memset(ctx, 0, sizeof(*ctx));
+
/* If no explicit control parameters given, use defaults */
max_iters = max_iters ? : DEF_MAX_ITERS;
max_factor = max_factor ? : DEF_MAX_FACTOR;
++++++
23404-xentrace_reduce_trace_buffer_size_to_something_mfn_offset_can_reach.patch
++++++
changeset: 23404:dd0eb070ee44
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu May 26 12:34:44 2011 +0100
files: xen/common/trace.c
description:
xentrace: reduce trace buffer size to something mfn_offset can reach
The start of the array which holds the list of mfns for each cpus
tracebuffer is stored in an unsigned short. This limits the total
amount of pages for each cpu as the number of active cpus increases.
Update the math in calculate_tbuf_size() to apply also this rule to
the max number of trace pages. Without this change the index can
overflow.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
---
xen/common/trace.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -112,11 +112,14 @@ static int calculate_tbuf_size(unsigned
typeof(dummy_size.prod) max_size;
struct t_info dummy_pages;
typeof(dummy_pages.tbuf_size) max_pages;
+ typeof(dummy_pages.mfn_offset[0]) max_mfn_offset;
+ unsigned int max_cpus = num_online_cpus();
unsigned int t_info_words;
/* force maximum value for an unsigned type */
max_size = -1;
max_pages = -1;
+ max_mfn_offset = -1;
/* max size holds up to n pages */
max_size /= PAGE_SIZE;
@@ -124,6 +127,18 @@ static int calculate_tbuf_size(unsigned
if ( max_size < max_pages )
max_pages = max_size;
+ /*
+ * max mfn_offset holds up to n pages per cpu
+ * The array of mfns for the highest cpu can start at the maximum value
+ * mfn_offset can hold. So reduce the number of cpus and also the
mfn_offset.
+ */
+ max_mfn_offset -= t_info_first_offset - 1;
+ max_cpus--;
+ if ( max_cpus )
+ max_mfn_offset /= max_cpus;
+ if ( max_mfn_offset < max_pages )
+ max_pages = max_mfn_offset;
+
if ( pages > max_pages )
{
printk(XENLOG_INFO "xentrace: requested number of %u pages "
++++++ 23405-xentrace_fix_type_of_offset_to_avoid_ouf-of-bounds_access.patch
++++++
changeset: 23405:3057b531d905
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu May 26 12:35:30 2011 +0100
files: xen/common/trace.c
description:
xentrace: fix type of offset to avoid ouf-of-bounds access
Update the type of the local offset variable to match the type where
this variable is stored. Also update the type of t_info_first_offset
because it has also a limited range.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
---
xen/common/trace.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -106,7 +106,7 @@ static uint32_t calc_tinfo_first_offset(
* The t_info layout is fixed and cant be changed without breaking xentrace.
* Initialize t_info_pages based on number of trace pages.
*/
-static int calculate_tbuf_size(unsigned int pages, uint32_t
t_info_first_offset)
+static int calculate_tbuf_size(unsigned int pages, uint16_t
t_info_first_offset)
{
struct t_buf dummy_size;
typeof(dummy_size.prod) max_size;
@@ -170,8 +170,8 @@ static int alloc_trace_bufs(unsigned int
int i, cpu, order;
/* Start after a fixed-size array of NR_CPUS */
uint32_t *t_info_mfn_list;
- uint32_t t_info_first_offset;
- int offset;
+ uint16_t t_info_first_offset;
+ uint16_t offset;
if ( t_info )
return -EBUSY;
@@ -179,7 +179,7 @@ static int alloc_trace_bufs(unsigned int
if ( pages == 0 )
return -EINVAL;
- /* Calculate offset in u32 of first mfn */
+ /* Calculate offset in units of u32 of first mfn */
t_info_first_offset = calc_tinfo_first_offset();
pages = calculate_tbuf_size(pages, t_info_first_offset);
++++++
23406-xentrace_update___insert_record_to_copy_the_trace_record_to_individual_mfns.patch
++++++
changeset: 23406:956438803307
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu May 26 12:36:03 2011 +0100
files: xen/common/trace.c
description:
xentrace: update __insert_record() to copy the trace record to individual mfns
Update __insert_record() to copy the trace record to individual mfns.
This is a prereq before changing the per-cpu allocation from
contiguous to non-contiguous allocation.
v2:
update offset calculation to use shift and mask
update type of mfn_offset to match type of data source
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
---
xen/common/trace.c | 71 +++++++++++++++++++++++++++++++++++++++++------------
1 file changed, 55 insertions(+), 16 deletions(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -52,7 +52,6 @@ static struct t_info *t_info;
static unsigned int t_info_pages;
static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
-static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
static u32 data_size __read_mostly;
@@ -208,7 +207,6 @@ static int alloc_trace_bufs(unsigned int
per_cpu(t_bufs, cpu) = buf = rawbuf;
buf->cons = buf->prod = 0;
- per_cpu(t_data, cpu) = (unsigned char *)(buf + 1);
}
offset = t_info_first_offset;
@@ -472,10 +470,16 @@ static inline u32 calc_bytes_avail(const
return data_size - calc_unconsumed_bytes(buf);
}
-static inline struct t_rec *next_record(const struct t_buf *buf,
- uint32_t *next)
+static unsigned char *next_record(const struct t_buf *buf, uint32_t *next,
+ unsigned char **next_page,
+ uint32_t *offset_in_page)
{
u32 x = buf->prod, cons = buf->cons;
+ uint16_t per_cpu_mfn_offset;
+ uint32_t per_cpu_mfn_nr;
+ uint32_t *mfn_list;
+ uint32_t mfn;
+ unsigned char *this_page;
barrier(); /* must read buf->prod and buf->cons only once */
*next = x;
@@ -487,7 +491,27 @@ static inline struct t_rec *next_record(
ASSERT(x < data_size);
- return (struct t_rec *)&this_cpu(t_data)[x];
+ /* add leading header to get total offset of next record */
+ x += sizeof(struct t_buf);
+ *offset_in_page = x & ~PAGE_MASK;
+
+ /* offset into array of mfns */
+ per_cpu_mfn_nr = x >> PAGE_SHIFT;
+ per_cpu_mfn_offset = t_info->mfn_offset[smp_processor_id()];
+ mfn_list = (uint32_t *)t_info;
+ mfn = mfn_list[per_cpu_mfn_offset + per_cpu_mfn_nr];
+ this_page = mfn_to_virt(mfn);
+ if (per_cpu_mfn_nr + 1 >= opt_tbuf_size)
+ {
+ /* reached end of buffer? */
+ *next_page = NULL;
+ }
+ else
+ {
+ mfn = mfn_list[per_cpu_mfn_offset + per_cpu_mfn_nr + 1];
+ *next_page = mfn_to_virt(mfn);
+ }
+ return this_page;
}
static inline void __insert_record(struct t_buf *buf,
@@ -497,28 +521,37 @@ static inline void __insert_record(struc
unsigned int rec_size,
const void *extra_data)
{
- struct t_rec *rec;
+ struct t_rec split_rec, *rec;
uint32_t *dst;
+ unsigned char *this_page, *next_page;
unsigned int extra_word = extra / sizeof(u32);
unsigned int local_rec_size = calc_rec_size(cycles, extra);
uint32_t next;
+ uint32_t offset;
+ uint32_t remaining;
BUG_ON(local_rec_size != rec_size);
BUG_ON(extra & 3);
- rec = next_record(buf, &next);
- if ( !rec )
+ this_page = next_record(buf, &next, &next_page, &offset);
+ if ( !this_page )
return;
- /* Double-check once more that we have enough space.
- * Don't bugcheck here, in case the userland tool is doing
- * something stupid. */
- if ( (unsigned char *)rec + rec_size > this_cpu(t_data) + data_size )
+
+ remaining = PAGE_SIZE - offset;
+
+ if ( unlikely(rec_size > remaining) )
{
- if ( printk_ratelimit() )
+ if ( next_page == NULL )
+ {
+ /* access beyond end of buffer */
printk(XENLOG_WARNING
- "%s: size=%08x prod=%08x cons=%08x rec=%u\n",
- __func__, data_size, next, buf->cons, rec_size);
- return;
+ "%s: size=%08x prod=%08x cons=%08x rec=%u remaining=%u\n",
+ __func__, data_size, next, buf->cons, rec_size, remaining);
+ return;
+ }
+ rec = &split_rec;
+ } else {
+ rec = (struct t_rec*)(this_page + offset);
}
rec->event = event;
@@ -535,6 +568,12 @@ static inline void __insert_record(struc
if ( extra_data && extra )
memcpy(dst, extra_data, extra);
+ if ( unlikely(rec_size > remaining) )
+ {
+ memcpy(this_page + offset, rec, remaining);
+ memcpy(next_page, (char *)rec + remaining, rec_size - remaining);
+ }
+
wmb();
next += rec_size;
++++++ 23407-xentrace_allocate_non-contiguous_per-cpu_trace_buffers.patch ++++++
changeset: 23407:b19898ac3e32
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu May 26 12:36:27 2011 +0100
files: xen/common/trace.c
description:
xentrace: allocate non-contiguous per-cpu trace buffers
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
---
xen/common/trace.c | 92 ++++++++++++++++++++++++++++-------------------------
1 file changed, 50 insertions(+), 42 deletions(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -166,7 +166,7 @@ static int calculate_tbuf_size(unsigned
*/
static int alloc_trace_bufs(unsigned int pages)
{
- int i, cpu, order;
+ int i, cpu;
/* Start after a fixed-size array of NR_CPUS */
uint32_t *t_info_mfn_list;
uint16_t t_info_first_offset;
@@ -182,34 +182,11 @@ static int alloc_trace_bufs(unsigned int
t_info_first_offset = calc_tinfo_first_offset();
pages = calculate_tbuf_size(pages, t_info_first_offset);
- order = get_order_from_pages(pages);
t_info = alloc_xenheap_pages(get_order_from_pages(t_info_pages), 0);
if ( t_info == NULL )
- goto out_dealloc;
+ goto out_dealloc_t_info;
- /*
- * First, allocate buffers for all of the cpus. If any
- * fails, deallocate what you have so far and exit.
- */
- for_each_online_cpu(cpu)
- {
- void *rawbuf;
- struct t_buf *buf;
-
- if ( (rawbuf = alloc_xenheap_pages(
- order, MEMF_bits(32 + PAGE_SHIFT))) == NULL )
- {
- printk(XENLOG_INFO "xentrace: memory allocation failed "
- "on cpu %d\n", cpu);
- goto out_dealloc;
- }
-
- per_cpu(t_bufs, cpu) = buf = rawbuf;
- buf->cons = buf->prod = 0;
- }
-
- offset = t_info_first_offset;
t_info_mfn_list = (uint32_t *)t_info;
for(i = 0; i < t_info_pages; i++)
@@ -219,27 +196,53 @@ static int alloc_trace_bufs(unsigned int
t_info->tbuf_size = pages;
/*
- * Now share the pages so xentrace can map them, and write them in
- * the global t_info structure.
+ * Allocate buffers for all of the cpus.
+ * If any fails, deallocate what you have so far and exit.
*/
for_each_online_cpu(cpu)
{
- void *rawbuf = per_cpu(t_bufs, cpu);
- struct page_info *p = virt_to_page(rawbuf);
- uint32_t mfn = virt_to_mfn(rawbuf);
+ offset = t_info_first_offset + (cpu * pages);
+ t_info->mfn_offset[cpu] = offset;
for ( i = 0; i < pages; i++ )
{
- share_xen_page_with_privileged_guests(p + i, XENSHARE_writable);
-
- t_info_mfn_list[offset + i]=mfn + i;
+ void *p = alloc_xenheap_pages(0, MEMF_bits(32 + PAGE_SHIFT));
+ if ( !p )
+ {
+ printk(XENLOG_INFO "xentrace: memory allocation failed "
+ "on cpu %d after %d pages\n", cpu, i);
+ t_info_mfn_list[offset + i] = 0;
+ goto out_dealloc;
+ }
+ t_info_mfn_list[offset + i] = virt_to_mfn(p);
}
- t_info->mfn_offset[cpu]=offset;
- printk(XENLOG_INFO "xentrace: p%d mfn %"PRIx32" offset %d\n",
- cpu, mfn, offset);
- offset+=i;
+ }
+
+ /*
+ * Initialize buffers for all of the cpus.
+ */
+ for_each_online_cpu(cpu)
+ {
+ struct t_buf *buf;
+ struct page_info *pg;
spin_lock_init(&per_cpu(t_lock, cpu));
+
+ offset = t_info->mfn_offset[cpu];
+
+ /* Initialize the buffer metadata */
+ per_cpu(t_bufs, cpu) = buf = mfn_to_virt(t_info_mfn_list[offset]);
+ buf->cons = buf->prod = 0;
+
+ printk(XENLOG_INFO "xentrace: p%d mfn %x offset %u\n",
+ cpu, t_info_mfn_list[offset], offset);
+
+ /* Now share the trace pages */
+ for ( i = 0; i < pages; i++ )
+ {
+ pg = mfn_to_page(t_info_mfn_list[offset + i]);
+ share_xen_page_with_privileged_guests(pg, XENSHARE_writable);
+ }
}
data_size = (pages * PAGE_SIZE - sizeof(struct t_buf));
@@ -255,14 +258,19 @@ static int alloc_trace_bufs(unsigned int
out_dealloc:
for_each_online_cpu(cpu)
{
- void *rawbuf = per_cpu(t_bufs, cpu);
- per_cpu(t_bufs, cpu) = NULL;
- if ( rawbuf )
+ offset = t_info->mfn_offset[cpu];
+ if ( !offset )
+ continue;
+ for ( i = 0; i < pages; i++ )
{
- ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated));
- free_xenheap_pages(rawbuf, order);
+ uint32_t mfn = t_info_mfn_list[offset + i];
+ if ( !mfn )
+ break;
+ ASSERT(!(mfn_to_page(mfn)->count_info & PGC_allocated));
+ free_xenheap_pages(mfn_to_virt(mfn), 0);
}
}
+out_dealloc_t_info:
free_xenheap_pages(t_info, get_order_from_pages(t_info_pages));
t_info = NULL;
printk(XENLOG_WARNING "xentrace: allocation failed! Tracing disabled.\n");
++++++ 23437-amd-fam15-TSC-scaling.patch ++++++
References: FATE#309901
# HG changeset patch
# User Wei Huang <wei.huang2@xxxxxxx>
# Date 1306569488 -3600
# Node ID d7c755c25bb9d6ed77d64cb6736b6c4f339db1bf
# Parent f6ce871e568949f5817470f6c7bab6ed1f8f6c13
HVM/SVM: enable tsc scaling ratio for SVM
Future AMD CPUs support TSC scaling. It allows guests to have a
different TSC frequency from host system using this formula: guest_tsc
= host_tsc * tsc_ratio + vmcb_offset. The tsc_ratio is a 64bit MSR
contains a fixed-point number in 8.32 format (8 bits for integer part
and 32bits for fractional part). For instance 0x00000003_80000000
means tsc_ratio=3.5.
This patch enables TSC scaling ratio for SVM. With it, guest VMs don't
need take #VMEXIT to calculate a translated TSC value when it is
running under TSC emulation mode. This can substancially reduce the
rdtsc overhead.
Signed-off-by: Wei Huang <wei.huang2@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/hvm/svm/svm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/svm/svm.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/svm/svm.c
@@ -588,6 +588,22 @@ static void svm_set_segment_register(str
static void svm_set_tsc_offset(struct vcpu *v, u64 offset)
{
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+ struct domain *d = v->domain;
+
+ /* Re-adjust the offset value when TSC_RATIO is available */
+ if ( cpu_has_tsc_ratio && d->arch.vtsc )
+ {
+ uint64_t host_tsc, guest_tsc;
+
+ rdtscll(host_tsc);
+ guest_tsc = hvm_get_guest_tsc(v);
+
+ /* calculate hi,lo parts in 64bits to prevent overflow */
+ offset = (((host_tsc >> 32) * d->arch.tsc_khz / cpu_khz) << 32) +
+ (host_tsc & 0xffffffffULL) * d->arch.tsc_khz / cpu_khz;
+ offset = guest_tsc - offset;
+ }
+
vmcb_set_tsc_offset(vmcb, offset);
}
@@ -638,6 +654,19 @@ static void svm_init_hypercall_page(stru
*(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */
}
+static inline void svm_tsc_ratio_save(struct vcpu *v)
+{
+ /* Other vcpus might not have vtsc enabled. So disable TSC_RATIO here. */
+ if ( cpu_has_tsc_ratio && v->domain->arch.vtsc )
+ wrmsrl(MSR_AMD64_TSC_RATIO, DEFAULT_TSC_RATIO);
+}
+
+static inline void svm_tsc_ratio_load(struct vcpu *v)
+{
+ if ( cpu_has_tsc_ratio && v->domain->arch.vtsc )
+ wrmsrl(MSR_AMD64_TSC_RATIO, vcpu_tsc_ratio(v));
+}
+
static void svm_ctxt_switch_from(struct vcpu *v)
{
int cpu = smp_processor_id();
@@ -646,6 +675,7 @@ static void svm_ctxt_switch_from(struct
svm_save_dr(v);
vpmu_save(v);
+ svm_tsc_ratio_save(v);
svm_sync_vmcb(v);
svm_vmload(per_cpu(root_vmcb, cpu));
@@ -689,6 +719,7 @@ static void svm_ctxt_switch_to(struct vc
svm_vmload(vmcb);
vmcb->cleanbits.bytes = 0;
vpmu_load(v);
+ svm_tsc_ratio_load(v);
if ( cpu_has_rdtscp )
wrmsrl(MSR_TSC_AUX, hvm_msr_tsc_aux(v));
Index: xen-4.1.2-testing/xen/arch/x86/hvm/svm/vmcb.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/svm/vmcb.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/svm/vmcb.c
@@ -165,7 +165,9 @@ static int construct_vmcb(struct vcpu *v
/* TSC. */
vmcb->_tsc_offset = 0;
- if ( v->domain->arch.vtsc )
+
+ /* Don't need to intercept RDTSC if CPU supports TSC rate scaling */
+ if ( v->domain->arch.vtsc && !cpu_has_tsc_ratio )
{
vmcb->_general1_intercepts |= GENERAL1_INTERCEPT_RDTSC;
vmcb->_general2_intercepts |= GENERAL2_INTERCEPT_RDTSCP;
Index: xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/svm.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/hvm/svm/svm.h
+++ xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/svm.h
@@ -91,5 +91,13 @@ extern u32 svm_feature_flags;
#define cpu_has_svm_cleanbits cpu_has_svm_feature(SVM_FEATURE_VMCBCLEAN)
#define cpu_has_svm_decode cpu_has_svm_feature(SVM_FEATURE_DECODEASSISTS)
#define cpu_has_pause_filter cpu_has_svm_feature(SVM_FEATURE_PAUSEFILTER)
+#define cpu_has_tsc_ratio cpu_has_svm_feature(SVM_FEATURE_TSCRATEMSR)
+
+/* TSC rate */
+#define DEFAULT_TSC_RATIO 0x0000000100000000ULL
+#define TSC_RATIO_RSVD_BITS 0xffffff0000000000ULL
+#define TSC_RATIO(g_khz, h_khz) ( (((u64)(g_khz)<<32)/(u64)(h_khz)) & \
+ ~TSC_RATIO_RSVD_BITS )
+#define vcpu_tsc_ratio(v) TSC_RATIO((v)->domain->arch.tsc_khz, cpu_khz)
#endif /* __ASM_X86_HVM_SVM_H__ */
Index: xen-4.1.2-testing/xen/include/asm-x86/msr-index.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/msr-index.h
+++ xen-4.1.2-testing/xen/include/asm-x86/msr-index.h
@@ -261,6 +261,9 @@
#define MSR_AMD_PATCHLEVEL 0x0000008b
#define MSR_AMD_PATCHLOADER 0xc0010020
+/* AMD TSC RATE MSR */
+#define MSR_AMD64_TSC_RATIO 0xc0000104
+
/* AMD OS Visible Workaround MSRs */
#define MSR_AMD_OSVW_ID_LENGTH 0xc0010140
#define MSR_AMD_OSVW_STATUS 0xc0010141
++++++ 23462-libxc-cpu-feature.patch ++++++
References: FATE#311951
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1307023264 -3600
# Node ID 4804af7048cafecfc014c30cfea374eb0a0360e8
# Parent 5839e797a1307fceffcd0b9ad35ed31644378b47
libxc: Simplify and clean up xc_cpufeature.h
* Remove Linux-private defns with no direct relation to CPUID
* Remove word offsets into Linux-defined cpu_caps array
* Hard tabs -> soft tabs
Signed-off-by: Keir Fraser <keir@xxxxxxx>
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1307118421 -3600
# Node ID bcd2476c2e2d00dc6371e52fbff66fe3178b7944
# Parent 55c5eff9bf84d4c5f3463c01f038edc1c46f30bc
libxc: Don't refer to meaningless 'word offsets' in xc_cpufeature.h
Signed-off-by: Keir Fraser <keir@xxxxxxx>
Index: xen-4.1.2-testing/tools/libxc/xc_cpufeature.h
===================================================================
--- xen-4.1.2-testing.orig/tools/libxc/xc_cpufeature.h
+++ xen-4.1.2-testing/tools/libxc/xc_cpufeature.h
@@ -17,134 +17,115 @@
#ifndef __LIBXC_CPUFEATURE_H
#define __LIBXC_CPUFEATURE_H
-/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
-#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */
-#define X86_FEATURE_VME (0*32+ 1) /* Virtual Mode Extensions */
-#define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */
-#define X86_FEATURE_PSE (0*32+ 3) /* Page Size Extensions */
-#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */
-#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers,
RDMSR, WRMSR */
-#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address
Extensions */
-#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Architecture
*/
-#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */
-#define X86_FEATURE_APIC (0*32+ 9) /* Onboard APIC */
-#define X86_FEATURE_SEP (0*32+11) /* SYSENTER/SYSEXIT */
-#define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */
-#define X86_FEATURE_PGE (0*32+13) /* Page Global Enable */
-#define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture
*/
-#define X86_FEATURE_CMOV (0*32+15) /* CMOV instruction (FCMOVCC and
FCOMI too if FPU present) */
-#define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */
-#define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */
-#define X86_FEATURE_PN (0*32+18) /* Processor serial number */
-#define X86_FEATURE_CLFLSH (0*32+19) /* Supports the CLFLUSH instruction */
-#define X86_FEATURE_DS (0*32+21) /* Debug Store */
-#define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */
-#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */
-#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions
(fast save and restore */
- /* of FPU context), and CR4.OSFXSR
available */
-#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions
*/
-#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */
-#define X86_FEATURE_SELFSNOOP (0*32+27) /* CPU self snoop */
-#define X86_FEATURE_HT (0*32+28) /* Hyper-Threading */
-#define X86_FEATURE_ACC (0*32+29) /* Automatic clock control */
-#define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */
-#define X86_FEATURE_PBE (0*32+31) /* Pending Break Enable */
+/* Intel-defined CPU features, CPUID level 0x00000001 (edx) */
+#define X86_FEATURE_FPU 0 /* Onboard FPU */
+#define X86_FEATURE_VME 1 /* Virtual Mode Extensions */
+#define X86_FEATURE_DE 2 /* Debugging Extensions */
+#define X86_FEATURE_PSE 3 /* Page Size Extensions */
+#define X86_FEATURE_TSC 4 /* Time Stamp Counter */
+#define X86_FEATURE_MSR 5 /* Model-Specific Registers, RDMSR, WRMSR */
+#define X86_FEATURE_PAE 6 /* Physical Address Extensions */
+#define X86_FEATURE_MCE 7 /* Machine Check Architecture */
+#define X86_FEATURE_CX8 8 /* CMPXCHG8 instruction */
+#define X86_FEATURE_APIC 9 /* Onboard APIC */
+#define X86_FEATURE_SEP 11 /* SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR 12 /* Memory Type Range Registers */
+#define X86_FEATURE_PGE 13 /* Page Global Enable */
+#define X86_FEATURE_MCA 14 /* Machine Check Architecture */
+#define X86_FEATURE_CMOV 15 /* CMOV instruction */
+#define X86_FEATURE_PAT 16 /* Page Attribute Table */
+#define X86_FEATURE_PSE36 17 /* 36-bit PSEs */
+#define X86_FEATURE_PN 18 /* Processor serial number */
+#define X86_FEATURE_CLFLSH 19 /* Supports the CLFLUSH instruction */
+#define X86_FEATURE_DS 21 /* Debug Store */
+#define X86_FEATURE_ACPI 22 /* ACPI via MSR */
+#define X86_FEATURE_MMX 23 /* Multimedia Extensions */
+#define X86_FEATURE_FXSR 24 /* FXSAVE and FXRSTOR instructions */
+#define X86_FEATURE_XMM 25 /* Streaming SIMD Extensions */
+#define X86_FEATURE_XMM2 26 /* Streaming SIMD Extensions-2 */
+#define X86_FEATURE_SELFSNOOP 27 /* CPU self snoop */
+#define X86_FEATURE_HT 28 /* Hyper-Threading */
+#define X86_FEATURE_ACC 29 /* Automatic clock control */
+#define X86_FEATURE_IA64 30 /* IA-64 processor */
+#define X86_FEATURE_PBE 31 /* Pending Break Enable */
-/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
+/* AMD-defined CPU features, CPUID level 0x80000001 */
/* Don't duplicate feature flags which are redundant with Intel! */
-#define X86_FEATURE_SYSCALL (1*32+11) /* SYSCALL/SYSRET */
-#define X86_FEATURE_MP (1*32+19) /* MP Capable. */
-#define X86_FEATURE_NX (1*32+20) /* Execute Disable */
-#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
-#define X86_FEATURE_FFXSR (1*32+25) /* FFXSR instruction optimizations */
-#define X86_FEATURE_PAGE1GB (1*32+26) /* 1Gb large page support */
-#define X86_FEATURE_RDTSCP (1*32+27) /* RDTSCP */
-#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */
-#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */
-#define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! */
-
-/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
-#define X86_FEATURE_RECOVERY (2*32+ 0) /* CPU in recovery mode */
-#define X86_FEATURE_LONGRUN (2*32+ 1) /* Longrun power control */
-#define X86_FEATURE_LRTI (2*32+ 3) /* LongRun table interface */
-
-/* Other features, Linux-defined mapping, word 3 */
-/* This range is used for feature bits which conflict or are synthesized */
-#define X86_FEATURE_CXMMX (3*32+ 0) /* Cyrix MMX extensions */
-#define X86_FEATURE_K6_MTRR (3*32+ 1) /* AMD K6 nonstandard MTRRs */
-#define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */
-#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */
-/* cpu types for specific tunings: */
-#define X86_FEATURE_K8 (3*32+ 4) /* Opteron, Athlon64 */
-#define X86_FEATURE_K7 (3*32+ 5) /* Athlon */
-#define X86_FEATURE_P3 (3*32+ 6) /* P3 */
-#define X86_FEATURE_P4 (3*32+ 7) /* P4 */
-#define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */
-
-/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
-#define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */
-#define X86_FEATURE_PCLMULQDQ (4*32+ 1) /* Carry-less multiplication */
-#define X86_FEATURE_DTES64 (4*32+ 2) /* 64-bit Debug Store */
-#define X86_FEATURE_MWAIT (4*32+ 3) /* Monitor/Mwait support */
-#define X86_FEATURE_DSCPL (4*32+ 4) /* CPL Qualified Debug Store */
-#define X86_FEATURE_VMXE (4*32+ 5) /* Virtual Machine Extensions */
-#define X86_FEATURE_SMXE (4*32+ 6) /* Safer Mode Extensions */
-#define X86_FEATURE_EST (4*32+ 7) /* Enhanced SpeedStep */
-#define X86_FEATURE_TM2 (4*32+ 8) /* Thermal Monitor 2 */
-#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental Streaming SIMD
Extensions-3 */
-#define X86_FEATURE_CID (4*32+10) /* Context ID */
-#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */
-#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */
-#define X86_FEATURE_PDCM (4*32+15) /* Perf/Debug Capability MSR */
-#define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */
-#define X86_FEATURE_SSE4_1 (4*32+19) /* Streaming SIMD Extensions 4.1 */
-#define X86_FEATURE_SSE4_2 (4*32+20) /* Streaming SIMD Extensions 4.2 */
-#define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */
-#define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */
-#define X86_FEATURE_TSC_DEADLINE (4*32+24) /* "tdt" TSC Deadline Timer */
-#define X86_FEATURE_AES (4*32+25) /* AES acceleration
instructions */
-#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
-#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions
*/
-#define X86_FEATURE_F16C (4*32+29) /* Half-precision convert instruction
*/
-#define X86_FEATURE_RDRAND (4*32+30) /* Digital Random Number Generator */
-#define X86_FEATURE_HYPERVISOR (4*32+31) /* Running under some hypervisor */
-
-/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
-#define X86_FEATURE_XSTORE (5*32+ 2) /* on-CPU RNG present (xstore insn) */
-#define X86_FEATURE_XSTORE_EN (5*32+ 3) /* on-CPU RNG enabled */
-#define X86_FEATURE_XCRYPT (5*32+ 6) /* on-CPU crypto (xcrypt insn) */
-#define X86_FEATURE_XCRYPT_EN (5*32+ 7) /* on-CPU crypto enabled */
-#define X86_FEATURE_ACE2 (5*32+ 8) /* Advanced Cryptography Engine v2 */
-#define X86_FEATURE_ACE2_EN (5*32+ 9) /* ACE v2 enabled */
-#define X86_FEATURE_PHE (5*32+ 10) /* PadLock Hash Engine */
-#define X86_FEATURE_PHE_EN (5*32+ 11) /* PHE enabled */
-#define X86_FEATURE_PMM (5*32+ 12) /* PadLock Montgomery
Multiplier */
-#define X86_FEATURE_PMM_EN (5*32+ 13) /* PMM enabled */
-
-/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
-#define X86_FEATURE_LAHF_LM (6*32+ 0) /* LAHF/SAHF in long mode */
-#define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */
-#define X86_FEATURE_SVM (6*32+ 2) /* Secure virtual machine */
-#define X86_FEATURE_EXTAPIC (6*32+ 3) /* Extended APIC space */
-#define X86_FEATURE_CR8_LEGACY (6*32+ 4) /* CR8 in 32-bit mode */
-#define X86_FEATURE_ABM (6*32+ 5) /* Advanced bit manipulation */
-#define X86_FEATURE_SSE4A (6*32+ 6) /* SSE-4A */
-#define X86_FEATURE_MISALIGNSSE (6*32+ 7) /* Misaligned SSE mode */
-#define X86_FEATURE_3DNOWPREFETCH (6*32+ 8) /* 3DNow prefetch instructions */
-#define X86_FEATURE_OSVW (6*32+ 9) /* OS Visible Workaround */
-#define X86_FEATURE_IBS (6*32+10) /* Instruction Based Sampling */
-#define X86_FEATURE_XOP (6*32+11) /* extended AVX instructions */
-#define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */
-#define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */
-#define X86_FEATURE_LWP (6*32+15) /* Light Weight Profiling */
-#define X86_FEATURE_FMA4 (6*32+16) /* 4 operands MAC instructions */
-#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */
-#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */
-#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
-
-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
-#define X86_FEATURE_FSGSBASE (7*32+ 0) /* {RD,WR}{FS,GS}BASE instructions */
-#define X86_FEATURE_SMEP (7*32+ 7) /* Supervisor Mode Execution
Protection */
-#define X86_FEATURE_ERMS (7*32+ 9) /* Enhanced REP MOVSB/STOSB */
+#define X86_FEATURE_SYSCALL 11 /* SYSCALL/SYSRET */
+#define X86_FEATURE_MP 19 /* MP Capable. */
+#define X86_FEATURE_NX 20 /* Execute Disable */
+#define X86_FEATURE_MMXEXT 22 /* AMD MMX extensions */
+#define X86_FEATURE_FFXSR 25 /* FFXSR instruction optimizations */
+#define X86_FEATURE_PAGE1GB 26 /* 1Gb large page support */
+#define X86_FEATURE_RDTSCP 27 /* RDTSCP */
+#define X86_FEATURE_LM 29 /* Long Mode (x86-64) */
+#define X86_FEATURE_3DNOWEXT 30 /* AMD 3DNow! extensions */
+#define X86_FEATURE_3DNOW 31 /* 3DNow! */
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (ecx) */
+#define X86_FEATURE_XMM3 0 /* Streaming SIMD Extensions-3 */
+#define X86_FEATURE_PCLMULQDQ 1 /* Carry-less multiplication */
+#define X86_FEATURE_DTES64 2 /* 64-bit Debug Store */
+#define X86_FEATURE_MWAIT 3 /* Monitor/Mwait support */
+#define X86_FEATURE_DSCPL 4 /* CPL Qualified Debug Store */
+#define X86_FEATURE_VMXE 5 /* Virtual Machine Extensions */
+#define X86_FEATURE_SMXE 6 /* Safer Mode Extensions */
+#define X86_FEATURE_EST 7 /* Enhanced SpeedStep */
+#define X86_FEATURE_TM2 8 /* Thermal Monitor 2 */
+#define X86_FEATURE_SSSE3 9 /* Supplemental Streaming SIMD Exts-3 */
+#define X86_FEATURE_CID 10 /* Context ID */
+#define X86_FEATURE_CX16 13 /* CMPXCHG16B */
+#define X86_FEATURE_XTPR 14 /* Send Task Priority Messages */
+#define X86_FEATURE_PDCM 15 /* Perf/Debug Capability MSR */
+#define X86_FEATURE_DCA 18 /* Direct Cache Access */
+#define X86_FEATURE_SSE4_1 19 /* Streaming SIMD Extensions 4.1 */
+#define X86_FEATURE_SSE4_2 20 /* Streaming SIMD Extensions 4.2 */
+#define X86_FEATURE_X2APIC 21 /* x2APIC */
+#define X86_FEATURE_POPCNT 23 /* POPCNT instruction */
+#define X86_FEATURE_TSC_DEADLINE 24 /* "tdt" TSC Deadline Timer */
+#define X86_FEATURE_AES 25 /* AES acceleration instructions */
+#define X86_FEATURE_XSAVE 26 /* XSAVE/XRSTOR/XSETBV/XGETBV */
+#define X86_FEATURE_AVX 28 /* Advanced Vector Extensions */
+#define X86_FEATURE_F16C 29 /* Half-precision convert instruction */
+#define X86_FEATURE_RDRAND 30 /* Digital Random Number Generator */
+#define X86_FEATURE_HYPERVISOR 31 /* Running under some hypervisor */
+
+/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001 */
+#define X86_FEATURE_XSTORE 2 /* on-CPU RNG present (xstore insn) */
+#define X86_FEATURE_XSTORE_EN 3 /* on-CPU RNG enabled */
+#define X86_FEATURE_XCRYPT 6 /* on-CPU crypto (xcrypt insn) */
+#define X86_FEATURE_XCRYPT_EN 7 /* on-CPU crypto enabled */
+#define X86_FEATURE_ACE2 8 /* Advanced Cryptography Engine v2 */
+#define X86_FEATURE_ACE2_EN 9 /* ACE v2 enabled */
+#define X86_FEATURE_PHE 10 /* PadLock Hash Engine */
+#define X86_FEATURE_PHE_EN 11 /* PHE enabled */
+#define X86_FEATURE_PMM 12 /* PadLock Montgomery Multiplier */
+#define X86_FEATURE_PMM_EN 13 /* PMM enabled */
+
+/* More extended AMD flags: CPUID level 0x80000001, ecx */
+#define X86_FEATURE_LAHF_LM 0 /* LAHF/SAHF in long mode */
+#define X86_FEATURE_CMP_LEGACY 1 /* If yes HyperThreading not valid */
+#define X86_FEATURE_SVM 2 /* Secure virtual machine */
+#define X86_FEATURE_EXTAPIC 3 /* Extended APIC space */
+#define X86_FEATURE_CR8_LEGACY 4 /* CR8 in 32-bit mode */
+#define X86_FEATURE_ABM 5 /* Advanced bit manipulation */
+#define X86_FEATURE_SSE4A 6 /* SSE-4A */
+#define X86_FEATURE_MISALIGNSSE 7 /* Misaligned SSE mode */
+#define X86_FEATURE_3DNOWPREFETCH 8 /* 3DNow prefetch instructions */
+#define X86_FEATURE_OSVW 9 /* OS Visible Workaround */
+#define X86_FEATURE_IBS 10 /* Instruction Based Sampling */
+#define X86_FEATURE_XOP 11 /* extended AVX instructions */
+#define X86_FEATURE_SKINIT 12 /* SKINIT/STGI instructions */
+#define X86_FEATURE_WDT 13 /* Watchdog timer */
+#define X86_FEATURE_LWP 15 /* Light Weight Profiling */
+#define X86_FEATURE_FMA4 16 /* 4 operands MAC instructions */
+#define X86_FEATURE_NODEID_MSR 19 /* NodeId MSR */
+#define X86_FEATURE_TBM 21 /* trailing bit manipulations */
+#define X86_FEATURE_TOPOEXT 22 /* topology extensions CPUID leafs */
+
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx) */
+#define X86_FEATURE_FSGSBASE 0 /* {RD,WR}{FS,GS}BASE instructions */
+#define X86_FEATURE_SMEP 7 /* Supervisor Mode Execution Protection */
+#define X86_FEATURE_ERMS 9 /* Enhanced REP MOVSB/STOSB */
#endif /* __LIBXC_CPUFEATURE_H */
Index: xen-4.1.2-testing/tools/libxc/xc_cpuid_x86.c
===================================================================
--- xen-4.1.2-testing.orig/tools/libxc/xc_cpuid_x86.c
+++ xen-4.1.2-testing/tools/libxc/xc_cpuid_x86.c
@@ -25,9 +25,9 @@
#include "xc_cpufeature.h"
#include <xen/hvm/params.h>
-#define bitmaskof(idx) (1u << ((idx) & 31))
-#define clear_bit(idx, dst) ((dst) &= ~(1u << ((idx) & 31)))
-#define set_bit(idx, dst) ((dst) |= (1u << ((idx) & 31)))
+#define bitmaskof(idx) (1u << (idx))
+#define clear_bit(idx, dst) ((dst) &= ~(1u << (idx)))
+#define set_bit(idx, dst) ((dst) |= (1u << (idx)))
#define DEF_MAX_BASE 0x0000000du
#define DEF_MAX_EXT 0x80000008u
++++++ 23506-x86_Disable_set_gpfn_from_mfn_until_m2p_table_is_allocated..patch
++++++
changeset: 23506:d1309a79bde8
user: Keir Fraser <keir@xxxxxxx>
date: Fri Jun 10 08:18:33 2011 +0100
files: xen/arch/x86/x86_64/mm.c xen/include/asm-x86/mm.h
description:
x86: Disable set_gpfn_from_mfn until m2p table is allocated.
This is a prerequisite for calling set_gpfn_from_mfn() unconditionally
from free_heap_pages().
Signed-off-by: Keir Fraser <keir@xxxxxxx>
---
xen/arch/x86/x86_64/mm.c | 4 ++++
xen/include/asm-x86/mm.h | 15 +++++++++++++--
2 files changed, 17 insertions(+), 2 deletions(-)
Index: xen-4.1.2-testing/xen/arch/x86/x86_64/mm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/x86_64/mm.c
+++ xen-4.1.2-testing/xen/arch/x86/x86_64/mm.c
@@ -47,6 +47,8 @@ unsigned int __read_mostly pfn_pdx_hole_
unsigned int __read_mostly m2p_compat_vstart = __HYPERVISOR_COMPAT_VIRT_START;
+bool_t __read_mostly machine_to_phys_mapping_valid = 0;
+
/* Top-level master (and idle-domain) page directory. */
l4_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
idle_pg_table[L4_PAGETABLE_ENTRIES];
@@ -800,6 +802,8 @@ void __init paging_init(void)
#undef CNT
#undef MFN
+ machine_to_phys_mapping_valid = 1;
+
/* Set up linear page table mapping. */
l4e_write(&idle_pg_table[l4_table_offset(LINEAR_PT_VIRT_START)],
l4e_from_paddr(__pa(idle_pg_table), __PAGE_HYPERVISOR));
Index: xen-4.1.2-testing/xen/include/asm-x86/mm.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/mm.h
+++ xen-4.1.2-testing/xen/include/asm-x86/mm.h
@@ -469,7 +469,7 @@ TYPE_SAFE(unsigned long,mfn);
#ifdef CONFIG_COMPAT
#define compat_machine_to_phys_mapping ((unsigned int
*)RDWR_COMPAT_MPT_VIRT_START)
-#define set_gpfn_from_mfn(mfn, pfn) ({ \
+#define _set_gpfn_from_mfn(mfn, pfn) ({ \
struct domain *d = page_get_owner(__mfn_to_page(mfn)); \
unsigned long entry = (d && (d == dom_cow)) ? \
SHARED_M2P_ENTRY : (pfn); \
@@ -478,7 +478,7 @@ TYPE_SAFE(unsigned long,mfn);
machine_to_phys_mapping[(mfn)] = (entry)); \
})
#else
-#define set_gpfn_from_mfn(mfn, pfn) ({ \
+#define _set_gpfn_from_mfn(mfn, pfn) ({ \
struct domain *d = page_get_owner(__mfn_to_page(mfn)); \
if(d && (d == dom_cow)) \
machine_to_phys_mapping[(mfn)] = SHARED_M2P_ENTRY; \
@@ -486,6 +486,17 @@ TYPE_SAFE(unsigned long,mfn);
machine_to_phys_mapping[(mfn)] = (pfn); \
})
#endif
+
+/*
+ * Disable some users of set_gpfn_from_mfn() (e.g., free_heap_pages()) until
+ * the machine_to_phys_mapping is actually set up.
+ */
+extern bool_t machine_to_phys_mapping_valid;
+#define set_gpfn_from_mfn(mfn, pfn) do { \
+ if ( machine_to_phys_mapping_valid ) \
+ _set_gpfn_from_mfn(mfn, pfn); \
+} while (0)
+
#define get_gpfn_from_mfn(mfn) (machine_to_phys_mapping[(mfn)])
#define mfn_to_gmfn(_d, mfn) \
++++++
23507-xenpaging_update_machine_to_phys_mapping_during_page_deallocation.patch
++++++
changeset: 23507:0a29c8c3ddf7
user: Keir Fraser <keir@xxxxxxx>
date: Fri Jun 10 08:19:07 2011 +0100
files: xen/common/page_alloc.c
description:
xenpaging: update machine_to_phys_mapping[] during page deallocation
The machine_to_phys_mapping[] array needs updating during page
deallocation. If that page is allocated again, a call to
get_gpfn_from_mfn() will still return an old gfn from another guest.
This will cause trouble because this gfn number has no or different
meaning in the context of the current guest.
This happens when the entire guest ram is paged-out before
xen_vga_populate_vram() runs. Then XENMEM_populate_physmap is called
with gfn 0xff000. A new page is allocated with alloc_domheap_pages.
This new page does not have a gfn yet. However, in
guest_physmap_add_entry() the passed mfn maps still to an old gfn
(perhaps from another old guest). This old gfn is in paged-out state
in this guests context and has no mfn anymore. As a result, the
ASSERT() triggers because p2m_is_ram() is true for p2m_ram_paging*
types. If the machine_to_phys_mapping[] array is updated properly,
both loops in guest_physmap_add_entry() turn into no-ops for the new
page and the mfn/gfn mapping will be done at the end of the function.
If XENMEM_add_to_physmap is used with XENMAPSPACE_gmfn,
get_gpfn_from_mfn() will return an appearently valid gfn. As a
result, guest_physmap_remove_page() is called. The ASSERT in
p2m_remove_page triggers because the passed mfn does not match the old
mfn for the passed gfn.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
xen/common/page_alloc.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
Index: xen-4.1.2-testing/xen/common/page_alloc.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/page_alloc.c
+++ xen-4.1.2-testing/xen/common/page_alloc.c
@@ -528,7 +528,7 @@ static int reserve_offlined_page(struct
static void free_heap_pages(
struct page_info *pg, unsigned int order)
{
- unsigned long mask;
+ unsigned long mask, mfn = page_to_mfn(pg);
unsigned int i, node = phys_to_nid(page_to_maddr(pg)), tainted = 0;
unsigned int zone = page_to_zone(pg);
@@ -539,6 +539,10 @@ static void free_heap_pages(
for ( i = 0; i < (1 << order); i++ )
{
+ /* This page is not a guest frame any more. */
+ page_set_owner(&pg[i], NULL); /* set_gpfn_from_mfn snoops pg owner */
+ set_gpfn_from_mfn(mfn + i, INVALID_M2P_ENTRY);
+
/*
* Cannot assume that count_info == 0, as there are some corner cases
* where it isn't the case and yet it isn't a bug:
++++++ 23508-vmx-proc-based-ctls-probe.patch ++++++
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1307691167 -3600
# Node ID 2ef6bbee50371e1135236035ed1a9a7b8748e09f
# Parent 0a29c8c3ddf7395ea8e68c5f4cd8633023490022
x86/vmx: Small fixes to MSR_IA32_VMX_PROCBASED_CTLS feature probing.
Should check for VIRTUAL_INTR_PENDING as we unconditionally make use
of it. Also check for CR8 exiting unconditionally on x86/64, as this
is of use to nestedvmx, and every 64-bit cpu should support it.
Signed-off-by: Eddie Dong <eddie.dong@xxxxxxxxx>
Signed-off-by: Keir Fraser <keir@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/hvm/vmx/vmcs.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/vmx/vmcs.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/vmx/vmcs.c
@@ -148,6 +148,11 @@ static int vmx_init_vmcs_config(void)
MSR_IA32_VMX_PINBASED_CTLS, &mismatch);
min = (CPU_BASED_HLT_EXITING |
+ CPU_BASED_VIRTUAL_INTR_PENDING |
+#ifdef __x86_64__
+ CPU_BASED_CR8_LOAD_EXITING |
+ CPU_BASED_CR8_STORE_EXITING |
+#endif
CPU_BASED_INVLPG_EXITING |
CPU_BASED_CR3_LOAD_EXITING |
CPU_BASED_CR3_STORE_EXITING |
@@ -166,13 +171,9 @@ static int vmx_init_vmcs_config(void)
MSR_IA32_VMX_PROCBASED_CTLS, &mismatch);
_vmx_cpu_based_exec_control &= ~CPU_BASED_RDTSC_EXITING;
#ifdef __x86_64__
- if ( !(_vmx_cpu_based_exec_control & CPU_BASED_TPR_SHADOW) )
- {
- min |= CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING;
- _vmx_cpu_based_exec_control = adjust_vmx_controls(
- "CPU-Based Exec Control", min, opt,
- MSR_IA32_VMX_PROCBASED_CTLS, &mismatch);
- }
+ if ( _vmx_cpu_based_exec_control & CPU_BASED_TPR_SHADOW )
+ _vmx_cpu_based_exec_control &=
+ ~(CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING);
#endif
if ( _vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
++++++ 23509-x86_32_Fix_build_Define_machine_to_phys_mapping_valid.patch ++++++
changeset: 23509:782bc7b2661a
user: Keir Fraser <keir@xxxxxxx>
date: Fri Jun 10 13:51:39 2011 +0100
files: xen/arch/x86/x86_32/mm.c
description:
x86_32: Fix build: Define machine_to_phys_mapping_valid
Signed-off-by: Keir Fraser <keir@xxxxxxx>
---
xen/arch/x86/x86_32/mm.c | 4 ++++
1 file changed, 4 insertions(+)
Index: xen-4.1.2-testing/xen/arch/x86/x86_32/mm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/x86_32/mm.c
+++ xen-4.1.2-testing/xen/arch/x86/x86_32/mm.c
@@ -39,6 +39,8 @@ extern l1_pgentry_t l1_identmap[L1_PAGET
unsigned int __read_mostly PAGE_HYPERVISOR = __PAGE_HYPERVISOR;
unsigned int __read_mostly PAGE_HYPERVISOR_NOCACHE = __PAGE_HYPERVISOR_NOCACHE;
+bool_t __read_mostly machine_to_phys_mapping_valid = 0;
+
static unsigned long __read_mostly mpt_size;
void *alloc_xen_pagetable(void)
@@ -123,6 +125,8 @@ void __init paging_init(void)
#undef CNT
#undef MFN
+ machine_to_phys_mapping_valid = 1;
+
/* Create page tables for ioremap()/map_domain_page_global(). */
for ( i = 0; i < (IOREMAP_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
{
++++++ 23511-amd-fam15-no-flush-for-C3.patch ++++++
References: FATE#309893
# HG changeset patch
# User Mark Langsdorf <mark.langsdorf@xxxxxxx>
# Date 1308051989 -3600
# Node ID 450f1d198e1e299b69489d513f591f0301cc5166
# Parent 864a3dd1d9b4664f1ece44c9eaf390969253b7a8
x86/amd: Eliminate cache flushing when entering C3 on select AMD processors
AMD Fam15h processors have a shared cache. It does not need
to be be flushed when entering C3 and doing so causes reduces
performance. Modify acpi_processor_power_init_bm_check to
prevent these processors from flushing when entering C3.
Signed-off-by: Mark Langsdorf <mark.langsdorf@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/acpi/cpu_idle.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/acpi/cpu_idle.c
+++ xen-4.1.2-testing/xen/arch/x86/acpi/cpu_idle.c
@@ -710,7 +710,8 @@ static void acpi_processor_power_init_bm
flags->bm_check = 0;
if ( num_online_cpus() == 1 )
flags->bm_check = 1;
- else if ( c->x86_vendor == X86_VENDOR_INTEL )
+ else if ( (c->x86_vendor == X86_VENDOR_INTEL) ||
+ ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 0x15)) )
{
/*
* Today all MP CPUs that support C3 share cache.
++++++ 23562-xenpaging_remove_unused_spinlock_in_pager.patch ++++++
changeset: 23562:8a7f52c59d64
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:02 2011 +0200
files: tools/xenpaging/mem_event.h tools/xenpaging/spinlock.h
tools/xenpaging/xenpaging.c tools/xenpaging/xenpaging.h
description:
xenpaging: remove unused spinlock in pager
The spinlock code in the pager is a no-op because xenpaging is a single
threaded application. There is no locking when put_response() places a
response into the ringbuffer.
The only locking is inside the hypervisor, where mem_event_put_request() and
mem_event_get_response() lock the ringbuffer to protect multiple vcpus from
each other.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/mem_event.h | 5 ---
tools/xenpaging/spinlock.h | 69 --------------------------------------------
tools/xenpaging/xenpaging.c | 12 -------
tools/xenpaging/xenpaging.h | 1
4 files changed, 87 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/mem_event.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/mem_event.h
+++ xen-4.1.2-testing/tools/xenpaging/mem_event.h
@@ -25,7 +25,6 @@
#define __XEN_MEM_EVENT_H__
-#include "spinlock.h"
#include "xc.h"
#include <xc_private.h>
@@ -33,9 +32,6 @@
#include <xen/mem_event.h>
-#define mem_event_ring_lock_init(_m) spin_lock_init(&(_m)->ring_lock)
-#define mem_event_ring_lock(_m) spin_lock(&(_m)->ring_lock)
-#define mem_event_ring_unlock(_m) spin_unlock(&(_m)->ring_lock)
typedef struct mem_event {
@@ -45,7 +41,6 @@ typedef struct mem_event {
mem_event_back_ring_t back_ring;
mem_event_shared_page_t *shared_page;
void *ring_page;
- spinlock_t ring_lock;
} mem_event_t;
Index: xen-4.1.2-testing/tools/xenpaging/spinlock.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/spinlock.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/******************************************************************************
- * tools/xenpaging/spinlock.h
- *
- * Spinlock implementation.
- *
- * Copyright (c) 2009 Citrix Systems, Inc. (Patrick Colp)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-
-#ifndef __SPINLOCK_H__
-#define __SPINLOCK_H__
-
-
-#include "bitops.h"
-
-
-#define SPIN_LOCK_UNLOCKED 0
-
-
-typedef int spinlock_t;
-
-
-static inline void spin_lock(spinlock_t *lock)
-{
- while ( test_and_set_bit(1, lock) );
-}
-
-static inline void spin_lock_init(spinlock_t *lock)
-{
- *lock = SPIN_LOCK_UNLOCKED;
-}
-
-static inline void spin_unlock(spinlock_t *lock)
-{
- *lock = SPIN_LOCK_UNLOCKED;
-}
-
-static inline int spin_trylock(spinlock_t *lock)
-{
- return !test_and_set_bit(1, lock);
-}
-
-
-#endif // __SPINLOCK_H__
-
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -32,7 +32,6 @@
#include <xen/mem_event.h>
#include "bitops.h"
-#include "spinlock.h"
#include "file_ops.h"
#include "xc.h"
@@ -127,9 +126,6 @@ static xenpaging_t *xenpaging_init(domid
BACK_RING_INIT(&paging->mem_event.back_ring,
(mem_event_sring_t *)paging->mem_event.ring_page,
PAGE_SIZE);
-
- /* Initialise lock */
- mem_event_ring_lock_init(&paging->mem_event);
/* Initialise Xen */
rc = xc_mem_event_enable(xch, paging->mem_event.domain_id,
@@ -302,8 +298,6 @@ static int get_request(mem_event_t *mem_
mem_event_back_ring_t *back_ring;
RING_IDX req_cons;
- mem_event_ring_lock(mem_event);
-
back_ring = &mem_event->back_ring;
req_cons = back_ring->req_cons;
@@ -315,8 +309,6 @@ static int get_request(mem_event_t *mem_
back_ring->req_cons = req_cons;
back_ring->sring->req_event = req_cons + 1;
- mem_event_ring_unlock(mem_event);
-
return 0;
}
@@ -325,8 +317,6 @@ static int put_response(mem_event_t *mem
mem_event_back_ring_t *back_ring;
RING_IDX rsp_prod;
- mem_event_ring_lock(mem_event);
-
back_ring = &mem_event->back_ring;
rsp_prod = back_ring->rsp_prod_pvt;
@@ -338,8 +328,6 @@ static int put_response(mem_event_t *mem
back_ring->rsp_prod_pvt = rsp_prod;
RING_PUSH_RESPONSES(back_ring);
- mem_event_ring_unlock(mem_event);
-
return 0;
}
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.h
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.h
@@ -25,7 +25,6 @@
#define __XEN_PAGING2_H__
-#include "spinlock.h"
#include "xc.h"
#include <xc_private.h>
++++++ 23571-vtd-fault-verbosity.patch ++++++
# HG changeset patch
# User Allen Kay <allen.m.kay@xxxxxxxxx>
# Date 1308823884 -3600
# Node ID d3ac71f22e8621d9a7604f82f3976337e6c97a9a
# Parent 065ca14be963fe4da55d629ed0b3692a14253a86
[VTD] print out debug message in vt-d fault handler only when iommu=debug is set
Print out debug messages in vtd_page_fault() handler only when
iommu=debug is set xen boot parameter.
Signed-off-by: Allen Kay <allen.m.kay@xxxxxxxxx>
Index: xen-4.1.2-testing/xen/drivers/passthrough/amd/iommu_acpi.c
===================================================================
--- xen-4.1.2-testing.orig/xen/drivers/passthrough/amd/iommu_acpi.c
+++ xen-4.1.2-testing/xen/drivers/passthrough/amd/iommu_acpi.c
@@ -822,7 +822,7 @@ static int __init parse_ivrs_table(struc
BUG_ON(!table);
- if ( amd_iommu_debug )
+ if ( iommu_debug )
dump_acpi_table_header(table);
/* parse IVRS blocks */
Index: xen-4.1.2-testing/xen/drivers/passthrough/iommu.c
===================================================================
--- xen-4.1.2-testing.orig/xen/drivers/passthrough/iommu.c
+++ xen-4.1.2-testing/xen/drivers/passthrough/iommu.c
@@ -48,7 +48,7 @@ bool_t __read_mostly iommu_snoop = 1;
bool_t __read_mostly iommu_qinval = 1;
bool_t __read_mostly iommu_intremap = 1;
bool_t __read_mostly iommu_hap_pt_share;
-bool_t __read_mostly amd_iommu_debug;
+bool_t __read_mostly iommu_debug;
bool_t __read_mostly amd_iommu_perdev_intremap;
static void __init parse_iommu_param(char *s)
@@ -74,8 +74,8 @@ static void __init parse_iommu_param(cha
iommu_qinval = 0;
else if ( !strcmp(s, "no-intremap") )
iommu_intremap = 0;
- else if ( !strcmp(s, "amd-iommu-debug") )
- amd_iommu_debug = 1;
+ else if ( !strcmp(s, "debug") )
+ iommu_debug = 1;
else if ( !strcmp(s, "amd-iommu-perdev-intremap") )
amd_iommu_perdev_intremap = 1;
else if ( !strcmp(s, "dom0-passthrough") )
Index: xen-4.1.2-testing/xen/drivers/passthrough/vtd/iommu.c
===================================================================
--- xen-4.1.2-testing.orig/xen/drivers/passthrough/vtd/iommu.c
+++ xen-4.1.2-testing/xen/drivers/passthrough/vtd/iommu.c
@@ -821,7 +821,7 @@ static int iommu_page_fault_do_one(struc
if ( fault_type == DMA_REMAP )
{
- dprintk(XENLOG_WARNING VTDPREFIX,
+ INTEL_IOMMU_DEBUG(
"DMAR:[%s] Request device [%02x:%02x.%d] "
"fault addr %"PRIx64", iommu reg = %p\n"
"DMAR:[fault reason %02xh] %s\n",
@@ -830,12 +830,13 @@ static int iommu_page_fault_do_one(struc
PCI_FUNC(source_id & 0xFF), addr, iommu->reg,
fault_reason, reason);
#ifndef __i386__ /* map_domain_page() cannot be used in this context */
- print_vtd_entries(iommu, (source_id >> 8),
+ if (iommu_debug)
+ print_vtd_entries(iommu, (source_id >> 8),
(source_id & 0xff), (addr >> PAGE_SHIFT));
#endif
}
else
- dprintk(XENLOG_WARNING VTDPREFIX,
+ INTEL_IOMMU_DEBUG(
"INTR-REMAP: Request device [%02x:%02x.%d] "
"fault index %"PRIx64", iommu reg = %p\n"
"INTR-REMAP:[fault reason %02xh] %s\n",
@@ -849,26 +850,19 @@ static int iommu_page_fault_do_one(struc
static void iommu_fault_status(u32 fault_status)
{
if ( fault_status & DMA_FSTS_PFO )
- dprintk(XENLOG_ERR VTDPREFIX,
- "iommu_fault_status: Fault Overflow\n");
+ INTEL_IOMMU_DEBUG("iommu_fault_status: Fault Overflow\n");
if ( fault_status & DMA_FSTS_PPF )
- dprintk(XENLOG_ERR VTDPREFIX,
- "iommu_fault_status: Primary Pending Fault\n");
+ INTEL_IOMMU_DEBUG("iommu_fault_status: Primary Pending Fault\n");
if ( fault_status & DMA_FSTS_AFO )
- dprintk(XENLOG_ERR VTDPREFIX,
- "iommu_fault_status: Advanced Fault Overflow\n");
+ INTEL_IOMMU_DEBUG("iommu_fault_status: Advanced Fault Overflow\n");
if ( fault_status & DMA_FSTS_APF )
- dprintk(XENLOG_ERR VTDPREFIX,
- "iommu_fault_status: Advanced Pending Fault\n");
+ INTEL_IOMMU_DEBUG("iommu_fault_status: Advanced Pending Fault\n");
if ( fault_status & DMA_FSTS_IQE )
- dprintk(XENLOG_ERR VTDPREFIX,
- "iommu_fault_status: Invalidation Queue Error\n");
+ INTEL_IOMMU_DEBUG("iommu_fault_status: Invalidation Queue Error\n");
if ( fault_status & DMA_FSTS_ICE )
- dprintk(XENLOG_ERR VTDPREFIX,
- "iommu_fault_status: Invalidation Completion Error\n");
+ INTEL_IOMMU_DEBUG("iommu_fault_status: Invalidation Completion
Error\n");
if ( fault_status & DMA_FSTS_ITE )
- dprintk(XENLOG_ERR VTDPREFIX,
- "iommu_fault_status: Invalidation Time-out Error\n");
+ INTEL_IOMMU_DEBUG("iommu_fault_status: Invalidation Time-out Error\n");
}
#define PRIMARY_FAULT_REG_LEN (16)
Index: xen-4.1.2-testing/xen/drivers/passthrough/vtd/iommu.h
===================================================================
--- xen-4.1.2-testing.orig/xen/drivers/passthrough/vtd/iommu.h
+++ xen-4.1.2-testing/xen/drivers/passthrough/vtd/iommu.h
@@ -512,4 +512,11 @@ struct intel_iommu {
struct acpi_drhd_unit *drhd;
};
+#define INTEL_IOMMU_DEBUG(fmt, args...) \
+ do \
+ { \
+ if ( iommu_debug ) \
+ dprintk(XENLOG_WARNING VTDPREFIX, fmt, ## args); \
+ } while(0)
+
#endif
Index: xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ xen-4.1.2-testing/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -34,7 +34,7 @@
#define AMD_IOMMU_DEBUG(fmt, args...) \
do \
{ \
- if ( amd_iommu_debug ) \
+ if ( iommu_debug ) \
printk(XENLOG_INFO "AMD-Vi: " fmt, ## args); \
} while(0)
Index: xen-4.1.2-testing/xen/include/xen/iommu.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/xen/iommu.h
+++ xen-4.1.2-testing/xen/include/xen/iommu.h
@@ -31,7 +31,7 @@ extern bool_t force_iommu, iommu_verbose
extern bool_t iommu_workaround_bios_bug, iommu_passthrough;
extern bool_t iommu_snoop, iommu_qinval, iommu_intremap;
extern bool_t iommu_hap_pt_share;
-extern bool_t amd_iommu_debug;
+extern bool_t iommu_debug;
extern bool_t amd_iommu_perdev_intremap;
/* Does this domain have a P2M table we can use as its IOMMU pagetable? */
++++++ 23574-x86-dom0-compressed-ELF.patch ++++++
References: fate#311376, fate#311529, bnc#578927, bnc#628554
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1308825237 -3600
# Node ID d7644abc218d3232b9d957ce94fc4b4bcc1f456e
# Parent 584c2e5e03d96f912cdfe90f8e9f910d5d661706
x86: allow Dom0 image to be compressed ELF
Rather than being able to decompress only the payloads of bzImage
containers, extend the logic to also decompress simple compressed ELF
images. At once, allow uncompressed bzImage payloads.
This is a prerequisite for native EFI booting support (where, in the
absence of a capable secondary boot loader, the image will always be
in compressed form).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/bzimage.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/bzimage.c
+++ xen-4.1.2-testing/xen/arch/x86/bzimage.c
@@ -5,6 +5,7 @@
#include <xen/string.h>
#include <xen/types.h>
#include <xen/decompress.h>
+#include <xen/libelf.h>
#include <asm/bzimage.h>
#define HEAPORDER 3
@@ -199,25 +200,36 @@ static __init int bzimage_check(struct s
return 1;
}
-int __init bzimage_headroom(char *image_start, unsigned long image_length)
+static unsigned long __initdata orig_image_len;
+
+unsigned long __init bzimage_headroom(char *image_start,
+ unsigned long image_length)
{
struct setup_header *hdr = (struct setup_header *)image_start;
- char *img;
- int err, headroom;
+ int err;
+ unsigned long headroom;
err = bzimage_check(hdr, image_length);
- if (err < 1)
+ if ( err < 0 )
return 0;
- img = image_start + (hdr->setup_sects+1) * 512;
- img += hdr->payload_offset;
+ if ( err > 0 )
+ {
+ image_start += (hdr->setup_sects + 1) * 512 + hdr->payload_offset;
+ image_length = hdr->payload_length;
+ }
+
+ if ( elf_is_elfbinary(image_start) )
+ return 0;
- headroom = output_length(img, hdr->payload_length);
- if (gzip_check(img, hdr->payload_length)) {
+ orig_image_len = image_length;
+ headroom = output_length(image_start, image_length);
+ if (gzip_check(image_start, image_length))
+ {
headroom += headroom >> 12; /* Add 8 bytes for every 32K input block */
headroom += (32768 + 18); /* Add 32K + 18 bytes of extra headroom */
} else
- headroom += hdr->payload_length;
+ headroom += image_length;
headroom = (headroom + 4095) & ~4095;
return headroom;
@@ -229,18 +241,24 @@ int __init bzimage_parse(char *image_bas
int err = bzimage_check(hdr, *image_len);
unsigned long output_len;
- if (err < 1)
+ if ( err < 0 )
return err;
+ if ( err > 0 )
+ {
+ *image_start += (hdr->setup_sects + 1) * 512 + hdr->payload_offset;
+ *image_len = hdr->payload_length;
+ }
+
+ if ( elf_is_elfbinary(*image_start) )
+ return 0;
+
BUG_ON(!(image_base < *image_start));
- *image_start += (hdr->setup_sects+1) * 512;
- *image_start += hdr->payload_offset;
- *image_len = hdr->payload_length;
- output_len = output_length(*image_start, *image_len);
+ output_len = output_length(*image_start, orig_image_len);
- if ( (err = perform_gunzip(image_base, *image_start, *image_len)) > 0 )
- err = decompress(*image_start, *image_len, image_base);
+ if ( (err = perform_gunzip(image_base, *image_start, orig_image_len)) > 0 )
+ err = decompress(*image_start, orig_image_len, image_base);
if ( !err )
{
Index: xen-4.1.2-testing/xen/include/asm-x86/bzimage.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/bzimage.h
+++ xen-4.1.2-testing/xen/include/asm-x86/bzimage.h
@@ -4,10 +4,9 @@
#include <xen/config.h>
#include <xen/init.h>
-int __init bzimage_headroom(char *image_start, unsigned long image_length);
+unsigned long bzimage_headroom(char *image_start, unsigned long image_length);
-int __init bzimage_parse(char *image_base,
- char **image_start,
- unsigned long *image_len);
+int bzimage_parse(char *image_base, char **image_start,
+ unsigned long *image_len);
#endif /* __X86_BZIMAGE_H__ */
++++++ 23575-x86-DMI.patch ++++++
References: fate#311376, fate#311529, bnc#578927, bnc#628554
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1308825280 -3600
# Node ID 4d9598a6a7777c50e109d7e2eb6d1cb28bcb4509
# Parent d7644abc218d3232b9d957ce94fc4b4bcc1f456e
x86/DMI: use proper structures instead of byte offsets
Besides being (in my eyes) desirable cleanup, this at once represents
another prerequisite for native EFI booting support.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/dmi_scan.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/dmi_scan.c
+++ xen-4.1.2-testing/xen/arch/x86/dmi_scan.c
@@ -10,11 +10,31 @@
#include <asm/system.h>
#include <xen/dmi.h>
-#define bt_ioremap(b,l) ((u8 *)__acpi_map_table(b,l))
+#define bt_ioremap(b,l) ((void *)__acpi_map_table(b,l))
#define bt_iounmap(b,l) ((void)0)
#define memcpy_fromio memcpy
#define alloc_bootmem(l) xmalloc_bytes(l)
+struct dmi_eps {
+ char anchor[5]; /* "_DMI_" */
+ u8 checksum;
+ u16 size;
+ u32 address;
+ u16 num_structures;
+ u8 revision;
+} __attribute__((packed));
+
+struct smbios_eps {
+ char anchor[4]; /* "_SM_" */
+ u8 checksum;
+ u8 length;
+ u8 major, minor;
+ u16 max_size;
+ u8 revision;
+ u8 _rsrvd_[5];
+ struct dmi_eps dmi;
+} __attribute__((packed));
+
struct dmi_header
{
u8 type;
@@ -90,62 +110,70 @@ static int __init dmi_table(u32 base, in
}
-inline static int __init dmi_checksum(u8 *buf)
+static inline bool_t __init dmi_checksum(const void __iomem *buf,
+ unsigned int len)
{
- u8 sum=0;
- int a;
+ u8 sum = 0;
+ const u8 *p = buf;
+ unsigned int a;
- for(a=0; a<15; a++)
- sum+=buf[a];
- return (sum==0);
+ for (a = 0; a < len; a++)
+ sum += p[a];
+ return sum == 0;
}
int __init dmi_get_table(u32 *base, u32 *len)
{
- u8 buf[15];
+ struct dmi_eps eps;
char __iomem *p, *q;
p = maddr_to_virt(0xF0000);
for (q = p; q < p + 0x10000; q += 16) {
- memcpy_fromio(buf, q, 15);
- if (memcmp(buf, "_DMI_", 5)==0 && dmi_checksum(buf)) {
- *base=buf[11]<<24|buf[10]<<16|buf[9]<<8|buf[8];
- *len=buf[7]<<8|buf[6];
+ memcpy_fromio(&eps, q, 15);
+ if (memcmp(eps.anchor, "_DMI_", 5) == 0 &&
+ dmi_checksum(&eps, sizeof(eps))) {
+ *base = eps.address;
+ *len = eps.size;
return 0;
}
}
return -1;
}
+static int __init _dmi_iterate(const struct dmi_eps *dmi,
+ const struct smbios_eps __iomem *smbios,
+ void (*decode)(struct dmi_header *))
+{
+ u16 num = dmi->num_structures;
+ u16 len = dmi->size;
+ u32 base = dmi->address;
+
+ /*
+ * DMI version 0.0 means that the real version is taken from
+ * the SMBIOS version, which we may not know at this point.
+ */
+ if (dmi->revision)
+ printk(KERN_INFO "DMI %d.%d present.\n",
+ dmi->revision >> 4, dmi->revision & 0x0f);
+ else if (!smbios)
+ printk(KERN_INFO "DMI present.\n");
+ dmi_printk((KERN_INFO "%d structures occupying %d bytes.\n",
+ num, len));
+ dmi_printk((KERN_INFO "DMI table at 0x%08X.\n", base));
+ return dmi_table(base, len, num, decode);
+}
+
static int __init dmi_iterate(void (*decode)(struct dmi_header *))
{
- u8 buf[15];
+ struct dmi_eps eps;
char __iomem *p, *q;
p = maddr_to_virt(0xF0000);
for (q = p; q < p + 0x10000; q += 16) {
- memcpy_fromio(buf, q, 15);
- if (memcmp(buf, "_DMI_", 5)==0 && dmi_checksum(buf)) {
- u16 num=buf[13]<<8|buf[12];
- u16 len=buf[7]<<8|buf[6];
- u32 base=buf[11]<<24|buf[10]<<16|buf[9]<<8|buf[8];
-
- /*
- * DMI version 0.0 means that the real version is taken
from
- * the SMBIOS version, which we don't know at this
point.
- */
- if(buf[14]!=0)
- printk(KERN_INFO "DMI %d.%d present.\n",
- buf[14]>>4, buf[14]&0x0F);
- else
- printk(KERN_INFO "DMI present.\n");
- dmi_printk((KERN_INFO "%d structures occupying %d
bytes.\n",
- num, len));
- dmi_printk((KERN_INFO "DMI table at 0x%08X.\n",
- base));
- if(dmi_table(base,len, num, decode)==0)
- return 0;
- }
+ memcpy_fromio(&eps, q, sizeof(eps));
+ if (memcmp(eps.anchor, "_DMI_", 5) == 0 &&
+ dmi_checksum(&eps, sizeof(eps)))
+ return _dmi_iterate(&eps, NULL, decode);
}
return -1;
}
++++++ 23576-x86_show_page_walk_also_for_early_page_faults.patch ++++++
changeset: 23576:e2235fe267eb
user: Jan Beulich <jbeulich@xxxxxxxxxx>
date: Thu Jun 23 11:35:55 2011 +0100
files: xen/arch/x86/mm.c xen/arch/x86/traps.c xen/arch/x86/x86_32/mm.c
xen/arch/x86/x86_32/traps.c xen/arch/x86/x86_64/mm.c xen/arch/x86/x86_64/traps.c
description:
x86: show page walk also for early page faults
At once, move the common (between 32- and 64-bit) definition of
machine_to_phys_mapping_valid to a common location.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
---
xen/arch/x86/mm.c | 2 ++
xen/arch/x86/traps.c | 1 +
xen/arch/x86/x86_32/mm.c | 2 --
xen/arch/x86/x86_32/traps.c | 9 ++++++---
xen/arch/x86/x86_64/mm.c | 2 --
xen/arch/x86/x86_64/traps.c | 12 ++++++++----
6 files changed, 17 insertions(+), 11 deletions(-)
Index: xen-4.1.2-testing/xen/arch/x86/mm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm.c
+++ xen-4.1.2-testing/xen/arch/x86/mm.c
@@ -151,6 +151,8 @@ unsigned long __read_mostly pdx_group_va
(FRAMETABLE_SIZE / sizeof(*frame_table) + PDX_GROUP_COUNT - 1)
/ PDX_GROUP_COUNT)] = { [0] = 1 };
+bool_t __read_mostly machine_to_phys_mapping_valid = 0;
+
#define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT)
bool_t __read_mostly opt_allow_superpage;
Index: xen-4.1.2-testing/xen/arch/x86/traps.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/traps.c
+++ xen-4.1.2-testing/xen/arch/x86/traps.c
@@ -1428,6 +1428,7 @@ asmlinkage void __init do_early_page_fau
unsigned long *stk = (unsigned long *)regs;
printk("Early fatal page fault at %04x:%p (cr2=%p, ec=%04x)\n",
regs->cs, _p(regs->eip), _p(cr2), regs->error_code);
+ show_page_walk(cr2);
printk("Stack dump: ");
while ( ((long)stk & ((PAGE_SIZE - 1) & ~(BYTES_PER_LONG - 1))) != 0 )
printk("%p ", _p(*stk++));
Index: xen-4.1.2-testing/xen/arch/x86/x86_32/mm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/x86_32/mm.c
+++ xen-4.1.2-testing/xen/arch/x86/x86_32/mm.c
@@ -39,8 +39,6 @@ extern l1_pgentry_t l1_identmap[L1_PAGET
unsigned int __read_mostly PAGE_HYPERVISOR = __PAGE_HYPERVISOR;
unsigned int __read_mostly PAGE_HYPERVISOR_NOCACHE = __PAGE_HYPERVISOR_NOCACHE;
-bool_t __read_mostly machine_to_phys_mapping_valid = 0;
-
static unsigned long __read_mostly mpt_size;
void *alloc_xen_pagetable(void)
Index: xen-4.1.2-testing/xen/arch/x86/x86_32/traps.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/x86_32/traps.c
+++ xen-4.1.2-testing/xen/arch/x86/x86_32/traps.c
@@ -164,7 +164,8 @@ void show_page_walk(unsigned long addr)
l3t += (cr3 & 0xFE0UL) >> 3;
l3e = l3t[l3_table_offset(addr)];
mfn = l3e_get_pfn(l3e);
- pfn = mfn_valid(mfn) ? get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
+ pfn = mfn_valid(mfn) && machine_to_phys_mapping_valid ?
+ get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
printk(" L3[0x%03lx] = %"PRIpte" %08lx\n",
l3_table_offset(addr), l3e_get_intpte(l3e), pfn);
unmap_domain_page(l3t);
@@ -175,7 +176,8 @@ void show_page_walk(unsigned long addr)
l2t = map_domain_page(mfn);
l2e = l2t[l2_table_offset(addr)];
mfn = l2e_get_pfn(l2e);
- pfn = mfn_valid(mfn) ? get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
+ pfn = mfn_valid(mfn) && machine_to_phys_mapping_valid ?
+ get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
printk(" L2[0x%03lx] = %"PRIpte" %08lx %s\n",
l2_table_offset(addr), l2e_get_intpte(l2e), pfn,
(l2e_get_flags(l2e) & _PAGE_PSE) ? "(PSE)" : "");
@@ -188,7 +190,8 @@ void show_page_walk(unsigned long addr)
l1t = map_domain_page(mfn);
l1e = l1t[l1_table_offset(addr)];
mfn = l1e_get_pfn(l1e);
- pfn = mfn_valid(mfn) ? get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
+ pfn = mfn_valid(mfn) && machine_to_phys_mapping_valid ?
+ get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
printk(" L1[0x%03lx] = %"PRIpte" %08lx\n",
l1_table_offset(addr), l1e_get_intpte(l1e), pfn);
unmap_domain_page(l1t);
Index: xen-4.1.2-testing/xen/arch/x86/x86_64/mm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/x86_64/mm.c
+++ xen-4.1.2-testing/xen/arch/x86/x86_64/mm.c
@@ -47,8 +47,6 @@ unsigned int __read_mostly pfn_pdx_hole_
unsigned int __read_mostly m2p_compat_vstart = __HYPERVISOR_COMPAT_VIRT_START;
-bool_t __read_mostly machine_to_phys_mapping_valid = 0;
-
/* Top-level master (and idle-domain) page directory. */
l4_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
idle_pg_table[L4_PAGETABLE_ENTRIES];
Index: xen-4.1.2-testing/xen/arch/x86/x86_64/traps.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/x86_64/traps.c
+++ xen-4.1.2-testing/xen/arch/x86/x86_64/traps.c
@@ -176,7 +176,8 @@ void show_page_walk(unsigned long addr)
l4t = mfn_to_virt(mfn);
l4e = l4t[l4_table_offset(addr)];
mfn = l4e_get_pfn(l4e);
- pfn = mfn_valid(mfn) ? get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
+ pfn = mfn_valid(mfn) && machine_to_phys_mapping_valid ?
+ get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
printk(" L4[0x%03lx] = %"PRIpte" %016lx\n",
l4_table_offset(addr), l4e_get_intpte(l4e), pfn);
if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) ||
@@ -186,7 +187,8 @@ void show_page_walk(unsigned long addr)
l3t = mfn_to_virt(mfn);
l3e = l3t[l3_table_offset(addr)];
mfn = l3e_get_pfn(l3e);
- pfn = mfn_valid(mfn) ? get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
+ pfn = mfn_valid(mfn) && machine_to_phys_mapping_valid ?
+ get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
printk(" L3[0x%03lx] = %"PRIpte" %016lx%s\n",
l3_table_offset(addr), l3e_get_intpte(l3e), pfn,
(l3e_get_flags(l3e) & _PAGE_PSE) ? " (PSE)" : "");
@@ -198,7 +200,8 @@ void show_page_walk(unsigned long addr)
l2t = mfn_to_virt(mfn);
l2e = l2t[l2_table_offset(addr)];
mfn = l2e_get_pfn(l2e);
- pfn = mfn_valid(mfn) ? get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
+ pfn = mfn_valid(mfn) && machine_to_phys_mapping_valid ?
+ get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
printk(" L2[0x%03lx] = %"PRIpte" %016lx %s\n",
l2_table_offset(addr), l2e_get_intpte(l2e), pfn,
(l2e_get_flags(l2e) & _PAGE_PSE) ? "(PSE)" : "");
@@ -210,7 +213,8 @@ void show_page_walk(unsigned long addr)
l1t = mfn_to_virt(mfn);
l1e = l1t[l1_table_offset(addr)];
mfn = l1e_get_pfn(l1e);
- pfn = mfn_valid(mfn) ? get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
+ pfn = mfn_valid(mfn) && machine_to_phys_mapping_valid ?
+ get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
printk(" L1[0x%03lx] = %"PRIpte" %016lx\n",
l1_table_offset(addr), l1e_get_intpte(l1e), pfn);
}
++++++ 23577-tools_merge_several_bitop_functions_into_xc_bitops.h.patch ++++++
++++ 1023 lines (skipped)
++++++ 23578-xenpaging_add_xs_handle_to_struct_xenpaging.patch ++++++
changeset: 23578:7299a9a44b35
user: Olaf Hering <olaf@xxxxxxxxx>
date: Wed Jun 22 14:47:09 2011 +0100
files: tools/xenpaging/xenpaging.c tools/xenpaging/xenpaging.h
description:
xenpaging: add xs_handle to struct xenpaging
A xs_handle is currently used in the xc_mem_paging_flush_ioemu_cache()
function and will be used by a subsequent patch.
Add it to struct xenpaging.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
---
tools/xenpaging/xenpaging.c | 14 ++++++++++++++
tools/xenpaging/xenpaging.h | 1 +
2 files changed, 15 insertions(+)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -28,6 +28,7 @@
#include <signal.h>
#include <unistd.h>
#include <xc_private.h>
+#include <xs.h>
#include <xen/mem_event.h>
@@ -92,6 +93,14 @@ static xenpaging_t *xenpaging_init(domid
paging = malloc(sizeof(xenpaging_t));
memset(paging, 0, sizeof(xenpaging_t));
+ /* Open connection to xenstore */
+ paging->xs_handle = xs_open(0);
+ if ( paging->xs_handle == NULL )
+ {
+ ERROR("Error initialising xenstore connection");
+ goto err;
+ }
+
p = getenv("XENPAGING_POLICY_MRU_SIZE");
if ( p && *p )
{
@@ -221,6 +230,8 @@ static xenpaging_t *xenpaging_init(domid
err:
if ( paging )
{
+ if ( paging->xs_handle )
+ xs_close(paging->xs_handle);
xc_interface_close(xch);
if ( paging->mem_event.shared_page )
{
@@ -277,6 +288,9 @@ static int xenpaging_teardown(xenpaging_
}
paging->mem_event.xce_handle = NULL;
+ /* Close connection to xenstore */
+ xs_close(paging->xs_handle);
+
/* Close connection to Xen */
rc = xc_interface_close(xch);
if ( rc != 0 )
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.h
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.h
@@ -36,6 +36,7 @@
typedef struct xenpaging {
xc_interface *xc_handle;
+ struct xs_handle *xs_handle;
xc_platform_info_t *platform_info;
xc_domaininfo_t *domain_info;
++++++ 23579-xenpaging_drop_xc.c_remove_ASSERT.patch ++++++
changeset: 23579:868c8c898f73
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:06 2011 +0200
files: tools/xenpaging/policy_default.c tools/xenpaging/xc.h
description:
xenpaging: drop xc.c, remove ASSERT
The ASSERT is not needed, victim is never NULL.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/policy_default.c | 1 -
tools/xenpaging/xc.h | 7 -------
2 files changed, 8 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/policy_default.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/policy_default.c
+++ xen-4.1.2-testing/tools/xenpaging/policy_default.c
@@ -78,7 +78,6 @@ int policy_choose_victim(xenpaging_t *pa
{
xc_interface *xch = paging->xc_handle;
unsigned long wrap = current_gfn;
- ASSERT(victim != NULL);
do
{
Index: xen-4.1.2-testing/tools/xenpaging/xc.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xc.h
+++ xen-4.1.2-testing/tools/xenpaging/xc.h
@@ -30,13 +30,6 @@
#include <xen/mem_event.h>
-#if 1
-#define ASSERT(_p) \
- if ( !(_p) ) { DPRINTF("Assertion '%s' failed, line %d, file %s", #_p , \
- __LINE__, __FILE__); *(int*)0=0; }
-#else
-#define ASSERT(_p) ((void)0)
-#endif
++++++ 23580-xenpaging_drop_xc.c_remove_xc_platform_info_t.patch ++++++
changeset: 23580:771b6984aa2a
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:07 2011 +0200
files: tools/xenpaging/xc.c tools/xenpaging/xc.h
tools/xenpaging/xenpaging.c tools/xenpaging/xenpaging.h
description:
xenpaging: drop xc.c, remove xc_platform_info_t
xc_platform_info_t is not used in xenpaging.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xc.c | 10 ----------
tools/xenpaging/xc.h | 8 --------
tools/xenpaging/xenpaging.c | 17 -----------------
tools/xenpaging/xenpaging.h | 1 -
4 files changed, 36 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xc.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xc.c
+++ xen-4.1.2-testing/tools/xenpaging/xc.c
@@ -26,7 +26,6 @@
#include <stdarg.h>
#include <sys/poll.h>
#include <xc_private.h>
-#include <xg_save_restore.h>
#include <xs.h>
#include "xc.h"
@@ -97,15 +96,6 @@ int xc_wait_for_event(xc_interface *xch,
return xc_wait_for_event_or_timeout(xch, xce, -1);
}
-int xc_get_platform_info(xc_interface *xc_handle, domid_t domain_id,
- xc_platform_info_t *platform_info)
-{
- return get_platform_info(xc_handle, domain_id,
- &platform_info->max_mfn,
- &platform_info->hvirt_start,
- &platform_info->pt_levels,
- &platform_info->guest_width);
-}
/*
Index: xen-4.1.2-testing/tools/xenpaging/xc.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xc.h
+++ xen-4.1.2-testing/tools/xenpaging/xc.h
@@ -34,12 +34,6 @@
-typedef struct xc_platform_info {
- unsigned long max_mfn;
- unsigned long hvirt_start;
- unsigned int pt_levels;
- unsigned int guest_width;
-} xc_platform_info_t;
@@ -47,8 +41,6 @@ int xc_mem_paging_flush_ioemu_cache(domi
int xc_wait_for_event(xc_interface *xch, xc_evtchn *xce);
int xc_wait_for_event_or_timeout(xc_interface *xch, xc_evtchn *xce, unsigned
long ms);
-int xc_get_platform_info(xc_interface *xc_handle, domid_t domain_id,
- xc_platform_info_t *platform_info);
#endif // __XC_H__
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -176,22 +176,6 @@ static xenpaging_t *xenpaging_init(domid
paging->mem_event.port = rc;
- /* Get platform info */
- paging->platform_info = malloc(sizeof(xc_platform_info_t));
- if ( paging->platform_info == NULL )
- {
- ERROR("Error allocating memory for platform info");
- goto err;
- }
-
- rc = xc_get_platform_info(xch, paging->mem_event.domain_id,
- paging->platform_info);
- if ( rc != 1 )
- {
- ERROR("Error getting platform info");
- goto err;
- }
-
/* Get domaininfo */
paging->domain_info = malloc(sizeof(xc_domaininfo_t));
if ( paging->domain_info == NULL )
@@ -246,7 +230,6 @@ static xenpaging_t *xenpaging_init(domid
}
free(paging->bitmap);
- free(paging->platform_info);
free(paging->domain_info);
free(paging);
}
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.h
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.h
@@ -38,7 +38,6 @@ typedef struct xenpaging {
xc_interface *xc_handle;
struct xs_handle *xs_handle;
- xc_platform_info_t *platform_info;
xc_domaininfo_t *domain_info;
unsigned long *bitmap;
++++++ 23581-xenpaging_drop_xc.c_remove_xc_wait_for_event.patch ++++++
changeset: 23581:9ce56626a5ab
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:08 2011 +0200
files: tools/xenpaging/xc.c tools/xenpaging/xc.h
description:
xenpaging: drop xc.c, remove xc_wait_for_event
xc_wait_for_event is not used in xenpaging.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xc.c | 4 ----
tools/xenpaging/xc.h | 1 -
2 files changed, 5 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xc.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xc.c
+++ xen-4.1.2-testing/tools/xenpaging/xc.c
@@ -91,10 +91,6 @@ int xc_wait_for_event_or_timeout(xc_inte
return -errno;
}
-int xc_wait_for_event(xc_interface *xch, xc_evtchn *xce)
-{
- return xc_wait_for_event_or_timeout(xch, xce, -1);
-}
Index: xen-4.1.2-testing/tools/xenpaging/xc.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xc.h
+++ xen-4.1.2-testing/tools/xenpaging/xc.h
@@ -38,7 +38,6 @@
int xc_mem_paging_flush_ioemu_cache(domid_t domain_id);
-int xc_wait_for_event(xc_interface *xch, xc_evtchn *xce);
int xc_wait_for_event_or_timeout(xc_interface *xch, xc_evtchn *xce, unsigned
long ms);
++++++ 23582-xenpaging_drop_xc.c_move_xc_mem_paging_flush_ioemu_cache.patch
++++++
changeset: 23582:480e548fe76b
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:10 2011 +0200
files: tools/xenpaging/xc.c tools/xenpaging/xc.h
tools/xenpaging/xenpaging.c
description:
xenpaging: drop xc.c, move xc_mem_paging_flush_ioemu_cache
Move xc_mem_paging_flush_ioemu_cache() into xenpaging and massage it a bit to
use the required members from xenpaging_t.
Also update type of rc to match xs_write() return value.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xc.c | 18 ------------------
tools/xenpaging/xc.h | 1 -
tools/xenpaging/xenpaging.c | 16 +++++++++++++++-
3 files changed, 15 insertions(+), 20 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xc.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xc.c
+++ xen-4.1.2-testing/tools/xenpaging/xc.c
@@ -31,24 +31,6 @@
-int xc_mem_paging_flush_ioemu_cache(domid_t domain_id)
-{
- struct xs_handle *xsh = NULL;
- char path[80];
- int rc;
-
- sprintf(path, "/local/domain/0/device-model/%u/command", domain_id);
-
- xsh = xs_daemon_open();
- if ( xsh == NULL )
- return -EIO;
-
- rc = xs_write(xsh, XBT_NULL, path, "flush-cache", strlen("flush-cache"));
-
- xs_daemon_close(xsh);
-
- return rc ? 0 : -1;
-}
int xc_wait_for_event_or_timeout(xc_interface *xch, xc_evtchn *xce, unsigned
long ms)
{
Index: xen-4.1.2-testing/tools/xenpaging/xc.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xc.h
+++ xen-4.1.2-testing/tools/xenpaging/xc.h
@@ -37,7 +37,6 @@
-int xc_mem_paging_flush_ioemu_cache(domid_t domain_id);
int xc_wait_for_event_or_timeout(xc_interface *xch, xc_evtchn *xce, unsigned
long ms);
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -48,6 +48,20 @@ static void close_handler(int sig)
unlink(filename);
}
+static int xenpaging_mem_paging_flush_ioemu_cache(xenpaging_t *paging)
+{
+ struct xs_handle *xsh = paging->xs_handle;
+ domid_t domain_id = paging->mem_event.domain_id;
+ char path[80];
+ bool rc;
+
+ sprintf(path, "/local/domain/0/device-model/%u/command", domain_id);
+
+ rc = xs_write(xsh, XBT_NULL, path, "flush-cache", strlen("flush-cache"));
+
+ return rc == true ? 0 : -1;
+}
+
static void *init_page(void)
{
void *buffer;
@@ -484,7 +498,7 @@ static int evict_victim(xenpaging_t *pag
else
{
if ( j++ % 1000 == 0 )
- if (
xc_mem_paging_flush_ioemu_cache(paging->mem_event.domain_id) )
+ if ( xenpaging_mem_paging_flush_ioemu_cache(paging) )
ERROR("Error flushing ioemu cache");
}
}
++++++ 23583-xenpaging_drop_xc.c_move_xc_wait_for_event_or_timeout.patch ++++++
changeset: 23583:235d8fdcb3a9
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:11 2011 +0200
files: tools/xenpaging/xc.c tools/xenpaging/xc.h
tools/xenpaging/xenpaging.c
description:
xenpaging: drop xc.c, move xc_wait_for_event_or_timeout
Move xc_wait_for_event_or_timeout() into xenpaging and massage it a bit for
further changes in subsequent patches.
Include poll.h instead of sys/poll.h.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xc.c | 40 ------------------------------------
tools/xenpaging/xc.h | 1
tools/xenpaging/xenpaging.c | 48 +++++++++++++++++++++++++++++++++++++++++---
3 files changed, 45 insertions(+), 44 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xc.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xc.c
+++ xen-4.1.2-testing/tools/xenpaging/xc.c
@@ -32,46 +32,6 @@
-int xc_wait_for_event_or_timeout(xc_interface *xch, xc_evtchn *xce, unsigned
long ms)
-{
- struct pollfd fd = { .fd = xc_evtchn_fd(xce), .events = POLLIN | POLLERR };
- int port;
- int rc;
-
- rc = poll(&fd, 1, ms);
- if ( rc == -1 )
- {
- if (errno == EINTR)
- return 0;
-
- ERROR("Poll exited with an error");
- goto err;
- }
-
- if ( rc == 1 )
- {
- port = xc_evtchn_pending(xce);
- if ( port == -1 )
- {
- ERROR("Failed to read port from event channel");
- goto err;
- }
-
- rc = xc_evtchn_unmask(xce, port);
- if ( rc != 0 )
- {
- ERROR("Failed to unmask event channel port");
- goto err;
- }
- }
- else
- port = -1;
-
- return port;
-
- err:
- return -errno;
-}
Index: xen-4.1.2-testing/tools/xenpaging/xc.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xc.h
+++ xen-4.1.2-testing/tools/xenpaging/xc.h
@@ -37,7 +37,6 @@
-int xc_wait_for_event_or_timeout(xc_interface *xch, xc_evtchn *xce, unsigned
long ms);
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -27,6 +27,7 @@
#include <time.h>
#include <signal.h>
#include <unistd.h>
+#include <poll.h>
#include <xc_private.h>
#include <xs.h>
@@ -62,6 +63,47 @@ static int xenpaging_mem_paging_flush_io
return rc == true ? 0 : -1;
}
+static int xenpaging_wait_for_event_or_timeout(xenpaging_t *paging)
+{
+ xc_interface *xch = paging->xc_handle;
+ xc_evtchn *xce = paging->mem_event.xce_handle;
+ struct pollfd fd[1];
+ int port;
+ int rc;
+
+ fd[0].fd = xc_evtchn_fd(xce);
+ fd[0].events = POLLIN | POLLERR;
+ rc = poll(fd, 1, 100);
+ if ( rc < 0 )
+ {
+ if (errno == EINTR)
+ return 0;
+
+ ERROR("Poll exited with an error");
+ return -errno;
+ }
+
+ if ( rc && fd[0].revents & POLLIN )
+ {
+ DPRINTF("Got event from evtchn\n");
+ port = xc_evtchn_pending(xce);
+ if ( port == -1 )
+ {
+ ERROR("Failed to read port from event channel");
+ rc = -1;
+ goto err;
+ }
+
+ rc = xc_evtchn_unmask(xce, port);
+ if ( rc < 0 )
+ {
+ ERROR("Failed to unmask event channel port");
+ }
+ }
+err:
+ return rc;
+}
+
static void *init_page(void)
{
void *buffer;
@@ -598,13 +640,13 @@ int main(int argc, char *argv[])
while ( !interrupted )
{
/* Wait for Xen to signal that a page needs paged in */
- rc = xc_wait_for_event_or_timeout(xch, paging->mem_event.xce_handle,
100);
- if ( rc < -1 )
+ rc = xenpaging_wait_for_event_or_timeout(paging);
+ if ( rc < 0 )
{
ERROR("Error getting event");
goto out;
}
- else if ( rc != -1 )
+ else if ( rc != 0 )
{
DPRINTF("Got event from Xen\n");
}
++++++ 23584-xenpaging_drop_xc.c_remove_xc_files.patch ++++++
changeset: 23584:e30cff57b146
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:12 2011 +0200
files: tools/xenpaging/Makefile tools/xenpaging/mem_event.h
tools/xenpaging/xc.c tools/xenpaging/xc.h tools/xenpaging/xenpaging.c
tools/xenpaging/xenpaging.h
description:
xenpaging: drop xc.c, remove xc files
Finally remove xc.c/xc.h and its references since both are empty now.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/Makefile | 2 -
tools/xenpaging/mem_event.h | 1
tools/xenpaging/xc.c | 47 --------------------------------------
tools/xenpaging/xc.h | 54 --------------------------------------------
tools/xenpaging/xenpaging.c | 1
tools/xenpaging/xenpaging.h | 1
6 files changed, 1 insertion(+), 105 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/Makefile
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/Makefile
+++ xen-4.1.2-testing/tools/xenpaging/Makefile
@@ -9,7 +9,7 @@ LDLIBS += $(LDLIBS_libxenctrl) $(LDLIBS
POLICY = default
SRC :=
-SRCS += file_ops.c xc.c xenpaging.c policy_$(POLICY).c
+SRCS += file_ops.c xenpaging.c policy_$(POLICY).c
CFLAGS += -Werror
CFLAGS += -Wno-unused
Index: xen-4.1.2-testing/tools/xenpaging/mem_event.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/mem_event.h
+++ xen-4.1.2-testing/tools/xenpaging/mem_event.h
@@ -25,7 +25,6 @@
#define __XEN_MEM_EVENT_H__
-#include "xc.h"
#include <xc_private.h>
#include <xen/event_channel.h>
Index: xen-4.1.2-testing/tools/xenpaging/xc.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xc.c
+++ /dev/null
@@ -1,47 +0,0 @@
-/******************************************************************************
- * tools/xenpaging/lib/xc.c
- *
- * libxc-type add-ons for paging support.
- *
- * Copyright (c) 2009 Citrix Systems, Inc. (Patrick Colp)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-
-#include <errno.h>
-#include <string.h>
-#include <stdarg.h>
-#include <sys/poll.h>
-#include <xc_private.h>
-#include <xs.h>
-#include "xc.h"
-
-
-
-
-
-
-
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
Index: xen-4.1.2-testing/tools/xenpaging/xc.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xc.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/******************************************************************************
- * tools/xenpaging/lib/xc.h
- *
- * libxc add-ons.
- *
- * Copyright (c) 2009 Citrix Systems, Inc. (Patrick Colp)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-
-#ifndef __XC_H__
-#define __XC_H__
-
-
-#include <stdarg.h>
-#include <xc_private.h>
-#include <xen/mem_event.h>
-
-
-
-
-
-
-
-
-
-
-
-
-#endif // __XC_H__
-
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -35,7 +35,6 @@
#include "xc_bitops.h"
#include "file_ops.h"
-#include "xc.h"
#include "policy.h"
#include "xenpaging.h"
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.h
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.h
@@ -25,7 +25,6 @@
#define __XEN_PAGING2_H__
-#include "xc.h"
#include <xc_private.h>
#include <xen/event_channel.h>
++++++
23585-xenpaging_correct_dropping_of_pages_to_avoid_full_ring_buffer.patch ++++++
changeset: 23585:b4d18ac00a46
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:14 2011 +0200
files: tools/xenpaging/xenpaging.c
description:
xenpaging: correct dropping of pages to avoid full ring buffer
Doing a one-way channel from Xen to xenpaging is not possible with the
current ring buffer implementation. xenpaging uses the mem_event ring
buffer, which expects request/response pairs to make progress. The
previous patch, which tried to establish a one-way communication from
Xen to xenpaging, stalled the guest once the buffer was filled up with
requests. Correct page-dropping by taking the slow path and let
p2m_mem_paging_resume() consume the response from xenpaging. This makes
room for yet another request/response pair and avoids hanging guests.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -690,19 +690,19 @@ int main(int argc, char *argv[])
ERROR("Error populating page");
goto out;
}
+ }
- /* Prepare the response */
- rsp.gfn = req.gfn;
- rsp.p2mt = req.p2mt;
- rsp.vcpu_id = req.vcpu_id;
- rsp.flags = req.flags;
+ /* Prepare the response */
+ rsp.gfn = req.gfn;
+ rsp.p2mt = req.p2mt;
+ rsp.vcpu_id = req.vcpu_id;
+ rsp.flags = req.flags;
- rc = xenpaging_resume_page(paging, &rsp, 1);
- if ( rc != 0 )
- {
- ERROR("Error resuming page");
- goto out;
- }
+ rc = xenpaging_resume_page(paging, &rsp, 1);
+ if ( rc != 0 )
+ {
+ ERROR("Error resuming page");
+ goto out;
}
/* Evict a new page to replace the one we just paged in */
++++++ 23586-xenpaging_do_not_bounce_p2mt_back_to_the_hypervisor.patch ++++++
changeset: 23586:bbdd7413a50a
user: Olaf Hering <olaf@xxxxxxxxx>
date: Wed Jun 22 14:47:13 2011 +0100
files: tools/xenpaging/xenpaging.c
description:
xenpaging: do not bounce p2mt back to the hypervisor
do not bounce p2mt back to the hypervisor because p2m_mem_paging_populate()
and p2m_mem_paging_resume() dont make use of p2mt.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 4 ----
1 file changed, 4 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -694,7 +694,6 @@ int main(int argc, char *argv[])
/* Prepare the response */
rsp.gfn = req.gfn;
- rsp.p2mt = req.p2mt;
rsp.vcpu_id = req.vcpu_id;
rsp.flags = req.flags;
@@ -711,10 +710,8 @@ int main(int argc, char *argv[])
else
{
DPRINTF("page already populated (domain = %d; vcpu = %d;"
- " p2mt = %x;"
" gfn = %"PRIx64"; paused = %d)\n",
paging->mem_event.domain_id, req.vcpu_id,
- req.p2mt,
req.gfn, req.flags & MEM_EVENT_FLAG_VCPU_PAUSED);
/* Tell Xen to resume the vcpu */
@@ -723,7 +720,6 @@ int main(int argc, char *argv[])
{
/* Prepare the response */
rsp.gfn = req.gfn;
- rsp.p2mt = req.p2mt;
rsp.vcpu_id = req.vcpu_id;
rsp.flags = req.flags;
++++++ 23587-xenpaging_remove_srand_call.patch ++++++
changeset: 23587:926febc8bd98
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:16 2011 +0200
files: tools/xenpaging/xenpaging.c
description:
xenpaging: remove srand call
The policy uses now a linear algorithm instead of a random one.
Remove the call to srand().
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 3 ---
1 file changed, 3 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -581,9 +581,6 @@ int main(int argc, char *argv[])
domain_id = atoi(argv[1]);
num_pages = atoi(argv[2]);
- /* Seed random-number generator */
- srand(time(NULL));
-
/* Initialise domain paging */
paging = xenpaging_init(domain_id);
if ( paging == NULL )
++++++
23588-xenpaging_remove_return_values_from_functions_that_can_not_fail.patch
++++++
changeset: 23588:e48535e70145
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:18 2011 +0200
files: tools/xenpaging/xenpaging.c
description:
xenpaging: remove return values from functions that can not fail
get_request() and put_response() can not fail, remove return value
and update calling functions.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 19 ++++---------------
1 file changed, 4 insertions(+), 15 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -342,7 +342,7 @@ static int xenpaging_teardown(xenpaging_
return -1;
}
-static int get_request(mem_event_t *mem_event, mem_event_request_t *req)
+static void get_request(mem_event_t *mem_event, mem_event_request_t *req)
{
mem_event_back_ring_t *back_ring;
RING_IDX req_cons;
@@ -357,11 +357,9 @@ static int get_request(mem_event_t *mem_
/* Update ring */
back_ring->req_cons = req_cons;
back_ring->sring->req_event = req_cons + 1;
-
- return 0;
}
-static int put_response(mem_event_t *mem_event, mem_event_response_t *rsp)
+static void put_response(mem_event_t *mem_event, mem_event_response_t *rsp)
{
mem_event_back_ring_t *back_ring;
RING_IDX rsp_prod;
@@ -376,8 +374,6 @@ static int put_response(mem_event_t *mem
/* Update ring */
back_ring->rsp_prod_pvt = rsp_prod;
RING_PUSH_RESPONSES(back_ring);
-
- return 0;
}
static int xenpaging_evict_page(xenpaging_t *paging,
@@ -437,9 +433,7 @@ static int xenpaging_resume_page(xenpagi
int ret;
/* Put the page info on the ring */
- ret = put_response(&paging->mem_event, rsp);
- if ( ret != 0 )
- goto out;
+ put_response(&paging->mem_event, rsp);
/* Notify policy of page being paged in */
if ( notify_policy )
@@ -649,12 +643,7 @@ int main(int argc, char *argv[])
while ( RING_HAS_UNCONSUMED_REQUESTS(&paging->mem_event.back_ring) )
{
- rc = get_request(&paging->mem_event, &req);
- if ( rc != 0 )
- {
- ERROR("Error getting request");
- goto out;
- }
+ get_request(&paging->mem_event, &req);
/* Check if the page has already been paged in */
if ( test_and_clear_bit(req.gfn, paging->bitmap) )
++++++ 23589-xenpaging_catch_xc_mem_paging_resume_errors.patch ++++++
changeset: 23589:49cb290ede16
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:19 2011 +0200
files: tools/xenpaging/xenpaging.c
description:
xenpaging: catch xc_mem_paging_resume errors
In the unlikely event that xc_mem_paging_resume() fails, do not overwrite the
error with the return value from xc_evtchn_notify()
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -442,8 +442,9 @@ static int xenpaging_resume_page(xenpagi
/* Tell Xen page is ready */
ret = xc_mem_paging_resume(paging->xc_handle, paging->mem_event.domain_id,
rsp->gfn);
- ret = xc_evtchn_notify(paging->mem_event.xce_handle,
- paging->mem_event.port);
+ if ( ret == 0 )
+ ret = xc_evtchn_notify(paging->mem_event.xce_handle,
+ paging->mem_event.port);
out:
return ret;
++++++ 23590-xenpaging_remove_local_domain_id_variable.patch ++++++
changeset: 23590:d957acb8bee6
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:20 2011 +0200
files: tools/xenpaging/xenpaging.c
description:
xenpaging: remove local domain_id variable
Remove the local domain_id variable, it is already fetched from
paging->mem_event in other places.
Update the sprintf format string to use unsigned argument.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -552,7 +552,6 @@ static int evict_victim(xenpaging_t *pag
int main(int argc, char *argv[])
{
struct sigaction act;
- domid_t domain_id;
int num_pages;
xenpaging_t *paging;
xenpaging_victim_t *victims;
@@ -573,11 +572,10 @@ int main(int argc, char *argv[])
return -1;
}
- domain_id = atoi(argv[1]);
num_pages = atoi(argv[2]);
/* Initialise domain paging */
- paging = xenpaging_init(domain_id);
+ paging = xenpaging_init(atoi(argv[1]));
if ( paging == NULL )
{
fprintf(stderr, "Error initialising paging");
@@ -585,10 +583,10 @@ int main(int argc, char *argv[])
}
xch = paging->xc_handle;
- DPRINTF("starting %s %u %d\n", argv[0], domain_id, num_pages);
+ DPRINTF("starting %s %u %d\n", argv[0], paging->mem_event.domain_id,
num_pages);
/* Open file */
- sprintf(filename, "page_cache_%d", domain_id);
+ sprintf(filename, "page_cache_%u", paging->mem_event.domain_id);
fd = open(filename, open_flags, open_mode);
if ( fd < 0 )
{
++++++ 23591-xenpaging_move_num_pages_into_xenpaging_struct.patch ++++++
changeset: 23591:4aaa90c1db42
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:22 2011 +0200
files: tools/xenpaging/xenpaging.c tools/xenpaging/xenpaging.h
description:
xenpaging: move num_pages into xenpaging struct
Move num_pages into struct xenpaging.
num_pages will be used by the policy in a subsequent patch.
Also remove a memset, the victims array is allocated with calloc.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 30 ++++++++++++++----------------
tools/xenpaging/xenpaging.h | 1 +
2 files changed, 15 insertions(+), 16 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -128,7 +128,7 @@ static void *init_page(void)
return NULL;
}
-static xenpaging_t *xenpaging_init(domid_t domain_id)
+static xenpaging_t *xenpaging_init(domid_t domain_id, int num_pages)
{
xenpaging_t *paging;
xc_interface *xch;
@@ -256,6 +256,13 @@ static xenpaging_t *xenpaging_init(domid
}
DPRINTF("max_pages = %"PRIx64"\n", paging->domain_info->max_pages);
+ if ( num_pages < 0 || num_pages > paging->domain_info->max_pages )
+ {
+ num_pages = paging->domain_info->max_pages;
+ DPRINTF("setting num_pages to %d\n", num_pages);
+ }
+ paging->num_pages = num_pages;
+
/* Initialise policy */
rc = policy_init(paging);
if ( rc != 0 )
@@ -552,7 +559,6 @@ static int evict_victim(xenpaging_t *pag
int main(int argc, char *argv[])
{
struct sigaction act;
- int num_pages;
xenpaging_t *paging;
xenpaging_victim_t *victims;
mem_event_request_t req;
@@ -572,10 +578,8 @@ int main(int argc, char *argv[])
return -1;
}
- num_pages = atoi(argv[2]);
-
/* Initialise domain paging */
- paging = xenpaging_init(atoi(argv[1]));
+ paging = xenpaging_init(atoi(argv[1]), atoi(argv[2]));
if ( paging == NULL )
{
fprintf(stderr, "Error initialising paging");
@@ -583,7 +587,7 @@ int main(int argc, char *argv[])
}
xch = paging->xc_handle;
- DPRINTF("starting %s %u %d\n", argv[0], paging->mem_event.domain_id,
num_pages);
+ DPRINTF("starting %s %u %d\n", argv[0], paging->mem_event.domain_id,
paging->num_pages);
/* Open file */
sprintf(filename, "page_cache_%u", paging->mem_event.domain_id);
@@ -594,12 +598,7 @@ int main(int argc, char *argv[])
return 2;
}
- if ( num_pages < 0 || num_pages > paging->domain_info->max_pages )
- {
- num_pages = paging->domain_info->max_pages;
- DPRINTF("setting num_pages to %d\n", num_pages);
- }
- victims = calloc(num_pages, sizeof(xenpaging_victim_t));
+ victims = calloc(paging->num_pages, sizeof(xenpaging_victim_t));
/* ensure that if we get a signal, we'll do cleanup, then exit */
act.sa_handler = close_handler;
@@ -611,8 +610,7 @@ int main(int argc, char *argv[])
sigaction(SIGALRM, &act, NULL);
/* Evict pages */
- memset(victims, 0, sizeof(xenpaging_victim_t) * num_pages);
- for ( i = 0; i < num_pages; i++ )
+ for ( i = 0; i < paging->num_pages; i++ )
{
rc = evict_victim(paging, &victims[i], fd, i);
if ( rc == -ENOSPC )
@@ -648,13 +646,13 @@ int main(int argc, char *argv[])
if ( test_and_clear_bit(req.gfn, paging->bitmap) )
{
/* Find where in the paging file to read from */
- for ( i = 0; i < num_pages; i++ )
+ for ( i = 0; i < paging->num_pages; i++ )
{
if ( victims[i].gfn == req.gfn )
break;
}
- if ( i >= num_pages )
+ if ( i >= paging->num_pages )
{
DPRINTF("Couldn't find page %"PRIx64"\n", req.gfn);
goto out;
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.h
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.h
@@ -42,6 +42,7 @@ typedef struct xenpaging {
unsigned long *bitmap;
mem_event_t mem_event;
+ int num_pages;
int policy_mru_size;
} xenpaging_t;
++++++ 23592-xenpaging_start_paging_in_the_middle_of_gfn_range.patch ++++++
changeset: 23592:1e44e75d889c
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:23 2011 +0200
files: tools/xenpaging/policy_default.c
description:
xenpaging: start paging in the middle of gfn range
Set the starting gfn to somewhere in the middle of the gfn range to
avoid paging during BIOS startup. This can speedup booting of a guest.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/policy_default.c | 4 ++++
1 file changed, 4 insertions(+)
Index: xen-4.1.2-testing/tools/xenpaging/policy_default.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/policy_default.c
+++ xen-4.1.2-testing/tools/xenpaging/policy_default.c
@@ -69,6 +69,10 @@ int policy_init(xenpaging_t *paging)
/* Don't page out page 0 */
set_bit(0, bitmap);
+ /* Start in the middle to avoid paging during BIOS startup */
+ current_gfn = max_pages / 2;
+ current_gfn -= paging->num_pages / 2;
+
rc = 0;
out:
return rc;
++++++ 23593-xenpaging_pass_integer_to_xenpaging_populate_page.patch ++++++
changeset: 23593:7d72475641fa
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:24 2011 +0200
files: tools/xenpaging/xenpaging.c
description:
xenpaging: pass integer to xenpaging_populate_page
Pass gfn as integer to xenpaging_populate_page(). xc_map_foreign_pages()
takes a pointer to a list of gfns, but its a const pointer. So writing
the value back to the caller is not needed.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -458,27 +458,24 @@ static int xenpaging_resume_page(xenpagi
}
static int xenpaging_populate_page(xenpaging_t *paging,
- uint64_t *gfn, int fd, int i)
+ xen_pfn_t gfn, int fd, int i)
{
xc_interface *xch = paging->xc_handle;
- unsigned long _gfn;
void *page;
int ret;
unsigned char oom = 0;
- _gfn = *gfn;
- DPRINTF("populate_page < gfn %lx pageslot %d\n", _gfn, i);
+ DPRINTF("populate_page < gfn %"PRI_xen_pfn" pageslot %d\n", gfn, i);
do
{
/* Tell Xen to allocate a page for the domain */
- ret = xc_mem_paging_prep(xch, paging->mem_event.domain_id,
- _gfn);
+ ret = xc_mem_paging_prep(xch, paging->mem_event.domain_id, gfn);
if ( ret != 0 )
{
if ( errno == ENOMEM )
{
if ( oom++ == 0 )
- DPRINTF("ENOMEM while preparing gfn %lx\n", _gfn);
+ DPRINTF("ENOMEM while preparing gfn %"PRI_xen_pfn"\n",
gfn);
sleep(1);
continue;
}
@@ -491,8 +488,7 @@ static int xenpaging_populate_page(xenpa
/* Map page */
ret = -EFAULT;
page = xc_map_foreign_pages(xch, paging->mem_event.domain_id,
- PROT_READ | PROT_WRITE, &_gfn, 1);
- *gfn = _gfn;
+ PROT_READ | PROT_WRITE, &gfn, 1);
if ( page == NULL )
{
ERROR("Error mapping page: page is null");
@@ -667,7 +663,7 @@ int main(int argc, char *argv[])
else
{
/* Populate the page */
- rc = xenpaging_populate_page(paging, &req.gfn, fd, i);
+ rc = xenpaging_populate_page(paging, req.gfn, fd, i);
if ( rc != 0 )
{
ERROR("Error populating page");
++++++ 23594-xenpaging_add_helper_function_for_unlinking_pagefile.patch ++++++
changeset: 23594:2fe46305a00d
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:25 2011 +0200
files: tools/xenpaging/xenpaging.c
description:
xenpaging: add helper function for unlinking pagefile
Unlink pagefile in the signal handler and also in the exit path.
This does not leave a stale pagefile if an error occoured.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -41,11 +41,20 @@
static char filename[80];
static int interrupted;
-static void close_handler(int sig)
+
+static void unlink_pagefile(void)
{
- interrupted = sig;
if ( filename[0] )
+ {
unlink(filename);
+ filename[0] = '\0';
+ }
+}
+
+static void close_handler(int sig)
+{
+ interrupted = sig;
+ unlink_pagefile();
}
static int xenpaging_mem_paging_flush_ioemu_cache(xenpaging_t *paging)
@@ -716,6 +725,7 @@ int main(int argc, char *argv[])
out:
close(fd);
+ unlink_pagefile();
free(victims);
/* Tear down domain paging */
++++++ 23595-xenpaging_add_watch_thread_to_catch_guest_shutdown.patch ++++++
changeset: 23595:389c8bf31688
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:27 2011 +0200
files: tools/xenpaging/xenpaging.c
description:
xenpaging: add watch thread to catch guest shutdown
If xenpaging is started manually then no event is sent to xenpaging when
the guest is shutdown or rebooted. Add a watch on the @releaseDomain
node to leave the loop and gracefully shutdown the pager.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 40 ++++++++++++++++++++++++++++++++++++++--
1 file changed, 38 insertions(+), 2 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -39,6 +39,7 @@
#include "policy.h"
#include "xenpaging.h"
+static char watch_token[16];
static char filename[80];
static int interrupted;
@@ -75,13 +76,19 @@ static int xenpaging_wait_for_event_or_t
{
xc_interface *xch = paging->xc_handle;
xc_evtchn *xce = paging->mem_event.xce_handle;
- struct pollfd fd[1];
+ char **vec;
+ unsigned int num;
+ struct pollfd fd[2];
int port;
int rc;
+ /* Wait for event channel and xenstore */
fd[0].fd = xc_evtchn_fd(xce);
fd[0].events = POLLIN | POLLERR;
- rc = poll(fd, 1, 100);
+ fd[1].fd = xs_fileno(paging->xs_handle);
+ fd[1].events = POLLIN | POLLERR;
+
+ rc = poll(fd, 2, 100);
if ( rc < 0 )
{
if (errno == EINTR)
@@ -91,6 +98,27 @@ static int xenpaging_wait_for_event_or_t
return -errno;
}
+ /* First check for guest shutdown */
+ if ( rc && fd[1].revents & POLLIN )
+ {
+ DPRINTF("Got event from xenstore\n");
+ vec = xs_read_watch(paging->xs_handle, &num);
+ if ( vec )
+ {
+ if ( strcmp(vec[XS_WATCH_TOKEN], watch_token) == 0 )
+ {
+ /* If our guest disappeared, set interrupt flag and fall
through */
+ if ( xs_is_domain_introduced(paging->xs_handle,
paging->mem_event.domain_id) == false )
+ {
+ xs_unwatch(paging->xs_handle, "@releaseDomain",
watch_token);
+ interrupted = SIGQUIT;
+ rc = 0;
+ }
+ }
+ free(vec);
+ }
+ }
+
if ( rc && fd[0].revents & POLLIN )
{
DPRINTF("Got event from evtchn\n");
@@ -165,6 +193,14 @@ static xenpaging_t *xenpaging_init(domid
goto err;
}
+ /* write domain ID to watch so we can ignore other domain shutdowns */
+ snprintf(watch_token, sizeof(watch_token), "%u", domain_id);
+ if ( xs_watch(paging->xs_handle, "@releaseDomain", watch_token) == false )
+ {
+ ERROR("Could not bind to shutdown watch\n");
+ goto err;
+ }
+
p = getenv("XENPAGING_POLICY_MRU_SIZE");
if ( p && *p )
{
++++++
23596-xenpaging_implement_stopping_of_pager_by_sending_SIGTERM-SIGINT.patch
++++++
changeset: 23596:c49e22648d0e
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:28 2011 +0200
files: tools/xenpaging/Makefile tools/xenpaging/pagein.c
tools/xenpaging/xenpaging.c tools/xenpaging/xenpaging.h
description:
xenpaging: implement stopping of pager by sending SIGTERM/SIGINT
Write all paged-out pages back into the guest if the pager is
interrupted by ctrl-c or if it receives SIGTERM.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/Makefile | 1
tools/xenpaging/pagein.c | 68 ++++++++++++++++++++++++++++++++++++++++++++
tools/xenpaging/xenpaging.c | 35 ++++++++++++++++++++--
tools/xenpaging/xenpaging.h | 3 +
4 files changed, 104 insertions(+), 3 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/Makefile
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/Makefile
+++ xen-4.1.2-testing/tools/xenpaging/Makefile
@@ -10,6 +10,7 @@ POLICY = default
SRC :=
SRCS += file_ops.c xenpaging.c policy_$(POLICY).c
+SRCS += pagein.c
CFLAGS += -Werror
CFLAGS += -Wno-unused
Index: xen-4.1.2-testing/tools/xenpaging/pagein.c
===================================================================
--- /dev/null
+++ xen-4.1.2-testing/tools/xenpaging/pagein.c
@@ -0,0 +1,68 @@
+/* Trigger a page-in in a separate thread-of-execution to avoid deadlock */
+#include <pthread.h>
+#include "xc_private.h"
+
+struct page_in_args {
+ domid_t dom;
+ xc_interface *xch;
+};
+
+static struct page_in_args page_in_args;
+static unsigned long page_in_gfn;
+static unsigned int page_in_possible;
+
+static pthread_t page_in_thread;
+static pthread_cond_t page_in_cond = PTHREAD_COND_INITIALIZER;
+static pthread_mutex_t page_in_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static void *page_in(void *arg)
+{
+ struct page_in_args *pia = arg;
+ void *page;
+ xen_pfn_t gfn;
+
+ while (1)
+ {
+ pthread_mutex_lock(&page_in_mutex);
+ while (!page_in_gfn)
+ pthread_cond_wait(&page_in_cond, &page_in_mutex);
+ gfn = page_in_gfn;
+ page_in_gfn = 0;
+ pthread_mutex_unlock(&page_in_mutex);
+
+ /* Ignore errors */
+ page = xc_map_foreign_pages(pia->xch, pia->dom, PROT_READ, &gfn, 1);
+ if (page)
+ munmap(page, PAGE_SIZE);
+ }
+ page_in_possible = 0;
+ pthread_exit(NULL);
+}
+
+void page_in_trigger(unsigned long gfn)
+{
+ if (!page_in_possible)
+ return;
+
+ pthread_mutex_lock(&page_in_mutex);
+ page_in_gfn = gfn;
+ pthread_mutex_unlock(&page_in_mutex);
+ pthread_cond_signal(&page_in_cond);
+}
+
+void create_page_in_thread(domid_t domain_id, xc_interface *xch)
+{
+ page_in_args.dom = domain_id;
+ page_in_args.xch = xch;
+ if (pthread_create(&page_in_thread, NULL, page_in, &page_in_args) == 0)
+ page_in_possible = 1;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -650,6 +650,9 @@ int main(int argc, char *argv[])
sigaction(SIGINT, &act, NULL);
sigaction(SIGALRM, &act, NULL);
+ /* listen for page-in events to stop pager */
+ create_page_in_thread(paging->mem_event.domain_id, xch);
+
/* Evict pages */
for ( i = 0; i < paging->num_pages; i++ )
{
@@ -665,7 +668,7 @@ int main(int argc, char *argv[])
DPRINTF("%d pages evicted. Done.\n", i);
/* Swap pages in and out */
- while ( !interrupted )
+ while ( 1 )
{
/* Wait for Xen to signal that a page needs paged in */
rc = xenpaging_wait_for_event_or_timeout(paging);
@@ -728,8 +731,12 @@ int main(int argc, char *argv[])
goto out;
}
- /* Evict a new page to replace the one we just paged in */
- evict_victim(paging, &victims[i], fd, i);
+ /* Evict a new page to replace the one we just paged in,
+ * or clear this pagefile slot on exit */
+ if ( interrupted )
+ victims[i].gfn = INVALID_MFN;
+ else
+ evict_victim(paging, &victims[i], fd, i);
}
else
{
@@ -756,6 +763,28 @@ int main(int argc, char *argv[])
}
}
}
+
+ /* Write all pages back into the guest */
+ if ( interrupted == SIGTERM || interrupted == SIGINT )
+ {
+ for ( i = 0; i < paging->domain_info->max_pages; i++ )
+ {
+ if ( test_bit(i, paging->bitmap) )
+ {
+ page_in_trigger(i);
+ break;
+ }
+ }
+ /* If no more pages to process, exit loop */
+ if ( i == paging->domain_info->max_pages )
+ break;
+ }
+ else
+ {
+ /* Exit on any other signal */
+ if ( interrupted )
+ break;
+ }
}
DPRINTF("xenpaging got signal %d\n", interrupted);
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.h
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.h
@@ -53,6 +53,9 @@ typedef struct xenpaging_victim {
} xenpaging_victim_t;
+extern void create_page_in_thread(domid_t domain_id, xc_interface *xch);
+extern void page_in_trigger(unsigned long gfn);
+
#endif // __XEN_PAGING_H__
++++++ 23597-xenpaging_remove_private_mem_event.h.patch ++++++
changeset: 23597:3dcb553f3ba9
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Jun 10 10:47:29 2011 +0200
files: tools/xenpaging/mem_event.h tools/xenpaging/xenpaging.c
tools/xenpaging/xenpaging.h
description:
xenpaging: remove private mem_event.h
tools/xenpaging/mem_event.h is only included in xenpaging.h.
Add the contents into that file and remove mem_event.h.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/mem_event.h | 57 --------------------------------------------
tools/xenpaging/xenpaging.c | 3 --
tools/xenpaging/xenpaging.h | 11 ++++++--
3 files changed, 8 insertions(+), 63 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/mem_event.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/mem_event.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/******************************************************************************
- * tools/xenpaging/mem_event.h
- *
- * Memory event structures.
- *
- * Copyright (c) 2009 Citrix Systems, Inc. (Patrick Colp)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-
-#ifndef __XEN_MEM_EVENT_H__
-#define __XEN_MEM_EVENT_H__
-
-
-#include <xc_private.h>
-
-#include <xen/event_channel.h>
-#include <xen/mem_event.h>
-
-
-
-
-typedef struct mem_event {
- domid_t domain_id;
- xc_evtchn *xce_handle;
- int port;
- mem_event_back_ring_t back_ring;
- mem_event_shared_page_t *shared_page;
- void *ring_page;
-} mem_event_t;
-
-
-#endif // __XEN_MEM_EVENT_H__
-
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -31,11 +31,8 @@
#include <xc_private.h>
#include <xs.h>
-#include <xen/mem_event.h>
-
#include "xc_bitops.h"
#include "file_ops.h"
-
#include "policy.h"
#include "xenpaging.h"
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.h
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.h
@@ -26,12 +26,17 @@
#include <xc_private.h>
-
#include <xen/event_channel.h>
#include <xen/mem_event.h>
-#include "mem_event.h"
-
+typedef struct mem_event {
+ domid_t domain_id;
+ xc_evtchn *xce_handle;
+ int port;
+ mem_event_back_ring_t back_ring;
+ mem_event_shared_page_t *shared_page;
+ void *ring_page;
+} mem_event_t;
typedef struct xenpaging {
xc_interface *xc_handle;
++++++ 23599-tools_fix_build_after_recent_xenpaging_changes.patch ++++++
changeset: 23599:d3027374a8c0
user: Tim Deegan <Tim.Deegan@xxxxxxxxxx>
date: Mon Jun 27 14:48:57 2011 +0100
files: tools/xenpaging/Makefile
description:
tools: fix build after recent xenpaging changes
xenpaging now uses pthreads, so must link appropriately.
Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
---
tools/xenpaging/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: xen-4.1.2-testing/tools/xenpaging/Makefile
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/Makefile
+++ xen-4.1.2-testing/tools/xenpaging/Makefile
@@ -4,7 +4,7 @@ include $(XEN_ROOT)/tools/Rules.mk
CFLAGS += -I $(XEN_XC)
CFLAGS += -I ./
CFLAGS += $(CFLAGS_libxenctrl) $(CFLAGS_libxenstore)
-LDLIBS += $(LDLIBS_libxenctrl) $(LDLIBS_libxenstore)
+LDLIBS += $(LDLIBS_libxenctrl) $(LDLIBS_libxenstore) -pthread
POLICY = default
++++++ 23610-x86-topology-info.patch ++++++
References: fate#309894
# HG changeset patch
# User Wei Huang <wei.huang2@xxxxxxx>
# Date 1309248811 -3600
# Node ID 87c2013c2aa2d4874f892507e6cc7b52219a3acf
# Parent 819c315a919daec434f80ffb6e790ac492cbd2a7
x86: consolidate cpu_core_id and phys_proc_id into cpuinfo_x86 struct
This patch moves cpu_core_id and phys_proc_id into cpuinfo_x86
structure. This is similar to upstream Linux kernel's approach.
Signed-off-by: Wei Huang <wei.huang2@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/cpu/amd.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/cpu/amd.c
+++ xen-4.1.2-testing/xen/arch/x86/cpu/amd.c
@@ -579,11 +579,11 @@ static void __devinit init_amd(struct cp
while ((1 << bits) < c->x86_max_cores)
bits++;
}
- cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1);
- phys_proc_id[cpu] >>= bits;
+ c->cpu_core_id = c->phys_proc_id & ((1<<bits)-1);
+ c->phys_proc_id >>= bits;
if (opt_cpu_info)
printk("CPU %d(%d) -> Core %d\n",
- cpu, c->x86_max_cores, cpu_core_id[cpu]);
+ cpu, c->x86_max_cores, c->cpu_core_id);
}
#endif
Index: xen-4.1.2-testing/xen/arch/x86/cpu/common.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/cpu/common.c
+++ xen-4.1.2-testing/xen/arch/x86/cpu/common.c
@@ -326,7 +326,7 @@ void __cpuinit generic_identify(struct c
early_intel_workaround(c);
#ifdef CONFIG_X86_HT
- phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
+ c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
#endif
}
@@ -362,6 +362,8 @@ void __cpuinit identify_cpu(struct cpuin
c->x86_max_cores = 1;
c->x86_num_siblings = 1;
c->x86_clflush_size = 0;
+ c->phys_proc_id = BAD_APICID;
+ c->cpu_core_id = BAD_APICID;
memset(&c->x86_capability, 0, sizeof c->x86_capability);
if (!have_cpuid_p()) {
@@ -510,7 +512,6 @@ void __cpuinit detect_extended_topology(
unsigned int ht_mask_width, core_plus_mask_width;
unsigned int core_select_mask, core_level_siblings;
unsigned int initial_apicid;
- int cpu = smp_processor_id();
if ( c->cpuid_level < 0xb )
return;
@@ -545,9 +546,9 @@ void __cpuinit detect_extended_topology(
core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width;
- cpu_core_id[cpu] = phys_pkg_id(initial_apicid, ht_mask_width)
+ c->cpu_core_id = phys_pkg_id(initial_apicid, ht_mask_width)
& core_select_mask;
- phys_proc_id[cpu] = phys_pkg_id(initial_apicid, core_plus_mask_width);
+ c->phys_proc_id = phys_pkg_id(initial_apicid, core_plus_mask_width);
c->apicid = phys_pkg_id(initial_apicid, 0);
c->x86_max_cores = (core_level_siblings / c->x86_num_siblings);
@@ -555,10 +556,10 @@ void __cpuinit detect_extended_topology(
if ( opt_cpu_info )
{
printk("CPU: Physical Processor ID: %d\n",
- phys_proc_id[cpu]);
+ c->phys_proc_id);
if ( c->x86_max_cores > 1 )
printk("CPU: Processor Core ID: %d\n",
- cpu_core_id[cpu]);
+ c->cpu_core_id);
}
}
@@ -567,7 +568,6 @@ void __cpuinit detect_ht(struct cpuinfo_
{
u32 eax, ebx, ecx, edx;
int index_msb, core_bits;
- int cpu = smp_processor_id();
cpuid(1, &eax, &ebx, &ecx, &edx);
@@ -590,11 +590,11 @@ void __cpuinit detect_ht(struct cpuinfo_
}
index_msb = get_count_order(c->x86_num_siblings);
- phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
+ c->phys_proc_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
if (opt_cpu_info)
printk("CPU: Physical Processor ID: %d\n",
- phys_proc_id[cpu]);
+ c->phys_proc_id);
c->x86_num_siblings = c->x86_num_siblings / c->x86_max_cores;
@@ -602,12 +602,12 @@ void __cpuinit detect_ht(struct cpuinfo_
core_bits = get_count_order(c->x86_max_cores);
- cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) &
+ c->cpu_core_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) &
((1 << core_bits) - 1);
if (opt_cpu_info && c->x86_max_cores > 1)
printk("CPU: Processor Core ID: %d\n",
- cpu_core_id[cpu]);
+ c->cpu_core_id);
}
}
#endif
Index: xen-4.1.2-testing/xen/arch/x86/cpu/mcheck/mce.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/cpu/mcheck/mce.c
+++ xen-4.1.2-testing/xen/arch/x86/cpu/mcheck/mce.c
@@ -1041,9 +1041,9 @@ void x86_mc_get_cpu_info(unsigned cpu, u
if (nthreads != NULL)
*nthreads = 1;
} else {
- *chipid = phys_proc_id[cpu];
+ *chipid = c->phys_proc_id;
if (c->x86_max_cores > 1)
- *coreid = cpu_core_id[cpu];
+ *coreid = c->cpu_core_id;
else
*coreid = 0;
*threadid = c->apicid & ((1 << (c->x86_num_siblings - 1)) - 1);
Index: xen-4.1.2-testing/xen/arch/x86/smpboot.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/smpboot.c
+++ xen-4.1.2-testing/xen/arch/x86/smpboot.c
@@ -51,12 +51,6 @@
/* Set if we find a B stepping CPU */
static int smp_b_stepping;
-/* Package ID of each logical CPU */
-int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
-
-/* Core ID of each logical CPU */
-int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
-
/* representing HT siblings of each logical CPU */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_t, cpu_sibling_map);
/* representing HT and core siblings of each logical CPU */
@@ -257,8 +251,8 @@ static void set_cpu_sibling_map(int cpu)
{
for_each_cpu_mask ( i, cpu_sibling_setup_map )
{
- if ( (phys_proc_id[cpu] == phys_proc_id[i]) &&
- (cpu_core_id[cpu] == cpu_core_id[i]) )
+ if ( (c[cpu].phys_proc_id == c[i].phys_proc_id) &&
+ (c[cpu].cpu_core_id == c[i].cpu_core_id) )
{
cpu_set(i, per_cpu(cpu_sibling_map, cpu));
cpu_set(cpu, per_cpu(cpu_sibling_map, i));
@@ -281,7 +275,7 @@ static void set_cpu_sibling_map(int cpu)
for_each_cpu_mask ( i, cpu_sibling_setup_map )
{
- if ( phys_proc_id[cpu] == phys_proc_id[i] )
+ if ( c[cpu].phys_proc_id == c[i].phys_proc_id )
{
cpu_set(i, per_cpu(cpu_core_map, cpu));
cpu_set(cpu, per_cpu(cpu_core_map, i));
@@ -842,8 +836,8 @@ remove_siblinginfo(int cpu)
cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
cpus_clear(per_cpu(cpu_sibling_map, cpu));
cpus_clear(per_cpu(cpu_core_map, cpu));
- phys_proc_id[cpu] = BAD_APICID;
- cpu_core_id[cpu] = BAD_APICID;
+ c[cpu].phys_proc_id = BAD_APICID;
+ c[cpu].cpu_core_id = BAD_APICID;
cpu_clear(cpu, cpu_sibling_setup_map);
}
Index: xen-4.1.2-testing/xen/include/asm-x86/processor.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/processor.h
+++ xen-4.1.2-testing/xen/include/asm-x86/processor.h
@@ -175,6 +175,8 @@ struct cpuinfo_x86 {
__u32 x86_max_cores; /* cpuid returned max cores value */
__u32 booted_cores; /* number of cores as seen by OS */
__u32 x86_num_siblings; /* cpuid logical cpus per chip value */
+ int phys_proc_id; /* package ID of each logical CPU */
+ int cpu_core_id; /* core ID of each logical CPU*/
__u32 apicid;
unsigned short x86_clflush_size;
} __cacheline_aligned;
@@ -196,8 +198,6 @@ extern struct cpuinfo_x86 cpu_data[];
extern void set_cpuid_faulting(bool_t enable);
extern u64 host_pat;
-extern int phys_proc_id[NR_CPUS];
-extern int cpu_core_id[NR_CPUS];
extern bool_t opt_cpu_info;
/* Maximum width of physical addresses supported by the hardware */
@@ -217,8 +217,8 @@ extern void detect_ht(struct cpuinfo_x86
static always_inline void detect_ht(struct cpuinfo_x86 *c) {}
#endif
-#define cpu_to_core(_cpu) (cpu_core_id[_cpu])
-#define cpu_to_socket(_cpu) (phys_proc_id[_cpu])
+#define cpu_to_core(_cpu) (cpu_data[_cpu].cpu_core_id)
+#define cpu_to_socket(_cpu) (cpu_data[_cpu].phys_proc_id)
/*
* Generic CPUID function
++++++ 23611-amd-fam15-topology.patch ++++++
References: fate#309894
# HG changeset patch
# User Wei Huang <wei.huang2@xxxxxxx>
# Date 1309248833 -3600
# Node ID c2c12b2dafb5b1d3bfcc4c05a60ca4f9051c90e7
# Parent 87c2013c2aa2d4874f892507e6cc7b52219a3acf
x86: AMD core-pair topology detection code
This patch is to support core-pair topology introduced by AMD CPUs,
which introduces a new concept of [core, compute unit]. There is a new
feature bit for topology extension in CPUID:0x80000001. Also a new
CPUID 0x8000001E is introduced for CPU topology enumeration. This
patch collects the sibling information from the new CPUID and will be
stored in the sibling map in Xen hypervisor.
Signed-off-by: Wei Huang <wei.huang2@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/cpu/amd.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/cpu/amd.c
+++ xen-4.1.2-testing/xen/arch/x86/cpu/amd.c
@@ -344,6 +344,49 @@ static void check_syscfg_dram_mod_en(voi
wrmsrl(MSR_K8_SYSCFG, syscfg);
}
+static void __devinit amd_get_topology(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_X86_HT
+ int cpu;
+ unsigned bits;
+
+ if (c->x86_max_cores <= 1)
+ return;
+ /*
+ * On a AMD multi core setup the lower bits of the APIC id
+ * distingush the cores.
+ */
+ cpu = smp_processor_id();
+ bits = (cpuid_ecx(0x80000008) >> 12) & 0xf;
+
+ if (bits == 0) {
+ while ((1 << bits) < c->x86_max_cores)
+ bits++;
+ }
+
+ /* Low order bits define the core id */
+ c->cpu_core_id = c->phys_proc_id & ((1<<bits)-1);
+ /* Convert local APIC ID into the socket ID */
+ c->phys_proc_id >>= bits;
+ /* Collect compute unit ID if available */
+ if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
+ u32 eax, ebx, ecx, edx;
+
+ cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
+ c->compute_unit_id = ebx & 0xFF;
+ c->x86_num_siblings = ((ebx >> 8) & 0x3) + 1;
+ }
+
+ if (opt_cpu_info)
+ printk("CPU %d(%d) -> Processor %d, %s %d\n",
+ cpu, c->x86_max_cores, c->phys_proc_id,
+ cpu_has(c, X86_FEATURE_TOPOEXT) ? "Compute Unit" :
+ "Core",
+ cpu_has(c, X86_FEATURE_TOPOEXT) ? c->compute_unit_id :
+ c->cpu_core_id);
+#endif
+}
+
static void __devinit init_amd(struct cpuinfo_x86 *c)
{
u32 l, h;
@@ -566,26 +609,7 @@ static void __devinit init_amd(struct cp
}
}
-#ifdef CONFIG_X86_HT
- /*
- * On a AMD multi core setup the lower bits of the APIC id
- * distingush the cores.
- */
- if (c->x86_max_cores > 1) {
- int cpu = smp_processor_id();
- unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf;
-
- if (bits == 0) {
- while ((1 << bits) < c->x86_max_cores)
- bits++;
- }
- c->cpu_core_id = c->phys_proc_id & ((1<<bits)-1);
- c->phys_proc_id >>= bits;
- if (opt_cpu_info)
- printk("CPU %d(%d) -> Core %d\n",
- cpu, c->x86_max_cores, c->cpu_core_id);
- }
-#endif
+ amd_get_topology(c);
/* Pointless to use MWAIT on Family10 as it does not deep sleep. */
if (c->x86 >= 0x10 && !force_mwait)
Index: xen-4.1.2-testing/xen/arch/x86/cpu/common.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/cpu/common.c
+++ xen-4.1.2-testing/xen/arch/x86/cpu/common.c
@@ -364,6 +364,7 @@ void __cpuinit identify_cpu(struct cpuin
c->x86_clflush_size = 0;
c->phys_proc_id = BAD_APICID;
c->cpu_core_id = BAD_APICID;
+ c->compute_unit_id = BAD_APICID;
memset(&c->x86_capability, 0, sizeof c->x86_capability);
if (!have_cpuid_p()) {
Index: xen-4.1.2-testing/xen/arch/x86/smpboot.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/smpboot.c
+++ xen-4.1.2-testing/xen/arch/x86/smpboot.c
@@ -240,6 +240,14 @@ static int booting_cpu;
/* CPUs for which sibling maps can be computed. */
static cpumask_t cpu_sibling_setup_map;
+static void link_thread_siblings(int cpu1, int cpu2)
+{
+ cpu_set(cpu1, per_cpu(cpu_sibling_map, cpu2));
+ cpu_set(cpu2, per_cpu(cpu_sibling_map, cpu1));
+ cpu_set(cpu1, per_cpu(cpu_core_map, cpu2));
+ cpu_set(cpu2, per_cpu(cpu_core_map, cpu1));
+}
+
static void set_cpu_sibling_map(int cpu)
{
int i;
@@ -251,13 +259,13 @@ static void set_cpu_sibling_map(int cpu)
{
for_each_cpu_mask ( i, cpu_sibling_setup_map )
{
- if ( (c[cpu].phys_proc_id == c[i].phys_proc_id) &&
- (c[cpu].cpu_core_id == c[i].cpu_core_id) )
- {
- cpu_set(i, per_cpu(cpu_sibling_map, cpu));
- cpu_set(cpu, per_cpu(cpu_sibling_map, i));
- cpu_set(i, per_cpu(cpu_core_map, cpu));
- cpu_set(cpu, per_cpu(cpu_core_map, i));
+ if ( cpu_has(c, X86_FEATURE_TOPOEXT) ) {
+ if ( (c[cpu].phys_proc_id == c[i].phys_proc_id) &&
+ (c[cpu].compute_unit_id == c[i].compute_unit_id) )
+ link_thread_siblings(cpu, i);
+ } else if ( (c[cpu].phys_proc_id == c[i].phys_proc_id) &&
+ (c[cpu].cpu_core_id == c[i].cpu_core_id) ) {
+ link_thread_siblings(cpu, i);
}
}
}
@@ -838,6 +846,7 @@ remove_siblinginfo(int cpu)
cpus_clear(per_cpu(cpu_core_map, cpu));
c[cpu].phys_proc_id = BAD_APICID;
c[cpu].cpu_core_id = BAD_APICID;
+ c[cpu].compute_unit_id = BAD_APICID;
cpu_clear(cpu, cpu_sibling_setup_map);
}
Index: xen-4.1.2-testing/xen/include/asm-x86/processor.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/processor.h
+++ xen-4.1.2-testing/xen/include/asm-x86/processor.h
@@ -175,9 +175,10 @@ struct cpuinfo_x86 {
__u32 x86_max_cores; /* cpuid returned max cores value */
__u32 booted_cores; /* number of cores as seen by OS */
__u32 x86_num_siblings; /* cpuid logical cpus per chip value */
+ __u32 apicid;
int phys_proc_id; /* package ID of each logical CPU */
int cpu_core_id; /* core ID of each logical CPU*/
- __u32 apicid;
+ int compute_unit_id; /* AMD compute unit ID of each logical CPU */
unsigned short x86_clflush_size;
} __cacheline_aligned;
++++++ 23613-EFI-headers.patch ++++++
++++ 2741 lines (skipped)
++++++ 23614-x86_64-EFI-boot.patch ++++++
++++ 2579 lines (skipped)
++++++ 23615-x86_64-EFI-runtime.patch ++++++
++++ 850 lines (skipped)
++++++ 23616-x86_64-EFI-MPS.patch ++++++
References: fate#311376, fate#311529, bnc#578927, bnc#628554
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1309249288 -3600
# Node ID dffcd8b4c197b58d2acb914d0e07a100e340f7ae
# Parent d19e778442673050bba8ea8cf61585902ff81162
x86-64: EFI MPS support
It's not clear this is needed - Linux doesn't use the MPS table even
if available, and no system having one was seen so far.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1309268736 -3600
# Node ID d22b64ccf088db8bfce1d6c4830f08e3e834ec84
# Parent 6d404796a8e587eb648a66f2859991d385b65eb6
x86_32: Fix build after EFI MPS patch.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/efi/boot.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/efi/boot.c
+++ xen-4.1.2-testing/xen/arch/x86/efi/boot.c
@@ -897,12 +897,15 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
{
static EFI_GUID __initdata acpi2_guid = ACPI_20_TABLE_GUID;
static EFI_GUID __initdata acpi_guid = ACPI_TABLE_GUID;
+ static EFI_GUID __initdata mps_guid = MPS_TABLE_GUID;
static EFI_GUID __initdata smbios_guid = SMBIOS_TABLE_GUID;
if ( match_guid(&acpi2_guid, &efi_ct[i].VendorGuid) )
efi.acpi20 = (long)efi_ct[i].VendorTable;
if ( match_guid(&acpi_guid, &efi_ct[i].VendorGuid) )
efi.acpi = (long)efi_ct[i].VendorTable;
+ if ( match_guid(&mps_guid, &efi_ct[i].VendorGuid) )
+ efi.mps = (long)efi_ct[i].VendorTable;
if ( match_guid(&smbios_guid, &efi_ct[i].VendorGuid) )
efi.smbios = (long)efi_ct[i].VendorTable;
}
Index: xen-4.1.2-testing/xen/arch/x86/efi/runtime.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/efi/runtime.c
+++ xen-4.1.2-testing/xen/arch/x86/efi/runtime.c
@@ -29,6 +29,7 @@ void *__read_mostly efi_memmap;
struct efi __read_mostly efi = {
.acpi = EFI_INVALID_TABLE_ADDR,
.acpi20 = EFI_INVALID_TABLE_ADDR,
+ .mps = EFI_INVALID_TABLE_ADDR,
.smbios = EFI_INVALID_TABLE_ADDR,
};
Index: xen-4.1.2-testing/xen/arch/x86/mpparse.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mpparse.c
+++ xen-4.1.2-testing/xen/arch/x86/mpparse.c
@@ -19,6 +19,8 @@
#include <xen/init.h>
#include <xen/acpi.h>
#include <xen/delay.h>
+#include <xen/efi.h>
+#include <xen/pfn.h>
#include <xen/sched.h>
#include <asm/mc146818rtc.h>
@@ -655,6 +657,14 @@ static inline void __init construct_defa
}
}
+#define FIX_EFI_MPF FIX_KEXEC_BASE_0
+
+static __init void efi_unmap_mpf(void)
+{
+ if (efi_enabled)
+ __set_fixmap(FIX_EFI_MPF, 0, 0);
+}
+
static struct intel_mp_floating *mpf_found;
/*
@@ -669,6 +679,7 @@ void __init get_smp_config (void)
* processors, where MPS only supports physical.
*/
if (acpi_lapic && acpi_ioapic) {
+ efi_unmap_mpf();
printk(KERN_INFO "Using ACPI (MADT) for SMP configuration
information\n");
return;
}
@@ -699,6 +710,7 @@ void __init get_smp_config (void)
* override the defaults.
*/
if (!smp_read_mpc((void *)(unsigned long)mpf->mpf_physptr)) {
+ efi_unmap_mpf();
smp_found_config = 0;
printk(KERN_ERR "BIOS bug, MP table errors
detected!...\n");
printk(KERN_ERR "... disabling SMP support. (tell your
hw vendor)\n");
@@ -725,6 +737,8 @@ void __init get_smp_config (void)
} else
BUG();
+ efi_unmap_mpf();
+
printk(KERN_INFO "Processors: %d\n", num_processors);
/*
* Only use the first configuration found.
@@ -779,10 +793,37 @@ static int __init smp_scan_config (unsig
return 0;
}
+static void __init efi_check_config(void)
+{
+ struct intel_mp_floating *mpf;
+
+ if (efi.mps == EFI_INVALID_TABLE_ADDR)
+ return;
+
+ __set_fixmap(FIX_EFI_MPF, PFN_DOWN(efi.mps), __PAGE_HYPERVISOR);
+ mpf = (void *)fix_to_virt(FIX_EFI_MPF) + ((long)efi.mps &
(PAGE_SIZE-1));
+
+ if (memcmp(mpf->mpf_signature, "_MP_", 4) == 0 &&
+ mpf->mpf_length == 1 &&
+ mpf_checksum((void *)mpf, 16) &&
+ (mpf->mpf_specification == 1 || mpf->mpf_specification == 4)) {
+ smp_found_config = 1;
+ printk(KERN_INFO "SMP MP-table at %08lx\n", efi.mps);
+ mpf_found = mpf;
+ }
+ else
+ efi_unmap_mpf();
+}
+
void __init find_smp_config (void)
{
unsigned int address;
+ if (efi_enabled) {
+ efi_check_config();
+ return;
+ }
+
/*
* FIXME: Linux assumes you have 640K of base ram..
* this continues the error...
Index: xen-4.1.2-testing/xen/include/xen/efi.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/xen/efi.h
+++ xen-4.1.2-testing/xen/include/xen/efi.h
@@ -17,6 +17,7 @@ extern const bool_t efi_enabled;
/* Add fields here only if they need to be referenced from non-EFI code. */
struct efi {
+ unsigned long mps; /* MPS table */
unsigned long acpi; /* ACPI table (IA64 ext 0.71) */
unsigned long acpi20; /* ACPI table (ACPI 2.0) */
unsigned long smbios; /* SM BIOS table */
++++++ 23643-xentrace_Allow_tracing_to_be_enabled_at_boot.patch ++++++
changeset: 23643:335e96664589
user: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
date: Fri Jul 01 20:31:18 2011 +0100
files: xen/common/trace.c
description:
xentrace: Allow tracing to be enabled at boot
Add a "tevt_mask" parameter to the xen command-line, allowing
trace records to be gathered early in boot. They will be placed
into the trace buffers, and read when the user runs "xentrace".
Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
---
xen/common/trace.c | 20 ++++++++++++++++----
1 file changed, 16 insertions(+), 4 deletions(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -45,7 +45,9 @@ CHECK_t_buf;
/* opt_tbuf_size: trace buffer size (in pages) for each cpu */
static unsigned int opt_tbuf_size;
+static unsigned int opt_tevt_mask;
integer_param("tbuf_size", opt_tbuf_size);
+integer_param("tevt_mask", opt_tevt_mask);
/* Pointers to the meta-data objects for all system trace buffers */
static struct t_info *t_info;
@@ -338,11 +340,21 @@ void __init init_trace_bufs(void)
{
register_cpu_notifier(&cpu_nfb);
- if ( opt_tbuf_size && alloc_trace_bufs(opt_tbuf_size) )
+ if ( opt_tbuf_size )
{
- printk(XENLOG_INFO "xentrace: allocation size %d failed, disabling\n",
- opt_tbuf_size);
- opt_tbuf_size = 0;
+ if ( alloc_trace_bufs(opt_tbuf_size) )
+ {
+ printk("xentrace: allocation size %d failed, disabling\n",
+ opt_tbuf_size);
+ opt_tbuf_size = 0;
+ }
+ else if ( opt_tevt_mask )
+ {
+ printk("xentrace: Starting tracing, enabling mask %x\n",
+ opt_tevt_mask);
+ tb_event_mask = opt_tevt_mask;
+ tb_init_done=1;
+ }
}
}
++++++ 23676-x86_64-image-map-bounds.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1310631973 -3600
# Node ID 80c9db90bba96e443a22d268c06948fdef9c6a75
# Parent 88823213a4780ebced6d7adcb1ffd2dda6a339ca
x86-64: properly handle alias mappings beyond _end
Changeset 19632:b0966b6f5180 wasn't really complete: The Xen image
mapping doesn't end at _end, but a full 16Mb gets mapped during boot
(and never got unmapped so far), hence all of this space was subject
to alias mappings when it comes to cache attribute changes. Unmap all
full large pages between _end and the 16Mb boundary, and include all
other pages beyond _end when checking for aliases.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/mm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm.c
+++ xen-4.1.2-testing/xen/arch/x86/mm.c
@@ -778,7 +778,7 @@ static int update_xen_mappings(unsigned
int err = 0;
#ifdef __x86_64__
bool_t alias = mfn >= PFN_DOWN(xen_phys_start) &&
- mfn < PFN_UP(xen_phys_start + (unsigned long)_end - XEN_VIRT_START);
+ mfn < PFN_UP(xen_phys_start + xen_virt_end - XEN_VIRT_START);
unsigned long xen_va =
XEN_VIRT_START + ((mfn - PFN_DOWN(xen_phys_start)) << PAGE_SHIFT);
Index: xen-4.1.2-testing/xen/arch/x86/setup.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/setup.c
+++ xen-4.1.2-testing/xen/arch/x86/setup.c
@@ -99,6 +99,8 @@ unsigned long __read_mostly xen_phys_sta
/* Limits of Xen heap, used to initialise the allocator. */
unsigned long __initdata xenheap_initial_phys_start;
unsigned long __read_mostly xenheap_phys_end;
+#else
+unsigned long __read_mostly xen_virt_end;
#endif
DEFINE_PER_CPU(struct tss_struct, init_tss);
@@ -1098,6 +1100,9 @@ void __init __start_xen(unsigned long mb
map_pages_to_xen((unsigned long)__va(kexec_crash_area.start),
kexec_crash_area.start >> PAGE_SHIFT,
PFN_UP(kexec_crash_area.size), PAGE_HYPERVISOR);
+ xen_virt_end = ((unsigned long)_end + (1UL << L2_PAGETABLE_SHIFT) - 1) &
+ ~((1UL << L2_PAGETABLE_SHIFT) - 1);
+ destroy_xen_mappings(xen_virt_end, XEN_VIRT_START + BOOTSTRAP_MAP_BASE);
#endif
memguard_init();
Index: xen-4.1.2-testing/xen/include/asm-x86/x86_64/page.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/x86_64/page.h
+++ xen-4.1.2-testing/xen/include/asm-x86/x86_64/page.h
@@ -34,6 +34,8 @@
#include <xen/config.h>
#include <asm/types.h>
+extern unsigned long xen_virt_end;
+
extern unsigned long max_pdx;
extern unsigned long pfn_pdx_bottom_mask, ma_va_bottom_mask;
extern unsigned int pfn_pdx_hole_shift;
++++++ 23719-xentrace_update___trace_var_comment.patch ++++++
changeset: 23719:c2888876abd3
user: Olaf Hering <olaf@xxxxxxxxx>
date: Tue Jul 19 08:22:19 2011 +0100
files: xen/common/trace.c
description:
xentrace: update __trace_var comment
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
xen/common/trace.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
Index: xen-4.1.2-testing/xen/common/trace.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/trace.c
+++ xen-4.1.2-testing/xen/common/trace.c
@@ -657,13 +657,13 @@ static DECLARE_SOFTIRQ_TASKLET(trace_not
trace_notify_dom0, 0);
/**
- * trace - Enters a trace tuple into the trace buffer for the current CPU.
+ * __trace_var - Enters a trace tuple into the trace buffer for the current
CPU.
* @event: the event type being logged
- * @d1...d5: the data items for the event being logged
+ * @cycles: include tsc timestamp into trace record
+ * @extra: size of additional trace data in bytes
+ * @extra_data: pointer to additional trace data
*
- * Logs a trace record into the appropriate buffer. Returns nonzero on
- * failure, otherwise 0. Failure occurs only if the trace buffers are not yet
- * initialised.
+ * Logs a trace record into the appropriate buffer.
*/
void __trace_var(u32 event, bool_t cycles, unsigned int extra,
const void *extra_data)
++++++ 23723-x86-CMOS-lock.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1311081053 -3600
# Node ID 18653a163b1e8e10b4353272bcb9e8302bfd2e19
# Parent 7bc5825e471db5a3a989f47d21334ef63a6b5610
x86: consistently serialize CMOS/RTC accesses on rtc_lock
Since RTC/CMOS accesses aren't atomic, there are possible races
between code paths setting the index register and subsequently
reading/writing the data register. This is supposed to be dealt with
by acquiring rtc_lock, but two places up to now lacked respective
synchronization: Accesses to the EFI time functions and
smpboot_{setup,restore}_warm_reset_vector().
This in turn requires no longer directly passing through guest writes
to the index register, but instead using a machanism similar to that
for PCI config space method 1 accesses.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/efi/runtime.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/efi/runtime.c
+++ xen-4.1.2-testing/xen/arch/x86/efi/runtime.c
@@ -3,6 +3,7 @@
#include <xen/errno.h>
#include <xen/guest_access.h>
#include <xen/time.h>
+#include <asm/mc146818rtc.h>
DEFINE_XEN_GUEST_HANDLE(CHAR16);
@@ -80,9 +81,11 @@ unsigned long efi_get_time(void)
{
EFI_TIME time;
EFI_STATUS status;
- unsigned long cr3 = efi_rs_enter();
+ unsigned long cr3 = efi_rs_enter(), flags;
+ spin_lock_irqsave(&rtc_lock, flags);
status = efi_rs->GetTime(&time, NULL);
+ spin_unlock_irqrestore(&rtc_lock, flags);
efi_rs_leave(cr3);
if ( EFI_ERROR(status) )
@@ -223,7 +226,7 @@ static inline EFI_GUID *cast_guid(struct
int efi_runtime_call(struct xenpf_efi_runtime_call *op)
{
- unsigned long cr3;
+ unsigned long cr3, flags;
EFI_STATUS status = EFI_NOT_STARTED;
int rc = 0;
@@ -237,7 +240,9 @@ int efi_runtime_call(struct xenpf_efi_ru
return -EINVAL;
cr3 = efi_rs_enter();
+ spin_lock_irqsave(&rtc_lock, flags);
status = efi_rs->GetTime(cast_time(&op->u.get_time.time), &caps);
+ spin_unlock_irqrestore(&rtc_lock, flags);
efi_rs_leave(cr3);
if ( !EFI_ERROR(status) )
@@ -255,7 +260,9 @@ int efi_runtime_call(struct xenpf_efi_ru
return -EINVAL;
cr3 = efi_rs_enter();
+ spin_lock_irqsave(&rtc_lock, flags);
status = efi_rs->SetTime(cast_time(&op->u.set_time));
+ spin_unlock_irqrestore(&rtc_lock, flags);
efi_rs_leave(cr3);
break;
@@ -267,8 +274,10 @@ int efi_runtime_call(struct xenpf_efi_ru
return -EINVAL;
cr3 = efi_rs_enter();
+ spin_lock_irqsave(&rtc_lock, flags);
status = efi_rs->GetWakeupTime(&enabled, &pending,
cast_time(&op->u.get_wakeup_time));
+ spin_unlock_irqrestore(&rtc_lock, flags);
efi_rs_leave(cr3);
if ( !EFI_ERROR(status) )
@@ -287,12 +296,14 @@ int efi_runtime_call(struct xenpf_efi_ru
return -EINVAL;
cr3 = efi_rs_enter();
+ spin_lock_irqsave(&rtc_lock, flags);
status = efi_rs->SetWakeupTime(!!(op->misc &
XEN_EFI_SET_WAKEUP_TIME_ENABLE),
(op->misc &
XEN_EFI_SET_WAKEUP_TIME_ENABLE_ONLY) ?
NULL :
cast_time(&op->u.set_wakeup_time));
+ spin_unlock_irqrestore(&rtc_lock, flags);
efi_rs_leave(cr3);
op->misc = 0;
Index: xen-4.1.2-testing/xen/arch/x86/hpet.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hpet.c
+++ xen-4.1.2-testing/xen/arch/x86/hpet.c
@@ -525,18 +525,10 @@ static void hpet_detach_channel(int cpu,
#include <asm/mc146818rtc.h>
-void (*pv_rtc_handler)(unsigned int port, uint8_t value);
+void (*__read_mostly pv_rtc_handler)(uint8_t index, uint8_t value);
-static void handle_rtc_once(unsigned int port, uint8_t value)
+static void handle_rtc_once(uint8_t index, uint8_t value)
{
- static int index;
-
- if ( port == 0x70 )
- {
- index = value;
- return;
- }
-
if ( index != RTC_REG_B )
return;
Index: xen-4.1.2-testing/xen/arch/x86/traps.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/traps.c
+++ xen-4.1.2-testing/xen/arch/x86/traps.c
@@ -67,6 +67,8 @@
#include <asm/hypercall.h>
#include <asm/mce.h>
#include <asm/apic.h>
+#include <asm/mc146818rtc.h>
+#include <asm/hpet.h>
#include <public/arch-x86/cpuid.h>
/*
@@ -1630,6 +1632,10 @@ static int admin_io_okay(
if ( (port == 0xcf8) && (bytes == 4) )
return 0;
+ /* We also never permit direct access to the RTC/CMOS registers. */
+ if ( ((port & ~1) == RTC_PORT(0)) )
+ return 0;
+
return ioports_access_permitted(v->domain, port, port + bytes - 1);
}
@@ -1659,6 +1665,21 @@ static uint32_t guest_io_read(
{
sub_data = pv_pit_handler(port, 0, 0);
}
+ else if ( (port == RTC_PORT(0)) )
+ {
+ sub_data = v->domain->arch.cmos_idx;
+ }
+ else if ( (port == RTC_PORT(1)) &&
+ ioports_access_permitted(v->domain, RTC_PORT(0),
+ RTC_PORT(1)) )
+ {
+ unsigned long flags;
+
+ spin_lock_irqsave(&rtc_lock, flags);
+ outb(v->domain->arch.cmos_idx & 0x7f, RTC_PORT(0));
+ sub_data = inb(RTC_PORT(1));
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ }
else if ( (port == 0xcf8) && (bytes == 4) )
{
size = 4;
@@ -1684,8 +1705,6 @@ static uint32_t guest_io_read(
return data;
}
-extern void (*pv_rtc_handler)(unsigned int port, uint8_t value);
-
static void guest_io_write(
unsigned int port, unsigned int bytes, uint32_t data,
struct vcpu *v, struct cpu_user_regs *regs)
@@ -1694,8 +1713,6 @@ static void guest_io_write(
{
switch ( bytes ) {
case 1:
- if ( ((port == 0x70) || (port == 0x71)) && pv_rtc_handler )
- pv_rtc_handler(port, (uint8_t)data);
outb((uint8_t)data, port);
if ( pv_post_outb_hook )
pv_post_outb_hook(port, (uint8_t)data);
@@ -1718,6 +1735,23 @@ static void guest_io_write(
{
pv_pit_handler(port, (uint8_t)data, 1);
}
+ else if ( (port == RTC_PORT(0)) )
+ {
+ v->domain->arch.cmos_idx = data;
+ }
+ else if ( (port == RTC_PORT(1)) &&
+ ioports_access_permitted(v->domain, RTC_PORT(0),
+ RTC_PORT(1)) )
+ {
+ unsigned long flags;
+
+ if ( pv_rtc_handler )
+ pv_rtc_handler(v->domain->arch.cmos_idx & 0x7f, data);
+ spin_lock_irqsave(&rtc_lock, flags);
+ outb(v->domain->arch.cmos_idx & 0x7f, RTC_PORT(0));
+ outb(data, RTC_PORT(1));
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ }
else if ( (port == 0xcf8) && (bytes == 4) )
{
size = 4;
@@ -2083,10 +2117,6 @@ static int emulate_privileged_op(struct
goto fail;
if ( admin_io_okay(port, op_bytes, v, regs) )
{
- if ( (op_bytes == 1) &&
- ((port == 0x71) || (port == 0x70)) &&
- pv_rtc_handler )
- pv_rtc_handler(port, regs->eax);
io_emul(regs);
if ( (op_bytes == 1) && pv_post_outb_hook )
pv_post_outb_hook(port, regs->eax);
Index: xen-4.1.2-testing/xen/include/asm-x86/domain.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/domain.h
+++ xen-4.1.2-testing/xen/include/asm-x86/domain.h
@@ -251,6 +251,7 @@ struct arch_domain
/* I/O-port admin-specified access capabilities. */
struct rangeset *ioport_caps;
uint32_t pci_cf8;
+ uint8_t cmos_idx;
struct list_head pdev_list;
struct hvm_domain hvm_domain;
Index: xen-4.1.2-testing/xen/include/asm-x86/hpet.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/hpet.h
+++ xen-4.1.2-testing/xen/include/asm-x86/hpet.h
@@ -52,6 +52,7 @@
#define HPET_TN_FSB_CAP 0x8000
#define HPET_TN_ROUTE_SHIFT 9
+extern void (*pv_rtc_handler)(uint8_t reg, uint8_t value);
#define hpet_read32(x) \
(*(volatile u32 *)(fix_to_virt(FIX_HPET_BASE) + (x)))
Index: xen-4.1.2-testing/xen/include/asm-x86/mach-default/smpboot_hooks.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/mach-default/smpboot_hooks.h
+++ xen-4.1.2-testing/xen/include/asm-x86/mach-default/smpboot_hooks.h
@@ -3,7 +3,11 @@
static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&rtc_lock, flags);
CMOS_WRITE(0xa, 0xf);
+ spin_unlock_irqrestore(&rtc_lock, flags);
flush_tlb_local();
Dprintk("1.\n");
*((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
@@ -14,6 +18,8 @@ static inline void smpboot_setup_warm_re
static inline void smpboot_restore_warm_reset_vector(void)
{
+ unsigned long flags;
+
/*
* Install writable page 0 entry to set BIOS data area.
*/
@@ -23,7 +29,9 @@ static inline void smpboot_restore_warm_
* Paranoid: Set warm reset code and vector here back
* to default values.
*/
+ spin_lock_irqsave(&rtc_lock, flags);
CMOS_WRITE(0, 0xf);
+ spin_unlock_irqrestore(&rtc_lock, flags);
*((volatile int *) maddr_to_virt(0x467)) = 0;
}
++++++ 23724-x86-smpboot-x2apic.patch ++++++
# HG changeset patch
# User Tim Deegan <Tim.Deegan@xxxxxxxxxx>
# Date 1311081181 -3600
# Node ID b3434f24b0827c5ef34e4b4a72893288e2ffbe40
# Parent 18653a163b1e8e10b4353272bcb9e8302bfd2e19
x86: Remove timeouts from INIT-SIPI-SIPI sequence when using x2apic.
Some of the timeouts are pointless since they're waiting for the ICR
to ack the IPI delivery and that doesn't happen on x2apic.
The others should be benign (and are suggested in the SDM) but
removing them makes AP bringup much more reliable on some test boxes.
Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/smpboot.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/smpboot.c
+++ xen-4.1.2-testing/xen/arch/x86/smpboot.c
@@ -447,29 +447,30 @@ static int wakeup_secondary_cpu(int phys
apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
phys_apicid);
- Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- if ( !x2apic_enabled )
+ if ( !x2apic_enabled )
+ {
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while ( send_status && (timeout++ < 1000) );
+ } while ( send_status && (timeout++ < 1000) );
- mdelay(10);
+ mdelay(10);
- Dprintk("Deasserting INIT.\n");
+ Dprintk("Deasserting INIT.\n");
- apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
+ apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
- Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- if ( !x2apic_enabled )
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while ( send_status && (timeout++ < 1000) );
+ } while ( send_status && (timeout++ < 1000) );
+ }
/*
* Should we send STARTUP IPIs ?
@@ -498,22 +499,24 @@ static int wakeup_secondary_cpu(int phys
*/
apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12), phys_apicid);
- /* Give the other CPU some time to accept the IPI. */
- udelay(300);
-
- Dprintk("Startup point 1.\n");
-
- Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- if ( !x2apic_enabled )
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while ( send_status && (timeout++ < 1000) );
-
- /* Give the other CPU some time to accept the IPI. */
- udelay(200);
+ if ( !x2apic_enabled )
+ {
+ /* Give the other CPU some time to accept the IPI. */
+ udelay(300);
+
+ Dprintk("Startup point 1.\n");
+
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while ( send_status && (timeout++ < 1000) );
+
+ /* Give the other CPU some time to accept the IPI. */
+ udelay(200);
+ }
/* Due to the Pentium erratum 3AP. */
if ( maxlvt > 3 )
++++++ 23726-x86-intel-flexmigration-v2.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1311081291 -3600
# Node ID fd97ca086df6808bffc6ecf3f79cebca64c60bc3
# Parent 4dc6a9ba90d60fdf0cc0898fc9a8fe84ae9030fc
x86: update Intel CPUID masking code to latest spec
..., which adds masking of the xsave feature leaf.
Also fix the printing (to actually make it do what it was supposed to
do from the beginning) of what specific masking couldn't be done in
case the user requested something the hardware doesn't support.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1311255291 -3600
# Node ID 48f72b389b04cfa8d44924577a69ed59e48fbe77
# Parent dd5eecf739d152fb16bd44897875ea878d4c9d59
x86: add change missing in c/s 23726:fd97ca086df6
The early "do we need to do anything" check needs adjustment, too.
Thanks to Haitao Shan for pointing this out.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -27,10 +27,15 @@ boolean_param("noserialnumber", disable_
static bool_t __cpuinitdata use_xsave = 1;
boolean_param("xsave", use_xsave);
+
unsigned int __devinitdata opt_cpuid_mask_ecx = ~0u;
integer_param("cpuid_mask_ecx", opt_cpuid_mask_ecx);
unsigned int __devinitdata opt_cpuid_mask_edx = ~0u;
integer_param("cpuid_mask_edx", opt_cpuid_mask_edx);
+
+unsigned int __devinitdata opt_cpuid_mask_xsave_eax = ~0u;
+integer_param("cpuid_mask_xsave_eax", opt_cpuid_mask_xsave_eax);
+
unsigned int __devinitdata opt_cpuid_mask_ext_ecx = ~0u;
integer_param("cpuid_mask_ext_ecx", opt_cpuid_mask_ext_ecx);
unsigned int __devinitdata opt_cpuid_mask_ext_edx = ~0u;
--- a/xen/arch/x86/cpu/cpu.h
+++ b/xen/arch/x86/cpu/cpu.h
@@ -22,6 +22,7 @@ struct cpu_dev {
extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM];
extern unsigned int opt_cpuid_mask_ecx, opt_cpuid_mask_edx;
+extern unsigned int opt_cpuid_mask_xsave_eax;
extern unsigned int opt_cpuid_mask_ext_ecx, opt_cpuid_mask_ext_edx;
extern int get_model_name(struct cpuinfo_x86 *c);
--- a/xen/arch/x86/cpu/intel.c
+++ b/xen/arch/x86/cpu/intel.c
@@ -59,10 +59,12 @@ void set_cpuid_faulting(bool_t enable)
*/
static void __devinit set_cpuidmask(const struct cpuinfo_x86 *c)
{
+ u32 eax, edx;
const char *extra = "";
if (!~(opt_cpuid_mask_ecx & opt_cpuid_mask_edx &
- opt_cpuid_mask_ext_ecx & opt_cpuid_mask_ext_edx))
+ opt_cpuid_mask_ext_ecx & opt_cpuid_mask_ext_edx &
+ opt_cpuid_mask_xsave_eax))
return;
/* Only family 6 supports this feature */
@@ -75,9 +77,12 @@ static void __devinit set_cpuidmask(cons
wrmsr(MSR_INTEL_CPUID_FEATURE_MASK,
opt_cpuid_mask_ecx,
opt_cpuid_mask_edx);
- if (!~(opt_cpuid_mask_ext_ecx & opt_cpuid_mask_ext_edx))
+ if (~(opt_cpuid_mask_ext_ecx & opt_cpuid_mask_ext_edx))
+ extra = "extended ";
+ else if (~opt_cpuid_mask_xsave_eax)
+ extra = "xsave ";
+ else
return;
- extra = "extended ";
break;
/*
* CPU supports this feature if the processor signature meets the following:
@@ -97,11 +102,25 @@ static void __devinit set_cpuidmask(cons
wrmsr(MSR_INTEL_CPUID80000001_FEATURE_MASK,
opt_cpuid_mask_ext_ecx,
opt_cpuid_mask_ext_edx);
+ if (!~opt_cpuid_mask_xsave_eax)
+ return;
+ extra = "xsave ";
+ break;
+ case 0x2a:
+ wrmsr(MSR_INTEL_CPUID1_FEATURE_MASK_V2,
+ opt_cpuid_mask_ecx,
+ opt_cpuid_mask_edx);
+ rdmsr(MSR_INTEL_CPUIDD_01_FEATURE_MASK, eax, edx);
+ wrmsr(MSR_INTEL_CPUIDD_01_FEATURE_MASK,
+ opt_cpuid_mask_xsave_eax, edx);
+ wrmsr(MSR_INTEL_CPUID80000001_FEATURE_MASK_V2,
+ opt_cpuid_mask_ext_ecx,
+ opt_cpuid_mask_ext_edx);
return;
}
- printk(XENLOG_ERR "Cannot set CPU feature mask on CPU#%d\n",
- smp_processor_id());
+ printk(XENLOG_ERR "Cannot set CPU %sfeature mask on CPU#%d\n",
+ extra, smp_processor_id());
}
void __devinit early_intel_workaround(struct cpuinfo_x86 *c)
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -488,6 +488,10 @@
#define MSR_INTEL_CPUID1_FEATURE_MASK 0x00000130
#define MSR_INTEL_CPUID80000001_FEATURE_MASK 0x00000131
+#define MSR_INTEL_CPUID1_FEATURE_MASK_V2 0x00000132
+#define MSR_INTEL_CPUID80000001_FEATURE_MASK_V2 0x00000133
+#define MSR_INTEL_CPUIDD_01_FEATURE_MASK 0x00000134
+
/* Intel cpuid faulting MSRs */
#define MSR_INTEL_PLATFORM_INFO 0x000000ce
#define MSR_INTEL_MISC_FEATURES_ENABLES 0x00000140
++++++ 23735-guest-dom0-cap.patch ++++++
References: bnc#702407
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1311407355 -3600
# Node ID 537918f518eec3d8e2e2dad403fce40303321523
# Parent 42edf1481c5704c8ce1eb171a713b5411df0551a
add privileged (dom0) kernel feature indication
With our switching away from supporting 32-bit Dom0 operation, users
complained that attempts (perhaps due to lack of knowledge of that
change) to boot the no longer privileged kernel in Dom0 resulted in
apparently silent failure. To make the mismatch explicit and visible,
add dom0 feature flag that the kernel can set to indicate operation as
dom0 is supported.
Due to the way elf_xen_parse_features() worked up to now (getting
fixed here), adding features indications to the old, string based ELF
note would make the respective kernel unusable on older hypervisors.
For that reason, a new ELF Note is being introduced that allows
specifying supported features as a bit array instead (with features
unknown to the hypervisor simply ignored, as now also done by
elf_xen_parse_features(), whereas here unknown kernel-required
features still keep the kernel [and hence VM] from booting).
Introduce and use elf_note_numeric_array() to be forward
compatible (or else an old hypervisor wouldn't be able to parse kernel
specified features occupying more than 64 bits - thanks, Ian!).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1311598088 -3600
# Node ID 50ddc200a60cad3929a79a992f09145fd39af49d
# Parent d8725d9fb8657874011d2f2772f5e970b24dfe9b
fix regression from c/s 23735:537918f518ee
This was checking presence of the wrong (old) ELF note. I don't really
understand how this failed consistently only for one of the xen-boot
tests...
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/tools/libxc/xc_dom_elfloader.c
===================================================================
--- xen-4.1.2-testing.orig/tools/libxc/xc_dom_elfloader.c
+++ xen-4.1.2-testing/tools/libxc/xc_dom_elfloader.c
@@ -286,6 +286,13 @@ static int xc_dom_parse_elf_kernel(struc
if ( (rc = elf_xen_parse(elf, &dom->parms)) != 0 )
return rc;
+ if ( elf_xen_feature_get(XENFEAT_dom0, dom->parms.f_required) )
+ {
+ xc_dom_panic(dom->xch, XC_INVALID_KERNEL, "%s: Kernel does not"
+ " support unprivileged (DomU) operation", __FUNCTION__);
+ return -EINVAL;
+ }
+
/* find kernel segment */
dom->kernel_seg.vstart = dom->parms.virt_kstart;
dom->kernel_seg.vend = dom->parms.virt_kend;
Index: xen-4.1.2-testing/xen/arch/ia64/xen/domain.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/ia64/xen/domain.c
+++ xen-4.1.2-testing/xen/arch/ia64/xen/domain.c
@@ -2164,6 +2164,13 @@ int __init construct_dom0(struct domain
return -1;
}
+ if (parms.elf_notes[XEN_ELFNOTE_SUPPORTED_FEATURES].type !=
XEN_ENT_NONE &&
+ !test_bit(XENFEAT_dom0, parms.f_supported))
+ {
+ printk("Kernel does not support Dom0 operation\n");
+ return -1;
+ }
+
p_start = parms.virt_base;
pkern_start = parms.virt_kstart;
pkern_end = parms.virt_kend;
Index: xen-4.1.2-testing/xen/arch/x86/domain_build.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/domain_build.c
+++ xen-4.1.2-testing/xen/arch/x86/domain_build.c
@@ -417,6 +417,13 @@ int __init construct_dom0(
return -EINVAL;
}
+ if ( parms.elf_notes[XEN_ELFNOTE_SUPPORTED_FEATURES].type != XEN_ENT_NONE
&&
+ !test_bit(XENFEAT_dom0, parms.f_supported) )
+ {
+ printk("Kernel does not support Dom0 operation\n");
+ return -EINVAL;
+ }
+
#if defined(__x86_64__)
if ( compat32 )
{
Index: xen-4.1.2-testing/xen/common/kernel.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/kernel.c
+++ xen-4.1.2-testing/xen/common/kernel.c
@@ -287,6 +287,8 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL
(1U << XENFEAT_auto_translated_physmap);
if ( supervisor_mode_kernel )
fi.submap |= 1U << XENFEAT_supervisor_mode_kernel;
+ if ( current->domain == dom0 )
+ fi.submap |= 1U << XENFEAT_dom0;
#ifdef CONFIG_X86
if ( !is_hvm_vcpu(current) )
fi.submap |= (1U << XENFEAT_mmu_pt_update_preserve_ad) |
Index: xen-4.1.2-testing/xen/common/libelf/libelf-dominfo.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/libelf/libelf-dominfo.c
+++ xen-4.1.2-testing/xen/common/libelf/libelf-dominfo.c
@@ -26,7 +26,8 @@ static const char *const elf_xen_feature
[XENFEAT_writable_descriptor_tables] = "writable_descriptor_tables",
[XENFEAT_auto_translated_physmap] = "auto_translated_physmap",
[XENFEAT_supervisor_mode_kernel] = "supervisor_mode_kernel",
- [XENFEAT_pae_pgdir_above_4gb] = "pae_pgdir_above_4gb"
+ [XENFEAT_pae_pgdir_above_4gb] = "pae_pgdir_above_4gb",
+ [XENFEAT_dom0] = "dom0"
};
static const int elf_xen_features =
sizeof(elf_xen_feature_names) / sizeof(elf_xen_feature_names[0]);
@@ -82,7 +83,7 @@ int elf_xen_parse_features(const char *f
}
}
}
- if ( i == elf_xen_features )
+ if ( i == elf_xen_features && required && feature[0] == '!' )
return -1;
}
@@ -113,6 +114,7 @@ int elf_xen_parse_note(struct elf_binary
[XEN_ELFNOTE_LOADER] = { "LOADER", 1},
[XEN_ELFNOTE_PAE_MODE] = { "PAE_MODE", 1},
[XEN_ELFNOTE_FEATURES] = { "FEATURES", 1},
+ [XEN_ELFNOTE_SUPPORTED_FEATURES] = { "SUPPORTED_FEATURES", 0},
[XEN_ELFNOTE_BSD_SYMTAB] = { "BSD_SYMTAB", 1},
[XEN_ELFNOTE_SUSPEND_CANCEL] = { "SUSPEND_CANCEL", 0 },
[XEN_ELFNOTE_MOD_START_PFN] = { "MOD_START_PFN", 0 },
@@ -121,6 +123,7 @@ int elf_xen_parse_note(struct elf_binary
const char *str = NULL;
uint64_t val = 0;
+ unsigned int i;
int type = elf_uval(elf, note, type);
if ( (type >= sizeof(note_desc) / sizeof(note_desc[0])) ||
@@ -199,6 +202,12 @@ int elf_xen_parse_note(struct elf_binary
return -1;
break;
+ case XEN_ELFNOTE_SUPPORTED_FEATURES:
+ for ( i = 0; i < XENFEAT_NR_SUBMAPS; ++i )
+ parms->f_supported[i] |= elf_note_numeric_array(
+ elf, note, sizeof(*parms->f_supported), i);
+ break;
+
}
return 0;
}
Index: xen-4.1.2-testing/xen/common/libelf/libelf-tools.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/libelf/libelf-tools.c
+++ xen-4.1.2-testing/xen/common/libelf/libelf-tools.c
@@ -227,6 +227,27 @@ uint64_t elf_note_numeric(struct elf_bin
return 0;
}
}
+
+uint64_t elf_note_numeric_array(struct elf_binary *elf, const elf_note *note,
+ unsigned int unitsz, unsigned int idx)
+{
+ const void *desc = elf_note_desc(elf, note);
+ int descsz = elf_uval(elf, note, descsz);
+
+ if ( descsz % unitsz || idx >= descsz / unitsz )
+ return 0;
+ switch (unitsz)
+ {
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ return elf_access_unsigned(elf, desc, idx * unitsz, unitsz);
+ default:
+ return 0;
+ }
+}
+
const elf_note *elf_note_next(struct elf_binary *elf, const elf_note * note)
{
int namesz = (elf_uval(elf, note, namesz) + 3) & ~3;
Index: xen-4.1.2-testing/xen/include/public/elfnote.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/public/elfnote.h
+++ xen-4.1.2-testing/xen/include/public/elfnote.h
@@ -179,9 +179,22 @@
#define XEN_ELFNOTE_MOD_START_PFN 16
/*
+ * The features supported by this kernel (numeric).
+ *
+ * Other than XEN_ELFNOTE_FEATURES on pre-4.2 Xen, this note allows a
+ * kernel to specify support for features that older hypervisors don't
+ * know about. The set of features 4.2 and newer hypervisors will
+ * consider supported by the kernel is the combination of the sets
+ * specified through this and the string note.
+ *
+ * LEGACY: FEATURES
+ */
+#define XEN_ELFNOTE_SUPPORTED_FEATURES 17
+
+/*
* The number of the highest elfnote defined.
*/
-#define XEN_ELFNOTE_MAX XEN_ELFNOTE_MOD_START_PFN
+#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUPPORTED_FEATURES
/*
* System information exported through crash notes.
Index: xen-4.1.2-testing/xen/include/public/features.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/public/features.h
+++ xen-4.1.2-testing/xen/include/public/features.h
@@ -75,7 +75,10 @@
#define XENFEAT_hvm_safe_pvclock 9
/* x86: pirq can be used by HVM guests */
-#define XENFEAT_hvm_pirqs 10
+#define XENFEAT_hvm_pirqs 10
+
+/* operation as Dom0 is supported */
+#define XENFEAT_dom0 11
#define XENFEAT_NR_SUBMAPS 1
Index: xen-4.1.2-testing/xen/include/xen/libelf.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/xen/libelf.h
+++ xen-4.1.2-testing/xen/include/xen/libelf.h
@@ -179,6 +179,8 @@ const elf_sym *elf_sym_by_index(struct e
const char *elf_note_name(struct elf_binary *elf, const elf_note * note);
const void *elf_note_desc(struct elf_binary *elf, const elf_note * note);
uint64_t elf_note_numeric(struct elf_binary *elf, const elf_note * note);
+uint64_t elf_note_numeric_array(struct elf_binary *, const elf_note *,
+ unsigned int unitsz, unsigned int idx);
const elf_note *elf_note_next(struct elf_binary *elf, const elf_note * note);
int elf_is_elfbinary(const void *image);
++++++ 23747-mmcfg-base-address.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1311608539 -3600
# Node ID b07b6fa766562c990b1d1e59af032feda15c2edb
# Parent aa54b8175954bd6ffeb3bcf72e782e133896b388
x86-64/MMCFG: correct base address computation for regions not starting at bus 0
As per the specification, the base address reported by ACPI is the one
that would be used if the region started at bus 0. Hence the
start_bus_number offset needs to be added not only to the virtual
address, but also the physical one when establishing the mapping, and
it then needs to be subtracted when obtaining the virtual address for
doing accesses.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/x86_64/mmconfig_64.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/x86_64/mmconfig_64.c
+++ xen-4.1.2-testing/xen/arch/x86/x86_64/mmconfig_64.c
@@ -25,7 +25,7 @@ struct mmcfg_virt {
static struct mmcfg_virt *pci_mmcfg_virt;
static int __initdata mmcfg_pci_segment_shift;
-static char __iomem *get_virt(unsigned int seg, unsigned bus)
+static char __iomem *get_virt(unsigned int seg, unsigned int *bus)
{
struct acpi_mcfg_allocation *cfg;
int cfg_num;
@@ -33,9 +33,11 @@ static char __iomem *get_virt(unsigned i
for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) {
cfg = pci_mmcfg_virt[cfg_num].cfg;
if (cfg->pci_segment == seg &&
- (cfg->start_bus_number <= bus) &&
- (cfg->end_bus_number >= bus))
+ (cfg->start_bus_number <= *bus) &&
+ (cfg->end_bus_number >= *bus)) {
+ *bus -= cfg->start_bus_number;
return pci_mmcfg_virt[cfg_num].virt;
+ }
}
/* Fall back to type 0 */
@@ -46,7 +48,7 @@ static char __iomem *pci_dev_base(unsign
{
char __iomem *addr;
- addr = get_virt(seg, bus);
+ addr = get_virt(seg, &bus);
if (!addr)
return NULL;
return addr + ((bus << 20) | (devfn << 12));
@@ -121,8 +123,11 @@ static void __iomem * __init mcfg_iorema
if (virt + size < virt || virt + size > PCI_MCFG_VIRT_END)
return NULL;
- map_pages_to_xen(virt, cfg->address >> PAGE_SHIFT,
- size >> PAGE_SHIFT, PAGE_HYPERVISOR_NOCACHE);
+ if (map_pages_to_xen(virt,
+ (cfg->address >> PAGE_SHIFT) +
+ (cfg->start_bus_number << (20 - PAGE_SHIFT)),
+ size >> PAGE_SHIFT, PAGE_HYPERVISOR_NOCACHE))
+ return NULL;
return (void __iomem *) virt;
}
++++++ 23749-mmcfg-reservation.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1311608606 -3600
# Node ID e8d1c8f074babcb0e4511393106e80a918a38204
# Parent e1717d180897e6e7a04d83a41d86b35ac16912b9
x86-64/MMCFG: pass down firmware (ACPI) reservation status of used memory space
Reserving the MMCFG address range(s) in E820 is specified to only be
optional for the firmware to do. The requirement is to have them
reserved in ACPI resources. Those, however, aren't directly visible to
Xen as they require the ACPI interpreter to be active. Thus, if a
range isn't reserved in E820, we should not completely disable use of
MMCFG on the respective bus range, but rather keep it disabled until
Dom0 can pass down information on the ACPI reservation status (though
a new physdevop hypercall).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1322813126 -3600
# Node ID 60d4e257d04ba0bd663bbef5e93a97b6d8b66e54
# Parent 3f815406feb25a9348d8be9bc49fdc8c93ccb7c2
x86-64/mmcfg: remove __initdata annotation overlooked in 23749:e8d1c8f074ba
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -16,6 +16,10 @@
#include <xsm/xsm.h>
#include <asm/p2m.h>
+#ifdef CONFIG_X86_64
+#include "x86_64/mmconfig.h"
+#endif
+
#ifndef COMPAT
typedef long ret_t;
#endif
@@ -512,6 +516,24 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
break;
}
+#ifdef __x86_64__
+ case PHYSDEVOP_pci_mmcfg_reserved: {
+ struct physdev_pci_mmcfg_reserved info;
+
+ ret = -EPERM;
+ if ( !IS_PRIV(current->domain) )
+ break;
+
+ ret = -EFAULT;
+ if ( copy_from_guest(&info, arg, 1) )
+ break;
+
+ ret = pci_mmcfg_reserved(info.address, info.segment,
+ info.start_bus, info.end_bus, info.flags);
+ break;
+ }
+#endif
+
case PHYSDEVOP_restore_msi: {
struct physdev_restore_msi restore_msi;
struct pci_dev *pdev;
--- a/xen/arch/x86/x86_64/mmconfig.h
+++ b/xen/arch/x86/x86_64/mmconfig.h
@@ -84,6 +84,11 @@ extern int pci_mmcfg_config_num;
extern struct acpi_mcfg_allocation *pci_mmcfg_config;
/* function prototypes */
+struct acpi_table_header;
int acpi_parse_mcfg(struct acpi_table_header *header);
+int pci_mmcfg_reserved(uint64_t address, unsigned int segment,
+ unsigned int start_bus, unsigned int end_bus,
+ unsigned int flags);
int pci_mmcfg_arch_init(void);
-void pci_mmcfg_arch_free(void);
+int pci_mmcfg_arch_enable(unsigned int);
+void pci_mmcfg_arch_disable(unsigned int);
--- a/xen/arch/x86/x86_64/mmconfig-shared.c
+++ b/xen/arch/x86/x86_64/mmconfig-shared.c
@@ -22,10 +22,10 @@
#include <asm/e820.h>
#include <asm/msr.h>
#include <asm/msr-index.h>
+#include <public/physdev.h>
#include "mmconfig.h"
-static int __initdata known_bridge;
unsigned int pci_probe = PCI_PROBE_CONF1 | PCI_PROBE_MMCONF;
static void __init parse_mmcfg(char *s)
@@ -316,26 +316,21 @@ static int __init pci_mmcfg_check_hostbr
return name != NULL;
}
-typedef int (*check_reserved_t)(u64 start, u64 end, unsigned type);
-
static int __init is_mmconf_reserved(
- check_reserved_t is_reserved,
u64 addr, u64 size, int i,
- typeof(pci_mmcfg_config[0]) *cfg, int with_e820)
+ typeof(pci_mmcfg_config[0]) *cfg)
{
u64 old_size = size;
int valid = 0;
- while (!is_reserved(addr, addr + size - 1, E820_RESERVED)) {
+ while (!e820_all_mapped(addr, addr + size - 1, E820_RESERVED)) {
size >>= 1;
if (size < (16UL<<20))
break;
}
if (size >= (16UL<<20) || size == old_size) {
- printk(KERN_NOTICE
- "PCI: MCFG area at %lx reserved in %s\n",
- addr, with_e820?"E820":"ACPI motherboard resources");
+ printk(KERN_NOTICE "PCI: MCFG area at %lx reserved in E820\n", addr);
valid = 1;
if (old_size != size) {
@@ -352,15 +347,16 @@ static int __init is_mmconf_reserved(
return valid;
}
-static void __init pci_mmcfg_reject_broken(void)
+static bool_t __init pci_mmcfg_reject_broken(void)
{
typeof(pci_mmcfg_config[0]) *cfg;
int i;
+ bool_t valid = 1;
if ((pci_mmcfg_config_num == 0) ||
(pci_mmcfg_config == NULL) ||
(pci_mmcfg_config[0].address == 0))
- return;
+ return 0;
cfg = &pci_mmcfg_config[0];
@@ -374,27 +370,25 @@ static void __init pci_mmcfg_reject_brok
size = cfg->end_bus_number + 1 - cfg->start_bus_number;
size <<= 20;
printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lx "
- "segment %hu buses %u - %u\n",
+ "segment %04x buses %02x - %02x\n",
i, (unsigned long)cfg->address, cfg->pci_segment,
(unsigned int)cfg->start_bus_number,
(unsigned int)cfg->end_bus_number);
- if (!is_mmconf_reserved(e820_all_mapped, addr, size, i, cfg, 1))
- goto reject;
+ if (!is_mmconf_reserved(addr, size, i, cfg) ||
+ pci_mmcfg_arch_enable(i)) {
+ pci_mmcfg_arch_disable(i);
+ valid = 0;
+ }
}
- return;
-
-reject:
- printk(KERN_INFO "PCI: Not using MMCONFIG.\n");
- pci_mmcfg_arch_free();
- xfree(pci_mmcfg_config);
- pci_mmcfg_config = NULL;
- pci_mmcfg_config_num = 0;
+ return valid;
}
void __init acpi_mmcfg_init(void)
{
+ bool_t valid = 1;
+
/* MMCONFIG disabled */
if ((pci_probe & PCI_PROBE_MMCONF) == 0)
return;
@@ -403,16 +397,17 @@ void __init acpi_mmcfg_init(void)
if (!(pci_probe & PCI_PROBE_MASK & ~PCI_PROBE_MMCONF))
return;
- /* for late to exit */
- if (known_bridge)
- return;
-
- if (pci_mmcfg_check_hostbridge())
- known_bridge = 1;
+ if (pci_mmcfg_check_hostbridge()) {
+ unsigned int i;
- if (!known_bridge) {
+ pci_mmcfg_arch_init();
+ for (i = 0; i < pci_mmcfg_config_num; ++i)
+ if (pci_mmcfg_arch_enable(i))
+ valid = 0;
+ } else {
acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg);
- pci_mmcfg_reject_broken();
+ pci_mmcfg_arch_init();
+ valid = pci_mmcfg_reject_broken();
}
if ((pci_mmcfg_config_num == 0) ||
@@ -420,9 +415,41 @@ void __init acpi_mmcfg_init(void)
(pci_mmcfg_config[0].address == 0))
return;
- if (pci_mmcfg_arch_init()) {
+ if (valid)
pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
+}
+
+int pci_mmcfg_reserved(uint64_t address, unsigned int segment,
+ unsigned int start_bus, unsigned int end_bus,
+ unsigned int flags)
+{
+ unsigned int i;
+
+ if (flags & ~XEN_PCI_MMCFG_RESERVED)
+ return -EINVAL;
+
+ for (i = 0; i < pci_mmcfg_config_num; ++i) {
+ const typeof(pci_mmcfg_config[0]) *cfg = &pci_mmcfg_config[i];
+
+ if (cfg->pci_segment == segment &&
+ cfg->start_bus_number == start_bus &&
+ cfg->end_bus_number == end_bus) {
+ if (cfg->address != address) {
+ printk(KERN_WARNING
+ "Base address presented for segment %04x bus %02x-%02x"
+ " (%08" PRIx64 ") does not match previously obtained"
+ " one (%08" PRIx64 ")\n",
+ segment, start_bus, end_bus, address, cfg->address);
+ return -EIO;
+ }
+ if (flags & XEN_PCI_MMCFG_RESERVED)
+ return pci_mmcfg_arch_enable(i);
+ pci_mmcfg_arch_disable(i);
+ return 0;
+ }
}
+
+ return -ENODEV;
}
/**
--- a/xen/arch/x86/x86_64/mmconfig_64.c
+++ b/xen/arch/x86/x86_64/mmconfig_64.c
@@ -23,7 +23,7 @@ struct mmcfg_virt {
char __iomem *virt;
};
static struct mmcfg_virt *pci_mmcfg_virt;
-static int __initdata mmcfg_pci_segment_shift;
+static unsigned int mmcfg_pci_segment_shift;
static char __iomem *get_virt(unsigned int seg, unsigned int *bus)
{
@@ -112,7 +112,8 @@ int pci_mmcfg_write(unsigned int seg, un
return 0;
}
-static void __iomem * __init mcfg_ioremap(struct acpi_mcfg_allocation *cfg)
+static void __iomem *mcfg_ioremap(const struct acpi_mcfg_allocation *cfg,
+ unsigned int prot)
{
unsigned long virt, size;
@@ -126,19 +127,55 @@ static void __iomem * __init mcfg_iorema
if (map_pages_to_xen(virt,
(cfg->address >> PAGE_SHIFT) +
(cfg->start_bus_number << (20 - PAGE_SHIFT)),
- size >> PAGE_SHIFT, PAGE_HYPERVISOR_NOCACHE))
+ size >> PAGE_SHIFT, prot))
return NULL;
return (void __iomem *) virt;
}
+int pci_mmcfg_arch_enable(unsigned int idx)
+{
+ const typeof(pci_mmcfg_config[0]) *cfg = pci_mmcfg_virt[idx].cfg;
+
+ if (pci_mmcfg_virt[idx].virt)
+ return 0;
+ pci_mmcfg_virt[idx].virt = mcfg_ioremap(cfg, PAGE_HYPERVISOR_NOCACHE);
+ if (!pci_mmcfg_virt[idx].virt) {
+ printk(KERN_ERR "PCI: Cannot map MCFG aperture for segment %04x\n",
+ cfg->pci_segment);
+ return -ENOMEM;
+ }
+ printk(KERN_INFO "PCI: Using MCFG for segment %04x bus %02x-%02x\n",
+ cfg->pci_segment, cfg->start_bus_number, cfg->end_bus_number);
+ return 0;
+}
+
+void pci_mmcfg_arch_disable(unsigned int idx)
+{
+ const typeof(pci_mmcfg_config[0]) *cfg = pci_mmcfg_virt[idx].cfg;
+
+ pci_mmcfg_virt[idx].virt = NULL;
+ /*
+ * Don't use destroy_xen_mappings() here, or make sure that at least
+ * the necessary L4 entries get populated (so that they get properly
+ * propagated to guest domains' page tables).
+ */
+ mcfg_ioremap(cfg, 0);
+ printk(KERN_WARNING "PCI: Not using MCFG for segment %04x bus %02x-%02x\n",
+ cfg->pci_segment, cfg->start_bus_number, cfg->end_bus_number);
+}
+
int __init pci_mmcfg_arch_init(void)
{
int i;
+ if (pci_mmcfg_virt)
+ return 0;
+
pci_mmcfg_virt = xmalloc_array(struct mmcfg_virt, pci_mmcfg_config_num);
if (pci_mmcfg_virt == NULL) {
printk(KERN_ERR "PCI: Can not allocate memory for mmconfig
structures\n");
+ pci_mmcfg_config_num = 0;
return 0;
}
memset(pci_mmcfg_virt, 0, sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num);
@@ -149,34 +186,5 @@ int __init pci_mmcfg_arch_init(void)
++mmcfg_pci_segment_shift;
}
mmcfg_pci_segment_shift += 20;
- for (i = 0; i < pci_mmcfg_config_num; ++i) {
- pci_mmcfg_virt[i].virt = mcfg_ioremap(&pci_mmcfg_config[i]);
- if (!pci_mmcfg_virt[i].virt) {
- printk(KERN_ERR "PCI: Cannot map mmconfig aperture for "
- "segment %d\n",
- pci_mmcfg_config[i].pci_segment);
- pci_mmcfg_arch_free();
- return 0;
- }
- }
return 1;
}
-
-void __init pci_mmcfg_arch_free(void)
-{
- int i;
-
- if (pci_mmcfg_virt == NULL)
- return;
-
- for (i = 0; i < pci_mmcfg_config_num; ++i) {
- if (pci_mmcfg_virt[i].virt) {
- iounmap(pci_mmcfg_virt[i].virt);
- pci_mmcfg_virt[i].virt = NULL;
- pci_mmcfg_virt[i].cfg = NULL;
- }
- }
-
- xfree(pci_mmcfg_virt);
- pci_mmcfg_virt = NULL;
-}
--- a/xen/arch/x86/x86_64/physdev.c
+++ b/xen/arch/x86/x86_64/physdev.c
@@ -54,6 +54,10 @@
#define physdev_get_free_pirq compat_physdev_get_free_pirq
#define physdev_get_free_pirq_t physdev_get_free_pirq_compat_t
+#define xen_physdev_pci_mmcfg_reserved physdev_pci_mmcfg_reserved
+CHECK_physdev_pci_mmcfg_reserved;
+#undef xen_physdev_pci_mmcfg_reserved
+
#define COMPAT
#undef guest_handle_okay
#define guest_handle_okay compat_handle_okay
--- a/xen/include/public/physdev.h
+++ b/xen/include/public/physdev.h
@@ -255,6 +255,19 @@ struct physdev_get_free_pirq {
typedef struct physdev_get_free_pirq physdev_get_free_pirq_t;
DEFINE_XEN_GUEST_HANDLE(physdev_get_free_pirq_t);
+#define XEN_PCI_MMCFG_RESERVED 0x1
+
+#define PHYSDEVOP_pci_mmcfg_reserved 24
+struct physdev_pci_mmcfg_reserved {
+ uint64_t address;
+ uint16_t segment;
+ uint8_t start_bus;
+ uint8_t end_bus;
+ uint32_t flags;
+};
+typedef struct physdev_pci_mmcfg_reserved physdev_pci_mmcfg_reserved_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_pci_mmcfg_reserved_t);
+
/*
* Notify that some PIRQ-bound event channels have been unmasked.
* ** This command is obsolete since interface version 0x00030202 and is **
--- a/xen/include/xlat.lst
+++ b/xen/include/xlat.lst
@@ -60,6 +60,7 @@
! memory_map memory.h
! memory_reservation memory.h
! pod_target memory.h
+? physdev_pci_mmcfg_reserved physdev.h
! sched_poll sched.h
? sched_remote_shutdown sched.h
? sched_shutdown sched.h
++++++ 23752-x86-shared-IRQ-vector-maps.patch ++++++
References: bnc#713503
# HG changeset patch
# User George Dunlap <george.dunlap@xxxxxxxxxxxxx>
# Date 1311701818 -3600
# Node ID ef9ed3d2aa870a37ed5e611be9c524d526a2d604
# Parent 590aadf7c46ae979da3552332f592f9492ce6d8b
xen: Infrastructure to allow irqs to share vector maps
Laying the groundwork for per-device vector maps. This generic
code allows any irq to point to a vector map; all irqs sharing the
same vector map will avoid sharing vectors.
Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
# HG changeset patch
# User George Dunlap <george.dunlap@xxxxxxxxxxxxx>
# Date 1314026133 -3600
# Node ID 3a05da2dc7c0a5fc0fcfc40c535d1fcb71203625
# Parent d1cd78a73a79e0e648937322cdb8d92a7f86327a
x86: Fix up irq vector map logic
We need to make sure that cfg->used_vector is only cleared once;
otherwise there may be a race condition that allows the same vector to
be assigned twice, defeating the whole purpose of the map.
This makes two changes:
* __clear_irq_vector() only clears the vector if the irq is not being
moved
* smp_iqr_move_cleanup_interrupt() only clears used_vector if this
is the last place it's being used (move_cleanup_count==0 after
decrement).
Also make use of asserts more consistent, to catch this kind of logic
bug in the future.
Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -548,6 +548,13 @@ fastcall void smp_irq_move_cleanup_inter
}
__get_cpu_var(vector_irq)[vector] = -1;
cfg->move_cleanup_count--;
+
+ if ( cfg->move_cleanup_count == 0
+ && cfg->used_vectors )
+ {
+ ASSERT(test_bit(vector, cfg->used_vectors));
+ clear_bit(vector, cfg->used_vectors);
+ }
unlock:
spin_unlock(&desc->lock);
}
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -94,6 +94,11 @@ static int __init __bind_irq_vector(int
per_cpu(vector_irq, cpu)[vector] = irq;
cfg->vector = vector;
cfg->cpu_mask = online_mask;
+ if ( cfg->used_vectors )
+ {
+ ASSERT(!test_bit(vector, cfg->used_vectors));
+ set_bit(vector, cfg->used_vectors);
+ }
irq_status[irq] = IRQ_USED;
if (IO_APIC_IRQ(irq))
irq_vector[irq] = vector;
@@ -159,6 +164,7 @@ static void dynamic_irq_cleanup(unsigned
desc->depth = 1;
desc->msi_desc = NULL;
desc->handler = &no_irq_type;
+ desc->chip_data->used_vectors=NULL;
cpus_setall(desc->affinity);
spin_unlock_irqrestore(&desc->lock, flags);
@@ -191,6 +197,7 @@ static void __clear_irq_vector(int irq)
if (likely(!cfg->move_in_progress))
return;
+
cpus_and(tmp_mask, cfg->old_cpu_mask, cpu_online_map);
for_each_cpu_mask(cpu, tmp_mask) {
for (vector = FIRST_DYNAMIC_VECTOR; vector <= LAST_DYNAMIC_VECTOR;
@@ -202,6 +209,12 @@ static void __clear_irq_vector(int irq)
}
}
+ if ( cfg->used_vectors )
+ {
+ ASSERT(test_bit(vector, cfg->used_vectors));
+ clear_bit(vector, cfg->used_vectors);
+ }
+
cfg->move_in_progress = 0;
}
@@ -261,6 +274,7 @@ static void init_one_irq_cfg(struct irq_
cfg->vector = IRQ_VECTOR_UNASSIGNED;
cpus_clear(cfg->cpu_mask);
cpus_clear(cfg->old_cpu_mask);
+ cfg->used_vectors = NULL;
}
int init_irq_data(void)
@@ -387,6 +401,10 @@ next:
if (test_bit(vector, used_vectors))
goto next;
+ if (cfg->used_vectors
+ && test_bit(vector, cfg->used_vectors) )
+ goto next;
+
for_each_cpu_mask(new_cpu, tmp_mask)
if (per_cpu(vector_irq, new_cpu)[vector] != -1)
goto next;
@@ -402,6 +420,11 @@ next:
per_cpu(vector_irq, new_cpu)[vector] = irq;
cfg->vector = vector;
cpus_copy(cfg->cpu_mask, tmp_mask);
+ if ( cfg->used_vectors )
+ {
+ ASSERT(!test_bit(vector, cfg->used_vectors));
+ set_bit(vector, cfg->used_vectors);
+ }
irq_status[irq] = IRQ_USED;
if (IO_APIC_IRQ(irq))
--- a/xen/include/asm-x86/irq.h
+++ b/xen/include/asm-x86/irq.h
@@ -23,11 +23,16 @@
#define irq_to_desc(irq) (&irq_desc[irq])
#define irq_cfg(irq) (&irq_cfg[irq])
+typedef struct {
+ DECLARE_BITMAP(_bits,NR_VECTORS);
+} vmask_t;
+
struct irq_cfg {
int vector;
cpumask_t cpu_mask;
cpumask_t old_cpu_mask;
unsigned move_cleanup_count;
+ vmask_t *used_vectors;
u8 move_in_progress : 1;
};
++++++ 23754-AMD-perdev-vector-map.patch ++++++
References: bnc#713503
# HG changeset patch
# User George Dunlap <george.dunlap@xxxxxxxxxxxxx>
# Date 1311701836 -3600
# Node ID 2e0cf9428554da666616982cd0074024ff85b221
# Parent ef9ed3d2aa870a37ed5e611be9c524d526a2d604
xen: Option to allow per-device vector maps for MSI IRQs
Add a vector-map to pci_dev, and add an option to point MSI-related
IRQs to the vector-map of the device.
This prevents irqs from the same device from being assigned
the same vector on different pcpus. This is required for systems
using an AMD IOMMU, since the intremap tables on AMD only look at
vector, and not destination ID.
Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
# HG changeset patch
# User George Dunlap <george.dunlap@xxxxxxxxxxxxx>
# Date 1311701852 -3600
# Node ID fa4e2ca9ecffbc432b451f495ad0a403644a6be8
# Parent 2e0cf9428554da666616982cd0074024ff85b221
xen: AMD IOMMU: Automatically enable per-device vector maps
Automatically enable per-device vector maps when using IOMMU,
unless disabled specifically by an IOMMU parameter.
Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
# HG changeset patch
# User George Dunlap <george.dunlap@xxxxxxxxxxxxx>
# Date 1315231215 -3600
# Node ID 32814ad7458dc842a7c588eee13e5c4ee11709a3
# Parent f1349a968a5ac5577d67ad4a3f3490c580dbe264
xen: Add global irq_vector_map option, set if using AMD global intremap tables
As mentioned in previous changesets, AMD IOMMU interrupt
remapping tables only look at the vector, not the destination
id of an interrupt. This means that all IRQs going through
the same interrupt remapping table need to *not* share vectors.
The irq "vector map" functionality was originally introduced
after a patch which disabled global AMD IOMMUs entirely. That
patch has since been reverted, meaning that AMD intremap tables
can either be per-device or global.
This patch therefore introduces a global irq vector map option,
and enables it if we're using an AMD IOMMU with a global
interrupt remapping table.
This patch removes the "irq-perdev-vector-map" boolean
command-line optino and replaces it with "irq_vector_map",
which can have one of three values: none, global, or per-device.
Setting the irq_vector_map to any value will override the
default that the AMD code sets.
Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1317730316 -7200
# Node ID a99d75671a911f9c0d5d11e0fe88a0a65863cb44
# Parent 3d1664cc9e458809e399320204aca8536e401ee1
AMD-IOMMU: remove dead variable references
These got orphaned up by recent changes.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
--- a/docs/src/user.tex
+++ b/docs/src/user.tex
@@ -4197,6 +4197,10 @@ writing to the VGA console after domain
\item [ vcpu\_migration\_delay=$<$minimum\_time$>$] Set minimum time of
vcpu migration in microseconds (default 0). This parameter avoids agressive
vcpu migration. For example, the linux kernel uses 0.5ms by default.
+\item [ irq_vector_map=xxx ] Enable irq vector non-sharing maps. Setting
'global'
+ will ensure that no IRQs will share vectors. Setting 'per-device' will
ensure
+ that no IRQs from the same device will share vectors. Setting to 'none' will
+ disable it entirely, overriding any defaults the IOMMU code may set.
\end{description}
In addition, the following options may be specified on the Xen command
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -24,6 +24,8 @@
#include <asm/mach-generic/mach_apic.h>
#include <public/physdev.h>
+static void parse_irq_vector_map_param(char *s);
+
/* opt_noirqbalance: If true, software IRQ balancing/affinity is disabled. */
bool_t __read_mostly opt_noirqbalance = 0;
boolean_param("noirqbalance", opt_noirqbalance);
@@ -32,6 +34,12 @@ unsigned int __read_mostly nr_irqs_gsi =
unsigned int __read_mostly nr_irqs;
integer_param("nr_irqs", nr_irqs);
+/* This default may be changed by the AMD IOMMU code */
+int __read_mostly opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_DEFAULT;
+custom_param("irq_vector_map", parse_irq_vector_map_param);
+
+vmask_t global_used_vector_map;
+
u8 __read_mostly *irq_vector;
struct irq_desc __read_mostly *irq_desc = NULL;
@@ -60,6 +68,26 @@ static struct timer irq_ratelimit_timer;
static unsigned int __read_mostly irq_ratelimit_threshold = 10000;
integer_param("irq_ratelimit", irq_ratelimit_threshold);
+static void __init parse_irq_vector_map_param(char *s)
+{
+ char *ss;
+
+ do {
+ ss = strchr(s, ',');
+ if ( ss )
+ *ss = '\0';
+
+ if ( !strcmp(s, "none"))
+ opt_irq_vector_map=OPT_IRQ_VECTOR_MAP_NONE;
+ else if ( !strcmp(s, "global"))
+ opt_irq_vector_map=OPT_IRQ_VECTOR_MAP_GLOBAL;
+ else if ( !strcmp(s, "per-device"))
+ opt_irq_vector_map=OPT_IRQ_VECTOR_MAP_PERDEV;
+
+ s = ss + 1;
+ } while ( ss );
+}
+
/* Must be called when irq disabled */
void lock_vector_lock(void)
{
@@ -344,6 +372,41 @@ hw_irq_controller no_irq_type = {
end_none
};
+static vmask_t *irq_get_used_vector_mask(int irq)
+{
+ vmask_t *ret = NULL;
+
+ if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_GLOBAL )
+ {
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ ret = &global_used_vector_map;
+
+ if ( desc->chip_data->used_vectors )
+ {
+ printk(XENLOG_INFO "%s: Strange, unassigned irq %d already has
used_vectors!\n",
+ __func__, irq);
+ }
+ else
+ {
+ int vector;
+
+ vector = irq_to_vector(irq);
+ if ( vector > 0 )
+ {
+ printk(XENLOG_INFO "%s: Strange, irq %d already assigned
vector %d!\n",
+ __func__, irq, vector);
+
+ ASSERT(!test_bit(vector, ret));
+
+ set_bit(vector, ret);
+ }
+ }
+ }
+
+ return ret;
+}
+
int __assign_irq_vector(int irq, struct irq_cfg *cfg, const cpumask_t *mask)
{
/*
@@ -362,6 +425,7 @@ int __assign_irq_vector(int irq, struct
int cpu, err;
unsigned long flags;
cpumask_t tmp_mask;
+ vmask_t *irq_used_vectors = NULL;
old_vector = irq_to_vector(irq);
if (old_vector) {
@@ -376,6 +440,17 @@ int __assign_irq_vector(int irq, struct
return -EAGAIN;
err = -ENOSPC;
+
+ /* This is the only place normal IRQs are ever marked
+ * as "in use". If they're not in use yet, check to see
+ * if we need to assign a global vector mask. */
+ if ( irq_status[irq] == IRQ_USED )
+ {
+ irq_used_vectors = cfg->used_vectors;
+ }
+ else
+ irq_used_vectors = irq_get_used_vector_mask(irq);
+
for_each_cpu_mask(cpu, *mask) {
int new_cpu;
int vector, offset;
@@ -401,8 +476,8 @@ next:
if (test_bit(vector, used_vectors))
goto next;
- if (cfg->used_vectors
- && test_bit(vector, cfg->used_vectors) )
+ if (irq_used_vectors
+ && test_bit(vector, irq_used_vectors) )
goto next;
for_each_cpu_mask(new_cpu, tmp_mask)
@@ -420,15 +495,22 @@ next:
per_cpu(vector_irq, new_cpu)[vector] = irq;
cfg->vector = vector;
cpus_copy(cfg->cpu_mask, tmp_mask);
+
+ irq_status[irq] = IRQ_USED;
+ ASSERT((cfg->used_vectors == NULL)
+ || (cfg->used_vectors == irq_used_vectors));
+ cfg->used_vectors = irq_used_vectors;
+
+ if (IO_APIC_IRQ(irq))
+ irq_vector[irq] = vector;
+
if ( cfg->used_vectors )
{
ASSERT(!test_bit(vector, cfg->used_vectors));
+
set_bit(vector, cfg->used_vectors);
}
- irq_status[irq] = IRQ_USED;
- if (IO_APIC_IRQ(irq))
- irq_vector[irq] = vector;
err = 0;
local_irq_restore(flags);
break;
@@ -1523,7 +1605,7 @@ int map_domain_pirq(
if ( !IS_PRIV(current->domain) &&
!(IS_PRIV_FOR(current->domain, d) &&
- irq_access_permitted(current->domain, pirq)))
+ irq_access_permitted(current->domain, pirq)))
return -EPERM;
if ( pirq < 0 || pirq >= d->nr_pirqs || irq < 0 || irq >= nr_irqs )
@@ -1571,8 +1653,22 @@ int map_domain_pirq(
if ( desc->handler != &no_irq_type )
dprintk(XENLOG_G_ERR, "dom%d: irq %d in use\n",
- d->domain_id, irq);
+ d->domain_id, irq);
desc->handler = &pci_msi_type;
+
+ if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV
+ && !desc->chip_data->used_vectors )
+ {
+ desc->chip_data->used_vectors = &pdev->info.used_vectors;
+ if ( desc->chip_data->vector != IRQ_VECTOR_UNASSIGNED )
+ {
+ int vector = desc->chip_data->vector;
+ ASSERT(!test_bit(vector, desc->chip_data->used_vectors));
+
+ set_bit(vector, desc->chip_data->used_vectors);
+ }
+ }
+
d->arch.pirq_irq[pirq] = irq;
d->arch.irq_pirq[irq] = pirq;
setup_msi_irq(pdev, msi_desc, irq);
@@ -1583,9 +1679,12 @@ int map_domain_pirq(
d->arch.pirq_irq[pirq] = irq;
d->arch.irq_pirq[irq] = pirq;
spin_unlock_irqrestore(&desc->lock, flags);
+
+ if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV )
+ printk(XENLOG_INFO "Per-device vector maps for GSIs not
implemented yet.\n");
}
- done:
+done:
return ret;
}
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -166,6 +166,35 @@ int __init amd_iov_detect(void)
return -ENODEV;
}
+ /*
+ * AMD IOMMUs don't distinguish between vectors destined for
+ * different cpus when doing interrupt remapping. This means
+ * that interrupts going through the same intremap table
+ * can't share the same vector.
+ *
+ * If irq_vector_map isn't specified, choose a sensible default:
+ * - If we're using per-device interemap tables, per-device
+ * vector non-sharing maps
+ * - If we're using a global interemap table, global vector
+ * non-sharing map
+ */
+ if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_DEFAULT )
+ {
+ if ( amd_iommu_perdev_intremap )
+ {
+ printk("AMD-Vi: Enabling per-device vector maps\n");
+ opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_PERDEV;
+ }
+ else
+ {
+ printk("AMD-Vi: Enabling global vector map\n");
+ opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_GLOBAL;
+ }
+ }
+ else
+ {
+ printk("AMD-Vi: Not overriding irq_vector_map setting\n");
+ }
return scan_pci_devices();
}
--- a/xen/include/asm-x86/irq.h
+++ b/xen/include/asm-x86/irq.h
@@ -45,6 +45,13 @@ extern u8 *irq_vector;
extern bool_t opt_noirqbalance;
+#define OPT_IRQ_VECTOR_MAP_DEFAULT 0 /* Do the default thing */
+#define OPT_IRQ_VECTOR_MAP_NONE 1 /* None */
+#define OPT_IRQ_VECTOR_MAP_GLOBAL 2 /* One global vector map (no vector
sharing) */
+#define OPT_IRQ_VECTOR_MAP_PERDEV 3 /* Per-device vetor map (no vector
sharing w/in a device) */
+
+extern int opt_irq_vector_map;
+
/*
* Per-cpu current frame pointer - the location of the last exception frame on
* the stack
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -11,6 +11,7 @@
#include <xen/types.h>
#include <xen/list.h>
#include <xen/spinlock.h>
+#include <xen/irq.h>
/*
* The PCI interface treats multi-function devices as independent
@@ -38,6 +39,7 @@ struct pci_dev_info {
u8 bus;
u8 devfn;
} physfn;
+ vmask_t used_vectors;
};
struct pci_dev {
++++++ 23771-x86-ioapic-clear-pin.patch ++++++
References: bnc#701686
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1313503555 -3600
# Node ID fc2be6cb89ad49efd90fe1b650f7efaab72f61b2
# Parent 5c1ebc117f9901bc155d2b92ae902a4144767dfb
x86: simplify (and fix) clear_IO_APIC{,_pin}()
These are used during bootup and (emergency) shutdown only, and their
only purpose is to get the actual IO-APIC's RTE(s) cleared.
Consequently, only the "raw" accessors should be used (and the ones
going through interrupt remapping code can be skipped), with the
exception of determining the delivery mode: This one must always go
through the interrupt remapping path, as in the VT-d case the actual
IO-APIC's RTE will have the delivery mode always set to zero (which
before possibly could have resulted in such an entry getting cleared
in the "raw" pass, though I haven't observed this case in practice).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/io_apic.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/io_apic.c
+++ xen-4.1.2-testing/xen/arch/x86/io_apic.c
@@ -471,14 +471,12 @@ static void eoi_IO_APIC_irq(unsigned int
spin_unlock_irqrestore(&ioapic_lock, flags);
}
-#define clear_IO_APIC_pin(a,p) __clear_IO_APIC_pin(a,p,0)
-#define clear_IO_APIC_pin_raw(a,p) __clear_IO_APIC_pin(a,p,1)
-static void __clear_IO_APIC_pin(unsigned int apic, unsigned int pin, int raw)
+static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
{
struct IO_APIC_route_entry entry;
/* Check delivery_mode to be sure we're not clearing an SMI pin */
- entry = ioapic_read_entry(apic, pin, raw);
+ entry = __ioapic_read_entry(apic, pin, FALSE);
if (entry.delivery_mode == dest_SMI)
return;
@@ -487,7 +485,7 @@ static void __clear_IO_APIC_pin(unsigned
*/
memset(&entry, 0, sizeof(entry));
entry.mask = 1;
- ioapic_write_entry(apic, pin, raw, entry);
+ __ioapic_write_entry(apic, pin, TRUE, entry);
}
static void clear_IO_APIC (void)
@@ -495,10 +493,8 @@ static void clear_IO_APIC (void)
int apic, pin;
for (apic = 0; apic < nr_ioapics; apic++) {
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
clear_IO_APIC_pin(apic, pin);
- clear_IO_APIC_pin_raw(apic, pin);
- }
}
}
++++++ 23772-x86-trampoline.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1313744066 -3600
# Node ID 29aeed4979a78f26519f5fde8a405f8438297ab9
# Parent fc2be6cb89ad49efd90fe1b650f7efaab72f61b2
x86: make run-time part of trampoline relocatable
In order to eliminate an initial hack in the EFI boot code (where
memory for the trampoline was just "claimed" instead of properly
allocated), the trampoline code must no longer make assumption on the
address at which it would be located. For the time being, the fixed
address is being retained for the traditional multiboot path.
As an additional benefit (at least from my pov) it allows confining
the visibility of the BOOT_TRAMPOLINE definition to just the boot
code.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/boot/Makefile
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/boot/Makefile
+++ xen-4.1.2-testing/xen/arch/x86/boot/Makefile
@@ -2,8 +2,8 @@ obj-y += head.o
head.o: reloc.S
-BOOT_TRAMPOLINE := $(shell sed -n
's,^\#define[[:space:]]\{1\,\}BOOT_TRAMPOLINE[[:space:]]\{1\,\},,p'
$(BASEDIR)/include/asm-x86/config.h)
+BOOT_TRAMPOLINE := $(shell sed -n
's,^\#define[[:space:]]\{1\,\}BOOT_TRAMPOLINE[[:space:]]\{1\,\},,p' head.S)
%.S: %.c
RELOC=$(BOOT_TRAMPOLINE) $(MAKE) -f build32.mk $@
-reloc.S: $(BASEDIR)/include/asm-x86/config.h
+reloc.S: head.S
Index: xen-4.1.2-testing/xen/arch/x86/boot/head.S
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/boot/head.S
+++ xen-4.1.2-testing/xen/arch/x86/boot/head.S
@@ -9,7 +9,7 @@
.text
.code32
-#undef bootsym_phys
+#define BOOT_TRAMPOLINE 0x7c000
#define sym_phys(sym) ((sym) - __XEN_VIRT_START)
#define bootsym_phys(sym) ((sym) - trampoline_start + BOOT_TRAMPOLINE)
@@ -189,6 +189,17 @@ __start:
mov %edi,sym_phys(idle_pg_table_l2) + (__PAGE_OFFSET>>18)
#endif
+ /* Apply relocations to bootstrap trampoline. */
+ mov $BOOT_TRAMPOLINE,%edx
+ mov $sym_phys(__trampoline_rel_start),%edi
+ mov %edx,sym_phys(trampoline_phys)
+1:
+ mov (%edi),%eax
+ add %edx,(%edi,%eax)
+ add $4,%edi
+ cmp $sym_phys(__trampoline_rel_stop),%edi
+ jb 1b
+
/* Copy bootstrap trampoline to low memory, below 1MB. */
mov $sym_phys(trampoline_start),%esi
mov $bootsym_phys(trampoline_start),%edi
Index: xen-4.1.2-testing/xen/arch/x86/boot/trampoline.S
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/boot/trampoline.S
+++ xen-4.1.2-testing/xen/arch/x86/boot/trampoline.S
@@ -4,6 +4,13 @@
#undef bootsym
#define bootsym(s) ((s)-trampoline_start)
+#define bootsym_rel(sym, off, opnd...) \
+ bootsym(sym),##opnd; \
+111:; \
+ .pushsection .trampoline_rel, "a"; \
+ .long 111b - (off) - .; \
+ .popsection
+
.globl trampoline_realmode_entry
trampoline_realmode_entry:
mov %cs,%ax
@@ -17,11 +24,11 @@ trampoline_realmode_entry:
xor %ax, %ax
inc %ax
lmsw %ax # CR0.PE = 1 (enter protected mode)
- ljmpl $BOOT_CS32,$bootsym_phys(trampoline_protmode_entry)
+ ljmpl $BOOT_CS32,$bootsym_rel(trampoline_protmode_entry,6)
idt_48: .word 0, 0, 0 # base = limit = 0
gdt_48: .word 6*8-1
- .long bootsym_phys(trampoline_gdt)
+ .long bootsym_rel(trampoline_gdt,4)
trampoline_gdt:
/* 0x0000: unused */
.quad 0x0000000000000000
@@ -32,11 +39,16 @@ trampoline_gdt:
/* 0x0018: ring 0 data */
.quad 0x00cf92000000ffff
/* 0x0020: real-mode code @ BOOT_TRAMPOLINE */
- .long 0x0000ffff | ((BOOT_TRAMPOLINE & 0x00ffff) << 16)
- .long 0x00009a00 | ((BOOT_TRAMPOLINE & 0xff0000) >> 16)
+ .long 0x0000ffff
+ .long 0x00009a00
/* 0x0028: real-mode data @ BOOT_TRAMPOLINE */
- .long 0x0000ffff | ((BOOT_TRAMPOLINE & 0x00ffff) << 16)
- .long 0x00009200 | ((BOOT_TRAMPOLINE & 0xff0000) >> 16)
+ .long 0x0000ffff
+ .long 0x00009200
+
+ .pushsection .trampoline_rel, "a"
+ .long trampoline_gdt + BOOT_PSEUDORM_CS + 2 - .
+ .long trampoline_gdt + BOOT_PSEUDORM_DS + 2 - .
+ .popsection
.globl cpuid_ext_features
cpuid_ext_features:
@@ -66,11 +78,11 @@ trampoline_protmode_entry:
/* Load pagetable base register. */
mov $sym_phys(idle_pg_table),%eax
- add bootsym_phys(trampoline_xen_phys_start),%eax
+ add bootsym_rel(trampoline_xen_phys_start,4,%eax)
mov %eax,%cr3
/* Set up EFER (Extended Feature Enable Register). */
- mov bootsym_phys(cpuid_ext_features),%edi
+ mov bootsym_rel(cpuid_ext_features,4,%edi)
test $0x20100800,%edi /* SYSCALL/SYSRET, No Execute, Long Mode? */
jz .Lskip_efer
movl $MSR_EFER,%ecx
@@ -93,7 +105,7 @@ trampoline_protmode_entry:
#if defined(__x86_64__)
/* Now in compatibility mode. Long-jump into 64-bit mode. */
- ljmp $BOOT_CS64,$bootsym_phys(start64)
+ ljmp $BOOT_CS64,$bootsym_rel(start64,6)
.code64
start64:
Index: xen-4.1.2-testing/xen/arch/x86/boot/wakeup.S
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/boot/wakeup.S
+++ xen-4.1.2-testing/xen/arch/x86/boot/wakeup.S
@@ -42,15 +42,13 @@ ENTRY(wakeup_start)
# boot trampoline is under 1M, and shift its start into
# %fs to reference symbols in that area
- movl $BOOT_TRAMPOLINE, %eax
- shrl $4, %eax
- movl %eax, %fs
+ mov wakesym(trampoline_seg), %fs
lidt %fs:bootsym(idt_48)
lgdt %fs:bootsym(gdt_48)
movw $1, %ax
lmsw %ax # Turn on CR0.PE
- ljmpl $BOOT_CS32, $bootsym_phys(wakeup_32)
+ ljmpl $BOOT_CS32, $bootsym_rel(wakeup_32, 6)
/* This code uses an extended set of video mode numbers. These include:
* Aliases for standard modes
@@ -103,6 +101,10 @@ real_magic: .long 0x12345678
.globl video_mode, video_flags
video_mode: .long 0
video_flags: .long 0
+trampoline_seg: .word BOOT_TRAMPOLINE >> 4
+ .pushsection .trampoline_seg, "a"
+ .long trampoline_seg - .
+ .popsection
.code32
@@ -114,11 +116,11 @@ wakeup_32:
mov $BOOT_DS, %eax
mov %eax, %ds
mov %eax, %ss
- mov $bootsym_phys(early_stack), %esp
+ mov $bootsym_rel(early_stack, 4, %esp)
# check saved magic again
mov $sym_phys(saved_magic), %eax
- add bootsym_phys(trampoline_xen_phys_start), %eax
+ add bootsym_rel(trampoline_xen_phys_start, 4, %eax)
mov (%eax), %eax
cmp $0x9abcdef0, %eax
jne bogus_saved_magic
@@ -131,12 +133,12 @@ wakeup_32:
/* Load pagetable base register */
mov $sym_phys(idle_pg_table),%eax
- add bootsym_phys(trampoline_xen_phys_start),%eax
+ add bootsym_rel(trampoline_xen_phys_start,4,%eax)
mov %eax,%cr3
/* Will cpuid feature change after resume? */
/* Set up EFER (Extended Feature Enable Register). */
- mov bootsym_phys(cpuid_ext_features),%edi
+ mov bootsym_rel(cpuid_ext_features,4,%edi)
test $0x20100800,%edi /* SYSCALL/SYSRET, No Execute, Long Mode? */
jz .Lskip_eferw
movl $MSR_EFER,%ecx
@@ -162,7 +164,7 @@ wakeup_32:
#if defined(__x86_64__)
/* Now in compatibility mode. Long-jump to 64-bit mode */
- ljmp $BOOT_CS64, $bootsym_phys(wakeup_64)
+ ljmp $BOOT_CS64, $bootsym_rel(wakeup_64,6)
.code64
wakeup_64:
Index: xen-4.1.2-testing/xen/arch/x86/efi/boot.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/efi/boot.c
+++ xen-4.1.2-testing/xen/arch/x86/efi/boot.c
@@ -599,6 +599,9 @@ static void __init relocate_image(unsign
}
}
+extern const s32 __trampoline_rel_start[], __trampoline_rel_stop[];
+extern const s32 __trampoline_seg_start[], __trampoline_seg_stop[];
+
void EFIAPI __init __attribute__((__noreturn__))
efi_start(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable)
{
@@ -614,9 +617,10 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
EFI_GRAPHICS_OUTPUT_MODE_INFORMATION *mode_info;
EFI_FILE_HANDLE dir_handle;
union string section = { NULL }, name;
+ const s32 *trampoline_ptr;
struct e820entry *e;
u64 efer;
- bool_t base_video = 0, trampoline_okay = 0;
+ bool_t base_video = 0;
efi_ih = ImageHandle;
efi_bs = SystemTable->BootServices;
@@ -914,15 +918,27 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
dmi_efi_get_table((void *)(long)efi.smbios);
/* Allocate space for trampoline (in first Mb). */
- cfg.addr = BOOT_TRAMPOLINE;
+ cfg.addr = 0x100000;
cfg.size = trampoline_end - trampoline_start;
- status = efi_bs->AllocatePages(AllocateAddress, EfiLoaderData,
+ status = efi_bs->AllocatePages(AllocateMaxAddress, EfiLoaderData,
PFN_UP(cfg.size), &cfg.addr);
if ( EFI_ERROR(status) )
{
cfg.addr = 0;
- PrintErr(L"Note: Trampoline area is in use\r\n");
+ blexit(L"No memory for trampoline\r\n");
}
+ trampoline_phys = cfg.addr;
+ /* Apply relocations to trampoline. */
+ for ( trampoline_ptr = __trampoline_rel_start;
+ trampoline_ptr < __trampoline_rel_stop;
+ ++trampoline_ptr )
+ *(u32 *)(*trampoline_ptr + (long)trampoline_ptr) +=
+ trampoline_phys;
+ for ( trampoline_ptr = __trampoline_seg_start;
+ trampoline_ptr < __trampoline_seg_stop;
+ ++trampoline_ptr )
+ *(u16 *)(*trampoline_ptr + (long)trampoline_ptr) =
+ trampoline_phys >> 4;
/* Initialise L2 identity-map and xen page table entries (16MB). */
for ( i = 0; i < 8; ++i )
@@ -1096,14 +1112,8 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
e->type = type;
++e820nr;
}
- if ( type == E820_RAM && e->addr <= BOOT_TRAMPOLINE &&
- e->addr + e->size >= BOOT_TRAMPOLINE + cfg.size )
- trampoline_okay = 1;
}
- if ( !trampoline_okay )
- blexit(L"Trampoline area unavailable\r\n");
-
status = efi_bs->ExitBootServices(ImageHandle, map_key);
if ( EFI_ERROR(status) )
PrintErrMesg(L"Cannot exit boot services", status);
@@ -1117,7 +1127,7 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
efi_fw_vendor = (void *)efi_fw_vendor + DIRECTMAP_VIRT_START;
relocate_image(__XEN_VIRT_START - xen_phys_start);
- memcpy((void *)(long)BOOT_TRAMPOLINE, trampoline_start, cfg.size);
+ memcpy((void *)trampoline_phys, trampoline_start, cfg.size);
/* Set system registers and transfer control. */
asm volatile("pushq $0\n\tpopfq");
Index: xen-4.1.2-testing/xen/arch/x86/smpboot.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/smpboot.c
+++ xen-4.1.2-testing/xen/arch/x86/smpboot.c
@@ -48,6 +48,8 @@
#define setup_trampoline() (bootsym_phys(trampoline_realmode_entry))
+unsigned long __read_mostly trampoline_phys;
+
/* Set if we find a B stepping CPU */
static int smp_b_stepping;
Index: xen-4.1.2-testing/xen/arch/x86/x86_32/mm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/x86_32/mm.c
+++ xen-4.1.2-testing/xen/arch/x86/x86_32/mm.c
@@ -22,6 +22,7 @@
#include <xen/lib.h>
#include <xen/init.h>
#include <xen/mm.h>
+#include <xen/pfn.h>
#include <xen/sched.h>
#include <xen/guest_access.h>
#include <asm/current.h>
@@ -166,8 +167,9 @@ void __init zap_low_mappings(l2_pgentry_
flush_all(FLUSH_TLB_GLOBAL);
/* Replace with mapping of the boot trampoline only. */
- map_pages_to_xen(BOOT_TRAMPOLINE, BOOT_TRAMPOLINE >> PAGE_SHIFT,
- 0x10, __PAGE_HYPERVISOR);
+ map_pages_to_xen(trampoline_phys, trampoline_phys >> PAGE_SHIFT,
+ PFN_UP(trampoline_end - trampoline_start),
+ __PAGE_HYPERVISOR);
}
void __init subarch_init_memory(void)
Index: xen-4.1.2-testing/xen/arch/x86/x86_64/mm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/x86_64/mm.c
+++ xen-4.1.2-testing/xen/arch/x86/x86_64/mm.c
@@ -830,7 +830,7 @@ void __init zap_low_mappings(void)
flush_local(FLUSH_TLB_GLOBAL);
/* Replace with mapping of the boot trampoline only. */
- map_pages_to_xen(BOOT_TRAMPOLINE, BOOT_TRAMPOLINE >> PAGE_SHIFT,
+ map_pages_to_xen(trampoline_phys, trampoline_phys >> PAGE_SHIFT,
PFN_UP(trampoline_end - trampoline_start),
__PAGE_HYPERVISOR);
}
Index: xen-4.1.2-testing/xen/arch/x86/xen.lds.S
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/xen.lds.S
+++ xen-4.1.2-testing/xen/arch/x86/xen.lds.S
@@ -103,6 +103,13 @@ SECTIONS
*(.init.data)
*(.init.data.rel)
*(.init.data.rel.*)
+ . = ALIGN(4);
+ __trampoline_rel_start = .;
+ *(.trampoline_rel)
+ __trampoline_rel_stop = .;
+ __trampoline_seg_start = .;
+ *(.trampoline_seg)
+ __trampoline_seg_stop = .;
} :text
. = ALIGN(32);
.init.setup : {
Index: xen-4.1.2-testing/xen/include/asm-x86/config.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/config.h
+++ xen-4.1.2-testing/xen/include/asm-x86/config.h
@@ -95,13 +95,13 @@
/* Primary stack is restricted to 8kB by guard pages. */
#define PRIMARY_STACK_SIZE 8192
-#define BOOT_TRAMPOLINE 0x7c000
+#ifndef __ASSEMBLY__
+extern unsigned long trampoline_phys;
#define bootsym_phys(sym) \
- (((unsigned long)&(sym)-(unsigned long)&trampoline_start)+BOOT_TRAMPOLINE)
+ (((unsigned long)&(sym)-(unsigned long)&trampoline_start)+trampoline_phys)
#define bootsym(sym) \
(*RELOC_HIDE((typeof(&(sym)))__va(__pa(&(sym))), \
- BOOT_TRAMPOLINE-__pa(trampoline_start)))
-#ifndef __ASSEMBLY__
+ trampoline_phys-__pa(trampoline_start)))
extern char trampoline_start[], trampoline_end[];
extern char trampoline_realmode_entry[];
extern unsigned int trampoline_xen_phys_start;
++++++ 23774-x86_64-EFI-EDD.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1313744120 -3600
# Node ID e35c5202625ef5534561f84352833ad9467d986c
# Parent dd90b59cb11c60c48e174c899190e2967341fe32
x86-64/EFI: construct EDD data from device path protocol information
In the absence of a BIOS to handle INT13 requests, this information
must be constructed artificially instead when booted from EFI.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/boot/edd.S
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/boot/edd.S
+++ xen-4.1.2-testing/xen/arch/x86/boot/edd.S
@@ -16,21 +16,13 @@
* Updated and ported for Xen by Keir Fraser <keir@xxxxxxxxxxxxx> June 2007
*/
+#include <asm/edd.h>
+
.code16
/* Offset of disc signature in the MBR. */
#define EDD_MBR_SIG_OFFSET 0x1B8
-/* Maximum number of EDD information structures at boot_edd_info. */
-#define EDD_INFO_MAX 6
-
-/* Maximum number of MBR signatures at boot_mbr_signature. */
-#define EDD_MBR_SIG_MAX 16
-
-/* Size of components of EDD information structure. */
-#define EDDEXTSIZE 8
-#define EDDPARMSIZE 74
-
get_edd:
cmpb $2, bootsym(opt_edd) # edd=off ?
je edd_done
Index: xen-4.1.2-testing/xen/arch/x86/efi/boot.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/efi/boot.c
+++ xen-4.1.2-testing/xen/arch/x86/efi/boot.c
@@ -16,6 +16,7 @@
#include <xen/stringify.h>
#include <xen/vga.h>
#include <asm/e820.h>
+#include <asm/edd.h>
#include <asm/mm.h>
#include <asm/msr.h>
#include <asm/processor.h>
@@ -539,6 +540,18 @@ static void __init split_value(char *s)
*s = 0;
}
+static void __init edd_put_string(u8 *dst, size_t n, const char *src)
+{
+ while ( n-- && *src )
+ *dst++ = *src++;
+ if ( *src )
+ PrintErrMesg(L"Internal error populating EDD info",
+ EFI_BUFFER_TOO_SMALL);
+ while ( n-- )
+ *dst++ = ' ';
+}
+#define edd_put_string(d, s) edd_put_string(d, ARRAY_SIZE(d), s)
+
static int __init set_color(u32 mask, int bpp, u8 *pos, u8 *sz)
{
if ( bpp < 0 )
@@ -607,6 +620,8 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
{
static EFI_GUID __initdata loaded_image_guid = LOADED_IMAGE_PROTOCOL;
static EFI_GUID __initdata gop_guid = EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID;
+ static EFI_GUID __initdata bio_guid = BLOCK_IO_PROTOCOL;
+ static EFI_GUID __initdata devp_guid = DEVICE_PATH_PROTOCOL;
EFI_LOADED_IMAGE *loaded_image;
EFI_STATUS status;
unsigned int i, argc;
@@ -887,7 +902,148 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
place_string(&mbi.mem_upper, NULL);
- /* XXX Collect EDD info. */
+ /* Collect EDD info. */
+ BUILD_BUG_ON(offsetof(struct edd_info, edd_device_params) != EDDEXTSIZE);
+ BUILD_BUG_ON(sizeof(struct edd_device_params) != EDDPARMSIZE);
+ size = 0;
+ status = efi_bs->LocateHandle(ByProtocol, &bio_guid, NULL, &size, NULL);
+ if ( status == EFI_BUFFER_TOO_SMALL )
+ status = efi_bs->AllocatePool(EfiLoaderData, size, (void **)&handles);
+ if ( !EFI_ERROR(status) )
+ status = efi_bs->LocateHandle(ByProtocol, &bio_guid, NULL, &size,
+ handles);
+ if ( EFI_ERROR(status) )
+ size = 0;
+ for ( i = 0; i < size / sizeof(*handles); ++i )
+ {
+ EFI_BLOCK_IO *bio;
+ EFI_DEV_PATH_PTR devp;
+ struct edd_info *info = boot_edd_info + boot_edd_info_nr;
+ struct edd_device_params *params = &info->edd_device_params;
+ enum { root, acpi, pci, ctrlr } state = root;
+
+ status = efi_bs->HandleProtocol(handles[i], &bio_guid, (void **)&bio);
+ if ( EFI_ERROR(status) ||
+ bio->Media->RemovableMedia ||
+ bio->Media->LogicalPartition )
+ continue;
+ if ( boot_edd_info_nr < EDD_INFO_MAX )
+ {
+ info->device = 0x80 + boot_edd_info_nr; /* fake */
+ info->version = 0x11;
+ params->length = offsetof(struct edd_device_params, dpte_ptr);
+ params->number_of_sectors = bio->Media->LastBlock + 1;
+ params->bytes_per_sector = bio->Media->BlockSize;
+ params->dpte_ptr = ~0;
+ }
+ ++boot_edd_info_nr;
+ status = efi_bs->HandleProtocol(handles[i], &devp_guid,
+ (void **)&devp);
+ if ( EFI_ERROR(status) )
+ continue;
+ for ( ; !IsDevicePathEnd(devp.DevPath);
+ devp.DevPath = NextDevicePathNode(devp.DevPath) )
+ {
+ switch ( DevicePathType(devp.DevPath) )
+ {
+ const u8 *p;
+
+ case ACPI_DEVICE_PATH:
+ if ( state != root || boot_edd_info_nr > EDD_INFO_MAX )
+ break;
+ switch ( DevicePathSubType(devp.DevPath) )
+ {
+ case ACPI_DP:
+ if ( devp.Acpi->HID != EISA_PNP_ID(0xA03) &&
+ devp.Acpi->HID != EISA_PNP_ID(0xA08) )
+ break;
+ params->interface_path.pci.bus = devp.Acpi->UID;
+ state = acpi;
+ break;
+ case EXPANDED_ACPI_DP:
+ /* XXX */
+ break;
+ }
+ break;
+ case HARDWARE_DEVICE_PATH:
+ if ( state != acpi ||
+ DevicePathSubType(devp.DevPath) != HW_PCI_DP ||
+ boot_edd_info_nr > EDD_INFO_MAX )
+ break;
+ state = pci;
+ edd_put_string(params->host_bus_type, "PCI");
+ params->interface_path.pci.slot = devp.Pci->Device;
+ params->interface_path.pci.function = devp.Pci->Function;
+ break;
+ case MESSAGING_DEVICE_PATH:
+ if ( state != pci || boot_edd_info_nr > EDD_INFO_MAX )
+ break;
+ state = ctrlr;
+ switch ( DevicePathSubType(devp.DevPath) )
+ {
+ case MSG_ATAPI_DP:
+ edd_put_string(params->interface_type, "ATAPI");
+ params->interface_path.pci.channel =
+ devp.Atapi->PrimarySecondary;
+ params->device_path.atapi.device = devp.Atapi->SlaveMaster;
+ params->device_path.atapi.lun = devp.Atapi->Lun;
+ break;
+ case MSG_SCSI_DP:
+ edd_put_string(params->interface_type, "SCSI");
+ params->device_path.scsi.id = devp.Scsi->Pun;
+ params->device_path.scsi.lun = devp.Scsi->Lun;
+ break;
+ case MSG_FIBRECHANNEL_DP:
+ edd_put_string(params->interface_type, "FIBRE");
+ params->device_path.fibre.wwid = devp.FibreChannel->WWN;
+ params->device_path.fibre.lun = devp.FibreChannel->Lun;
+ break;
+ case MSG_1394_DP:
+ edd_put_string(params->interface_type, "1394");
+ params->device_path.i1394.eui = devp.F1394->Guid;
+ break;
+ case MSG_USB_DP:
+ case MSG_USB_CLASS_DP:
+ edd_put_string(params->interface_type, "USB");
+ break;
+ case MSG_I2O_DP:
+ edd_put_string(params->interface_type, "I2O");
+ params->device_path.i2o.identity_tag = devp.I2O->Tid;
+ break;
+ default:
+ continue;
+ }
+ info->version = 0x30;
+ params->length = sizeof(struct edd_device_params);
+ params->key = 0xbedd;
+ params->device_path_info_length =
+ sizeof(struct edd_device_params) -
+ offsetof(struct edd_device_params, key);
+ for ( p = (const u8 *)¶ms->key; p < ¶ms->checksum; ++p
)
+ params->checksum -= *p;
+ break;
+ case MEDIA_DEVICE_PATH:
+ if ( DevicePathSubType(devp.DevPath) == MEDIA_HARDDRIVE_DP &&
+ devp.HardDrive->MBRType == MBR_TYPE_PCAT &&
+ boot_mbr_signature_nr < EDD_MBR_SIG_MAX )
+ {
+ struct mbr_signature *sig = boot_mbr_signature +
+ boot_mbr_signature_nr;
+
+ sig->device = 0x80 + boot_edd_info_nr; /* fake */
+ memcpy(&sig->signature, devp.HardDrive->Signature,
+ sizeof(sig->signature));
+ ++boot_mbr_signature_nr;
+ }
+ break;
+ }
+ }
+ }
+ if ( handles )
+ efi_bs->FreePool(handles);
+ if ( boot_edd_info_nr > EDD_INFO_MAX )
+ boot_edd_info_nr = EDD_INFO_MAX;
+
/* XXX Collect EDID info. */
if ( cpuid_eax(0x80000000) > 0x80000000 )
Index: xen-4.1.2-testing/xen/include/asm-x86/edd.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/edd.h
+++ xen-4.1.2-testing/xen/include/asm-x86/edd.h
@@ -23,6 +23,8 @@
#ifndef __XEN_EDD_H__
#define __XEN_EDD_H__
+#ifndef __ASSEMBLY__
+
struct edd_info {
/* Int13, Fn48: Check Extensions Present. */
u8 device; /* %dl: device */
@@ -33,10 +35,106 @@ struct edd_info {
u8 legacy_max_head; /* %dh: maximum head number */
u8 legacy_sectors_per_track; /* %cl[5:0]: maximum sector number */
/* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */
- struct {
+ struct edd_device_params {
u16 length;
- u8 data[72];
- } edd_device_params;
+ u16 info_flags;
+ u32 num_default_cylinders;
+ u32 num_default_heads;
+ u32 sectors_per_track;
+ u64 number_of_sectors;
+ u16 bytes_per_sector;
+ u32 dpte_ptr; /* 0xFFFFFFFF for our purposes */
+ u16 key; /* = 0xBEDD */
+ u8 device_path_info_length;
+ u8 reserved2;
+ u16 reserved3;
+ u8 host_bus_type[4];
+ u8 interface_type[8];
+ union {
+ struct {
+ u16 base_address;
+ u16 reserved1;
+ u32 reserved2;
+ } __attribute__ ((packed)) isa;
+ struct {
+ u8 bus;
+ u8 slot;
+ u8 function;
+ u8 channel;
+ u32 reserved;
+ } __attribute__ ((packed)) pci;
+ /* pcix is same as pci */
+ struct {
+ u64 reserved;
+ } __attribute__ ((packed)) ibnd;
+ struct {
+ u64 reserved;
+ } __attribute__ ((packed)) xprs;
+ struct {
+ u64 reserved;
+ } __attribute__ ((packed)) htpt;
+ struct {
+ u64 reserved;
+ } __attribute__ ((packed)) unknown;
+ } interface_path;
+ union {
+ struct {
+ u8 device;
+ u8 reserved1;
+ u16 reserved2;
+ u32 reserved3;
+ u64 reserved4;
+ } __attribute__ ((packed)) ata;
+ struct {
+ u8 device;
+ u8 lun;
+ u8 reserved1;
+ u8 reserved2;
+ u32 reserved3;
+ u64 reserved4;
+ } __attribute__ ((packed)) atapi;
+ struct {
+ u16 id;
+ u64 lun;
+ u16 reserved1;
+ u32 reserved2;
+ } __attribute__ ((packed)) scsi;
+ struct {
+ u64 serial_number;
+ u64 reserved;
+ } __attribute__ ((packed)) usb;
+ struct {
+ u64 eui;
+ u64 reserved;
+ } __attribute__ ((packed)) i1394;
+ struct {
+ u64 wwid;
+ u64 lun;
+ } __attribute__ ((packed)) fibre;
+ struct {
+ u64 identity_tag;
+ u64 reserved;
+ } __attribute__ ((packed)) i2o;
+ struct {
+ u32 array_number;
+ u32 reserved1;
+ u64 reserved2;
+ } __attribute__ ((packed)) raid;
+ struct {
+ u8 device;
+ u8 reserved1;
+ u16 reserved2;
+ u32 reserved3;
+ u64 reserved4;
+ } __attribute__ ((packed)) sata;
+ struct {
+ u64 reserved1;
+ u64 reserved2;
+ } __attribute__ ((packed)) unknown;
+ } device_path;
+ u8 reserved4;
+ u8 checksum;
+ } __attribute__ ((packed)) edd_device_params;
} __attribute__ ((packed));
struct mbr_signature {
@@ -51,4 +149,16 @@ extern u8 boot_mbr_signature_nr;
extern struct edd_info boot_edd_info[];
extern u8 boot_edd_info_nr;
+#endif /* __ASSEMBLY__ */
+
+/* Maximum number of EDD information structures at boot_edd_info. */
+#define EDD_INFO_MAX 6
+
+/* Maximum number of MBR signatures at boot_mbr_signature. */
+#define EDD_MBR_SIG_MAX 16
+
+/* Size of components of EDD information structure. */
+#define EDDEXTSIZE 8
+#define EDDPARMSIZE 74
+
#endif /* __XEN_EDD_H__ */
++++++ 23781-pm-wide-ACPI-ids.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1314004239 -3600
# Node ID 0849b0e59e2418e8215616df147f955b01b07577
# Parent 07f78b5bd03c02e32324eaa00487643d27b7ffa8
pm: don't truncate processors' ACPI IDs to 8 bits
This is just another adjustment to allow systems with very many CPUs
(or unusual ACPI IDs) to be properly power-managed.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/ia64/linux-xen/acpi.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/ia64/linux-xen/acpi.c
+++ xen-4.1.2-testing/xen/arch/ia64/linux-xen/acpi.c
@@ -223,11 +223,14 @@ static u16 ia64_acpiid_to_sapicid[ MAX_L
{[0 ... MAX_LOCAL_SAPIC - 1] = 0xffff };
/* acpi id to cpu id */
-int get_cpu_id(u8 acpi_id)
+int get_cpu_id(u32 acpi_id)
{
int i;
u16 apic_id;
+ if ( acpi_id >= MAX_LOCAL_SAPIC )
+ return -EINVAL;
+
apic_id = ia64_acpiid_to_sapicid[acpi_id];
if ( apic_id == 0xffff )
return -EINVAL;
Index: xen-4.1.2-testing/xen/arch/x86/acpi/cpu_idle.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/acpi/cpu_idle.c
+++ xen-4.1.2-testing/xen/arch/x86/acpi/cpu_idle.c
@@ -901,11 +901,14 @@ static void set_cx(
acpi_power->safe_state = cx;
}
-int get_cpu_id(u8 acpi_id)
+int get_cpu_id(u32 acpi_id)
{
int i;
u32 apic_id;
+ if ( acpi_id >= MAX_MADT_ENTRIES )
+ return -1;
+
apic_id = x86_acpiid_to_apicid[acpi_id];
if ( apic_id == BAD_APICID )
return -1;
@@ -982,7 +985,7 @@ long set_cx_pminfo(uint32_t cpu, struct
print_cx_pminfo(cpu, power);
/* map from acpi_id to cpu_id */
- cpu_id = get_cpu_id((u8)cpu);
+ cpu_id = get_cpu_id(cpu);
if ( cpu_id == -1 )
{
printk(XENLOG_ERR "no cpu_id for acpi_id %d\n", cpu);
Index: xen-4.1.2-testing/xen/include/acpi/cpufreq/processor_perf.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/acpi/cpufreq/processor_perf.h
+++ xen-4.1.2-testing/xen/include/acpi/cpufreq/processor_perf.h
@@ -6,7 +6,7 @@
#define XEN_PX_INIT 0x80000000
-int get_cpu_id(u8);
+int get_cpu_id(u32);
int powernow_cpufreq_init(void);
unsigned int powernow_register_driver(void);
unsigned int get_measured_perf(unsigned int cpu, unsigned int flag);
++++++ 23782-x86-ioapic-clear-irr.patch ++++++
References: bnc#701686
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1314004270 -3600
# Node ID 25dfe53bb1898b3967ceb71a7eb60a8b760c25fb
# Parent 0849b0e59e2418e8215616df147f955b01b07577
x86/IO-APIC: clear remoteIRR in clear_IO_APIC_pin()
It was found that in a crash scenario, the remoteIRR bit in an IO-APIC
RTE could be left set, causing problems when bringing up a kdump
kernel. While this generally is most important to be taken care of in
the new kernel (which usually would be a native one), it still seems
desirable to also address this problem in Xen so that (a) the problem
doesn't bite Xen when used as a secondary emergency kernel and (b) an
attempt is being made to save un-fixed secondary kernels from running
into said problem.
Based on a Linux patch from suresh.b.siddha@xxxxxxxxx.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -481,11 +481,35 @@ static void clear_IO_APIC_pin(unsigned i
return;
/*
+ * Make sure the entry is masked and re-read the contents to check
+ * if it is a level triggered pin and if the remoteIRR is set.
+ */
+ if (!entry.mask) {
+ entry.mask = 1;
+ __ioapic_write_entry(apic, pin, FALSE, entry);
+ }
+ entry = __ioapic_read_entry(apic, pin, TRUE);
+
+ if (entry.irr) {
+ /* Make sure the trigger mode is set to level. */
+ if (!entry.trigger) {
+ entry.trigger = 1;
+ __ioapic_write_entry(apic, pin, TRUE, entry);
+ }
+ __io_apic_eoi(apic, entry.vector, pin);
+ }
+
+ /*
* Disable it in the IO-APIC irq-routing table:
*/
memset(&entry, 0, sizeof(entry));
entry.mask = 1;
__ioapic_write_entry(apic, pin, TRUE, entry);
+
+ entry = __ioapic_read_entry(apic, pin, TRUE);
+ if (entry.irr)
+ printk(KERN_ERR "IO-APIC%02x-%u: Unable to reset IRR\n",
+ IO_APIC_ID(apic), pin);
}
static void clear_IO_APIC (void)
++++++ 23783-ACPI-set-_PDC-bits.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1314004356 -3600
# Node ID 2029263c501c315fa4d94845e5cfa6a9b0b395d5
# Parent 25dfe53bb1898b3967ceb71a7eb60a8b760c25fb
ACPI: add _PDC input override mechanism
In order to have Dom0 call _PDC with input fully representing Xen's
capabilities, and in order to avoid building knowledge of Xen
implementation details into Dom0, this provides a mechanism by which
the Dom0 kernel can, once it filled the _PDC input buffer according to
its own knowledge, present the buffer to Xen to apply overrides for
the parts of the C-, P-, and T-state management that it controls. This
is particularly to address the dependency of Xen using MWAIT to enter
certain C-states on the availability of the break-on-interrupt
extension (which the Dom0 kernel should have no need to know about).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/ia64/linux-xen/acpi.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/ia64/linux-xen/acpi.c
+++ xen-4.1.2-testing/xen/arch/ia64/linux-xen/acpi.c
@@ -243,6 +243,13 @@ int get_cpu_id(u32 acpi_id)
return -1;
}
+
+int arch_acpi_set_pdc_bits(u32 acpi_id, u32 *pdc, u32 mask)
+{
+ pdc[2] |= ACPI_PDC_EST_CAPABILITY_SMP & mask;
+ return 0;
+}
+
#endif
static int __init
Index: xen-4.1.2-testing/xen/arch/x86/acpi/cpu_idle.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/acpi/cpu_idle.c
+++ xen-4.1.2-testing/xen/arch/x86/acpi/cpu_idle.c
@@ -649,12 +649,6 @@ static int cpuidle_init_cpu(int cpu)
return 0;
}
-#define CPUID_MWAIT_LEAF (5)
-#define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1)
-#define CPUID5_ECX_INTERRUPT_BREAK (0x2)
-
-#define MWAIT_ECX_INTERRUPT_BREAK (0x1)
-
#define MWAIT_SUBSTATE_MASK (0xf)
#define MWAIT_SUBSTATE_SIZE (4)
Index: xen-4.1.2-testing/xen/arch/x86/acpi/boot.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/acpi/boot.c
+++ xen-4.1.2-testing/xen/arch/x86/acpi/boot.c
@@ -1006,3 +1006,47 @@ unsigned int acpi_get_processor_id(unsig
return INVALID_ACPIID;
}
+
+static void get_mwait_ecx(void *info)
+{
+ *(u32 *)info = cpuid_ecx(CPUID_MWAIT_LEAF);
+}
+
+int arch_acpi_set_pdc_bits(u32 acpi_id, u32 *pdc, u32 mask)
+{
+ unsigned int cpu = get_cpu_id(acpi_id);
+ struct cpuinfo_x86 *c;
+ u32 ecx;
+
+ if (!(acpi_id + 1))
+ c = &boot_cpu_data;
+ else if (cpu >= NR_CPUS || !cpu_online(cpu))
+ return -EINVAL;
+ else
+ c = cpu_data + cpu;
+
+ pdc[2] |= ACPI_PDC_C_CAPABILITY_SMP & mask;
+
+ if (cpu_has(c, X86_FEATURE_EST))
+ pdc[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP & mask;
+
+ if (cpu_has(c, X86_FEATURE_ACPI))
+ pdc[2] |= ACPI_PDC_T_FFH & mask;
+
+ /*
+ * If mwait/monitor or its break-on-interrupt extension are
+ * unsupported, Cx_FFH will be disabled.
+ */
+ if (!cpu_has(c, X86_FEATURE_MWAIT) ||
+ c->cpuid_level < CPUID_MWAIT_LEAF)
+ ecx = 0;
+ else if (c == &boot_cpu_data || cpu == smp_processor_id())
+ ecx = cpuid_ecx(CPUID_MWAIT_LEAF);
+ else
+ on_selected_cpus(cpumask_of(cpu), get_mwait_ecx, &ecx, 1);
+ if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
+ !(ecx & CPUID5_ECX_INTERRUPT_BREAK))
+ pdc[2] &= ~(ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH);
+
+ return 0;
+}
Index: xen-4.1.2-testing/xen/arch/x86/platform_hypercall.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/platform_hypercall.c
+++ xen-4.1.2-testing/xen/arch/x86/platform_hypercall.c
@@ -419,6 +419,15 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
ret = -EINVAL;
break;
+ case XEN_PM_PDC:
+ {
+ XEN_GUEST_HANDLE(uint32) pdc;
+
+ guest_from_compat_handle(pdc, op->u.set_pminfo.u.pdc);
+ ret = acpi_set_pdc_bits(op->u.set_pminfo.id, pdc);
+ }
+ break;
+
default:
ret = -EINVAL;
break;
Index: xen-4.1.2-testing/xen/drivers/acpi/pmstat.c
===================================================================
--- xen-4.1.2-testing.orig/xen/drivers/acpi/pmstat.c
+++ xen-4.1.2-testing/xen/drivers/acpi/pmstat.c
@@ -519,3 +519,34 @@ int do_pm_op(struct xen_sysctl_pm_op *op
return ret;
}
+
+int acpi_set_pdc_bits(u32 acpi_id, XEN_GUEST_HANDLE(uint32) pdc)
+{
+ u32 bits[3];
+ int ret;
+
+ if ( copy_from_guest(bits, pdc, 2) )
+ ret = -EFAULT;
+ else if ( bits[0] != ACPI_PDC_REVISION_ID || !bits[1] )
+ ret = -EINVAL;
+ else if ( copy_from_guest_offset(bits + 2, pdc, 2, 1) )
+ ret = -EFAULT;
+ else
+ {
+ u32 mask = 0;
+
+ if ( xen_processor_pmbits & XEN_PROCESSOR_PM_CX )
+ mask |= ACPI_PDC_C_MASK | ACPI_PDC_SMP_C1PT;
+ if ( xen_processor_pmbits & XEN_PROCESSOR_PM_PX )
+ mask |= ACPI_PDC_P_MASK | ACPI_PDC_SMP_C1PT;
+ if ( xen_processor_pmbits & XEN_PROCESSOR_PM_TX )
+ mask |= ACPI_PDC_T_MASK | ACPI_PDC_SMP_C1PT;
+ bits[2] &= (ACPI_PDC_C_MASK | ACPI_PDC_P_MASK | ACPI_PDC_T_MASK |
+ ACPI_PDC_SMP_C1PT) & ~mask;
+ ret = arch_acpi_set_pdc_bits(acpi_id, bits, mask);
+ }
+ if ( !ret )
+ ret = copy_to_guest_offset(pdc, 2, bits + 2, 1);
+
+ return ret;
+}
Index: xen-4.1.2-testing/xen/include/acpi/cpufreq/processor_perf.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/acpi/cpufreq/processor_perf.h
+++ xen-4.1.2-testing/xen/include/acpi/cpufreq/processor_perf.h
@@ -3,10 +3,10 @@
#include <public/platform.h>
#include <public/sysctl.h>
+#include <xen/acpi.h>
#define XEN_PX_INIT 0x80000000
-int get_cpu_id(u32);
int powernow_cpufreq_init(void);
unsigned int powernow_register_driver(void);
unsigned int get_measured_perf(unsigned int cpu, unsigned int flag);
Index: xen-4.1.2-testing/xen/include/acpi/pdc_intel.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/acpi/pdc_intel.h
+++ xen-4.1.2-testing/xen/include/acpi/pdc_intel.h
@@ -4,6 +4,8 @@
#ifndef __PDC_INTEL_H__
#define __PDC_INTEL_H__
+#define ACPI_PDC_REVISION_ID 1
+
#define ACPI_PDC_P_FFH (0x0001)
#define ACPI_PDC_C_C1_HALT (0x0002)
#define ACPI_PDC_T_FFH (0x0004)
@@ -14,6 +16,7 @@
#define ACPI_PDC_SMP_T_SWCOORD (0x0080)
#define ACPI_PDC_C_C1_FFH (0x0100)
#define ACPI_PDC_C_C2C3_FFH (0x0200)
+#define ACPI_PDC_SMP_P_HWCOORD (0x0800)
#define ACPI_PDC_EST_CAPABILITY_SMP (ACPI_PDC_SMP_C1PT | \
ACPI_PDC_C_C1_HALT | \
@@ -22,6 +25,7 @@
#define ACPI_PDC_EST_CAPABILITY_SWSMP (ACPI_PDC_SMP_C1PT | \
ACPI_PDC_C_C1_HALT | \
ACPI_PDC_SMP_P_SWCOORD | \
+ ACPI_PDC_SMP_P_HWCOORD | \
ACPI_PDC_P_FFH)
#define ACPI_PDC_C_CAPABILITY_SMP (ACPI_PDC_SMP_C2C3 | \
@@ -30,4 +34,17 @@
ACPI_PDC_C_C1_FFH | \
ACPI_PDC_C_C2C3_FFH)
+#define ACPI_PDC_C_MASK (ACPI_PDC_C_C1_HALT | \
+ ACPI_PDC_C_C1_FFH | \
+ ACPI_PDC_SMP_C2C3 | \
+ ACPI_PDC_SMP_C_SWCOORD | \
+ ACPI_PDC_C_C2C3_FFH)
+
+#define ACPI_PDC_P_MASK (ACPI_PDC_P_FFH | \
+ ACPI_PDC_SMP_P_SWCOORD | \
+ ACPI_PDC_SMP_P_HWCOORD)
+
+#define ACPI_PDC_T_MASK (ACPI_PDC_T_FFH | \
+ ACPI_PDC_SMP_T_SWCOORD)
+
#endif /* __PDC_INTEL_H__ */
Index: xen-4.1.2-testing/xen/include/asm-x86/cpufeature.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/cpufeature.h
+++ xen-4.1.2-testing/xen/include/asm-x86/cpufeature.h
@@ -151,6 +151,10 @@
#define boot_cpu_has(bit) test_bit(bit, boot_cpu_data.x86_capability)
#define cpufeat_mask(idx) (1u << ((idx) & 31))
+#define CPUID_MWAIT_LEAF 5
+#define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
+#define CPUID5_ECX_INTERRUPT_BREAK 0x2
+
#ifdef __i386__
#define cpu_has_vme boot_cpu_has(X86_FEATURE_VME)
#define cpu_has_de boot_cpu_has(X86_FEATURE_DE)
Index: xen-4.1.2-testing/xen/include/public/platform.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/public/platform.h
+++ xen-4.1.2-testing/xen/include/public/platform.h
@@ -304,6 +304,7 @@ DEFINE_XEN_GUEST_HANDLE(xenpf_getidletim
#define XEN_PM_CX 0
#define XEN_PM_PX 1
#define XEN_PM_TX 2
+#define XEN_PM_PDC 3
/* Px sub info type */
#define XEN_PX_PCT 1
@@ -401,6 +402,7 @@ struct xenpf_set_processor_pminfo {
union {
struct xen_processor_power power;/* Cx: _CST/_CSD */
struct xen_processor_performance perf; /* Px: _PPC/_PCT/_PSS/_PSD */
+ XEN_GUEST_HANDLE(uint32) pdc; /* _PDC */
} u;
};
typedef struct xenpf_set_processor_pminfo xenpf_set_processor_pminfo_t;
Index: xen-4.1.2-testing/xen/include/xen/acpi.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/xen/acpi.h
+++ xen-4.1.2-testing/xen/include/xen/acpi.h
@@ -334,6 +334,8 @@ static inline int acpi_boot_table_init(v
#endif /*!CONFIG_ACPI_BOOT*/
+int get_cpu_id(u32 acpi_id);
+
unsigned int acpi_register_gsi (u32 gsi, int edge_level, int active_high_low);
int acpi_gsi_to_irq (u32 gsi, unsigned int *irq);
@@ -431,6 +433,9 @@ static inline unsigned int acpi_get_csta
static inline void acpi_set_cstate_limit(unsigned int new_limit) { return; }
#endif
+int acpi_set_pdc_bits(u32 acpi_id, XEN_GUEST_HANDLE(uint32));
+int arch_acpi_set_pdc_bits(u32 acpi_id, u32 *, u32 mask);
+
#ifdef CONFIG_ACPI_NUMA
int acpi_get_pxm(acpi_handle handle);
#else
++++++ 23795-intel-ich10-quirk.patch ++++++
References: bnc#683580
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1314443678 -3600
# Node ID e17f70940d1f57fe04dde3bf4e243f75c89f0d0e
# Parent 4705eca37c9fac9d13867a856bdcfa8b7bad56c6
x86: work around certain Intel BIOSes causing (transient) hangs during boot
They apparently leave the USB legacy emulation bits set in ICH10's
SMI Control and Enable register, but fail to handle the resulting SMIs
gracefully. The hangs can apparently extend indefinitely, but are
commonly observed to last between a few seconds and a minute.
This assumes that only ICH10-based systems on Intel main boards with
Intel BIOS may be affected. Until Intel comes up with a more precise
identification of affected BIOSes, all Intel ones on Intel boards
will get this workaround applied.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/dmi_scan.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/dmi_scan.c
+++ xen-4.1.2-testing/xen/arch/x86/dmi_scan.c
@@ -10,6 +10,8 @@
#include <asm/system.h>
#include <xen/dmi.h>
#include <xen/efi.h>
+#include <xen/pci.h>
+#include <xen/pci_regs.h>
#define bt_ioremap(b,l) ((void *)__acpi_map_table(b,l))
#define bt_iounmap(b,l) ((void)0)
@@ -278,6 +280,28 @@ static __init int broken_toshiba_keyboar
return 0;
}
+static int __init ich10_bios_quirk(struct dmi_system_id *d)
+{
+ u32 port, smictl;
+
+ if ( pci_conf_read16(0, 0x1f, 0, PCI_VENDOR_ID) != 0x8086 )
+ return 0;
+
+ switch ( pci_conf_read16(0, 0x1f, 0, PCI_DEVICE_ID) ) {
+ case 0x3a14:
+ case 0x3a16:
+ case 0x3a18:
+ case 0x3a1a:
+ port = (pci_conf_read16(0, 0x1f, 0, 0x40) & 0xff80) + 0x30;
+ smictl = inl(port);
+ /* turn off LEGACY_USB{,2}_EN if enabled */
+ if ( smictl & 0x20008 )
+ outl(smictl & ~0x20008, port);
+ break;
+ }
+
+ return 0;
+}
#ifdef CONFIG_ACPI_SLEEP
static __init int reset_videomode_after_s3(struct dmi_blacklist *d)
@@ -363,6 +387,18 @@ static __initdata struct dmi_blacklist d
} },
#endif
+ { ich10_bios_quirk, "Intel board & BIOS",
+ /*
+ * BIOS leaves legacy USB emulation enabled while
+ * SMM can't properly handle it.
+ */
+ {
+ MATCH(DMI_BOARD_VENDOR, "Intel Corp"),
+ MATCH(DMI_BIOS_VENDOR, "Intel Corp"),
+ NO_MATCH, NO_MATCH
+ }
+ },
+
#ifdef CONFIG_ACPI_BOOT
/*
* If your system is blacklisted here, but you find that acpi=force
++++++ 23800-x86_64-guest-addr-range.patch ++++++
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1314800089 -3600
# Node ID 72edc40e2942a3cf0ee8e0d3a330d2e5c2bdfb53
# Parent ac9aa65050e9abc8f1c12c8603acf3b99e22cddc
x86-64: Fix off-by-one error in __addr_ok() macro
Signed-off-by: Laszlo Ersek <lersek@xxxxxxxxxx>
Signed-off-by: Keir Fraser <keir@xxxxxxx>
Index: xen-4.1.2-testing/xen/include/asm-x86/x86_64/uaccess.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/x86_64/uaccess.h
+++ xen-4.1.2-testing/xen/include/asm-x86/x86_64/uaccess.h
@@ -21,7 +21,7 @@ void free_compat_arg_xlat(struct vcpu *v
* non-canonical address (and thus fault) before ever reaching VIRT_START.
*/
#define __addr_ok(addr) \
- (((unsigned long)(addr) < (1UL<<48)) || \
+ (((unsigned long)(addr) < (1UL<<47)) || \
((unsigned long)(addr) >= HYPERVISOR_VIRT_END))
#define access_ok(addr, size) \
++++++ 23804-x86-IPI-counts.patch ++++++
# HG changeset patch
# User Kevin Tian <kevin.tian@xxxxxxxxx>
# Date 1314800303 -3600
# Node ID 42d76c68b2bfbedee3e5f79d32344e14bce48b0f
# Parent 51983821efa4db4040ae1c5063a4404791597699
x86: add irq count for IPIs
such count is useful to assist decision make in cpuidle governor,
while w/o this patch only device interrupts through do_IRQ is
currently counted.
Signed-off-by: Kevin Tian <kevin.tian@xxxxxxxxx>
--- a/xen/arch/x86/apic.c
+++ b/xen/arch/x86/apic.c
@@ -1372,6 +1372,7 @@ fastcall void smp_apic_timer_interrupt(s
struct cpu_user_regs *old_regs = set_irq_regs(regs);
ack_APIC_irq();
perfc_incr(apic_timer);
+ this_cpu(irq_count)++;
raise_softirq(TIMER_SOFTIRQ);
set_irq_regs(old_regs);
}
@@ -1393,6 +1394,7 @@ fastcall void smp_spurious_interrupt(str
unsigned long v;
struct cpu_user_regs *old_regs = set_irq_regs(regs);
+ this_cpu(irq_count)++;
irq_enter();
/*
@@ -1428,6 +1430,7 @@ fastcall void smp_error_interrupt(struct
unsigned long v, v1;
struct cpu_user_regs *old_regs = set_irq_regs(regs);
+ this_cpu(irq_count)++;
irq_enter();
/* First tickle the hardware, only then report what went on. -- REW */
v = apic_read(APIC_ESR);
@@ -1459,6 +1462,7 @@ fastcall void smp_pmu_apic_interrupt(str
{
struct cpu_user_regs *old_regs = set_irq_regs(regs);
ack_APIC_irq();
+ this_cpu(irq_count)++;
hvm_do_pmu_interrupt(regs);
set_irq_regs(old_regs);
}
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c
@@ -66,6 +66,7 @@ static void (*vendor_thermal_interrupt)(
fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs)
{
struct cpu_user_regs *old_regs = set_irq_regs(regs);
+ this_cpu(irq_count)++;
irq_enter();
vendor_thermal_interrupt(regs);
irq_exit();
@@ -1094,6 +1095,7 @@ fastcall void smp_cmci_interrupt(struct
struct cpu_user_regs *old_regs = set_irq_regs(regs);
ack_APIC_irq();
+ this_cpu(irq_count)++;
irq_enter();
mctc = mcheck_mca_logout(
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -529,6 +529,7 @@ fastcall void smp_irq_move_cleanup_inter
struct cpu_user_regs *old_regs = set_irq_regs(regs);
ack_APIC_irq();
+ this_cpu(irq_count)++;
irq_enter();
me = smp_processor_id();
--- a/xen/arch/x86/smp.c
+++ b/xen/arch/x86/smp.c
@@ -221,6 +221,7 @@ fastcall void smp_invalidate_interrupt(v
{
ack_APIC_irq();
perfc_incr(ipis);
+ this_cpu(irq_count)++;
irq_enter();
if ( !__sync_local_execstate() ||
(flush_flags & (FLUSH_TLB_GLOBAL | FLUSH_CACHE)) )
@@ -385,6 +386,7 @@ fastcall void smp_event_check_interrupt(
struct cpu_user_regs *old_regs = set_irq_regs(regs);
ack_APIC_irq();
perfc_incr(ipis);
+ this_cpu(irq_count)++;
set_irq_regs(old_regs);
}
@@ -421,6 +423,7 @@ fastcall void smp_call_function_interrup
ack_APIC_irq();
perfc_incr(ipis);
+ this_cpu(irq_count)++;
__smp_call_function_interrupt();
set_irq_regs(old_regs);
}
++++++ 23817-mem_event_add_ref_counting_for_free_requestslots.patch ++++++
changeset: 23817:083f10851dd8
user: Olaf Hering <olaf@xxxxxxxxx>
date: Mon Sep 05 15:10:09 2011 +0100
files: xen/arch/x86/mm/mem_event.c xen/arch/x86/mm/mem_sharing.c
xen/arch/x86/mm/p2m.c xen/include/asm-x86/mem_event.h xen/include/xen/sched.h
description:
mem_event: add ref counting for free requestslots
If mem_event_check_ring() is called by many vcpus at the same time
before any of them called also mem_event_put_request(), all of the
callers must assume there are enough free slots available in the ring.
Record the number of request producers in mem_event_check_ring() to
keep track of available free slots.
Add a new mem_event_put_req_producers() function to release a request
attempt made in mem_event_check_ring(). Its required for
p2m_mem_paging_populate() because that function can only modify the
p2m type if there are free request slots. But in some cases
p2m_mem_paging_populate() does not actually have to produce another
request when it is known that the same request was already made
earlier by a different vcpu.
mem_event_check_ring() can not return a reference to a free request
slot because there could be multiple references for different vcpus
and the order of mem_event_put_request() calls is not known. As a
result, incomplete requests could be consumed by the ring user.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
xen/arch/x86/mm/mem_event.c | 19 ++++++++++++-------
xen/arch/x86/mm/mem_sharing.c | 1 -
xen/arch/x86/mm/p2m.c | 1 +
xen/include/asm-x86/mem_event.h | 1 +
xen/include/xen/sched.h | 1 +
5 files changed, 15 insertions(+), 8 deletions(-)
Index: xen-4.1.2-testing/xen/arch/x86/mm/mem_event.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/mem_event.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/mem_event.c
@@ -37,8 +37,6 @@
#define mem_event_ring_lock(_d) spin_lock(&(_d)->mem_event.ring_lock)
#define mem_event_ring_unlock(_d) spin_unlock(&(_d)->mem_event.ring_lock)
-#define MEM_EVENT_RING_THRESHOLD 4
-
static int mem_event_enable(struct domain *d, mfn_t ring_mfn, mfn_t shared_mfn)
{
int rc;
@@ -109,6 +107,7 @@ void mem_event_put_request(struct domain
req_prod++;
/* Update ring */
+ d->mem_event.req_producers--;
front_ring->req_prod_pvt = req_prod;
RING_PUSH_REQUESTS(front_ring);
@@ -153,11 +152,18 @@ void mem_event_mark_and_pause(struct vcp
vcpu_sleep_nosync(v);
}
+void mem_event_put_req_producers(struct domain *d)
+{
+ mem_event_ring_lock(d);
+ d->mem_event.req_producers--;
+ mem_event_ring_unlock(d);
+}
+
int mem_event_check_ring(struct domain *d)
{
struct vcpu *curr = current;
int free_requests;
- int ring_full;
+ int ring_full = 1;
if ( !d->mem_event.ring_page )
return -1;
@@ -165,12 +171,11 @@ int mem_event_check_ring(struct domain *
mem_event_ring_lock(d);
free_requests = RING_FREE_REQUESTS(&d->mem_event.front_ring);
- if ( unlikely(free_requests < 2) )
+ if ( d->mem_event.req_producers < free_requests )
{
- gdprintk(XENLOG_INFO, "free request slots: %d\n", free_requests);
- WARN_ON(free_requests == 0);
+ d->mem_event.req_producers++;
+ ring_full = 0;
}
- ring_full = free_requests < MEM_EVENT_RING_THRESHOLD ? 1 : 0;
if ( (curr->domain->domain_id == d->domain_id) && ring_full )
{
Index: xen-4.1.2-testing/xen/arch/x86/mm/mem_sharing.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/mem_sharing.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/mem_sharing.c
@@ -322,7 +322,6 @@ static struct page_info* mem_sharing_all
req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
}
- /* XXX: Need to reserve a request, not just check the ring! */
if(mem_event_check_ring(d)) return page;
req.gfn = gfn;
Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
@@ -2970,6 +2970,7 @@ void p2m_mem_paging_populate(struct p2m_
else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
{
/* gfn is already on its way back and vcpu is not paused */
+ mem_event_put_req_producers(d);
return;
}
Index: xen-4.1.2-testing/xen/include/asm-x86/mem_event.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/mem_event.h
+++ xen-4.1.2-testing/xen/include/asm-x86/mem_event.h
@@ -27,6 +27,7 @@
/* Pauses VCPU while marking pause flag for mem event */
void mem_event_mark_and_pause(struct vcpu *v);
int mem_event_check_ring(struct domain *d);
+void mem_event_put_req_producers(struct domain *d);
void mem_event_put_request(struct domain *d, mem_event_request_t *req);
void mem_event_get_response(struct domain *d, mem_event_response_t *rsp);
void mem_event_unpause_vcpus(struct domain *d);
Index: xen-4.1.2-testing/xen/include/xen/sched.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/xen/sched.h
+++ xen-4.1.2-testing/xen/include/xen/sched.h
@@ -190,6 +190,7 @@ struct mem_event_domain
{
/* ring lock */
spinlock_t ring_lock;
+ unsigned int req_producers;
/* shared page */
mem_event_shared_page_t *shared_page;
/* shared ring page */
++++++
23818-mem_event_use_mem_event_mark_and_pause_in_mem_event_check_ring.patch
++++++
changeset: 23818:0268e7380953
user: Olaf Hering <olaf@xxxxxxxxx>
date: Mon Sep 05 15:10:28 2011 +0100
files: xen/arch/x86/mm/mem_event.c
description:
mem_event: use mem_event_mark_and_pause() in mem_event_check_ring()
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
xen/arch/x86/mm/mem_event.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
Index: xen-4.1.2-testing/xen/arch/x86/mm/mem_event.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/mem_event.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/mem_event.c
@@ -178,10 +178,7 @@ int mem_event_check_ring(struct domain *
}
if ( (curr->domain->domain_id == d->domain_id) && ring_full )
- {
- set_bit(_VPF_mem_event, &curr->pause_flags);
- vcpu_sleep_nosync(curr);
- }
+ mem_event_mark_and_pause(curr);
mem_event_ring_unlock(d);
++++++ 23819-make-docs.patch ++++++
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1315320580 -3600
# Node ID 5fe770c8a8a35c58893816ee6335a90ed43f3bbd
# Parent 0268e73809532a4a3ca18a075efcee3c62caf458
docs: Fix 'make docs'
Signed-off-by: Keir Fraser <keir@xxxxxxx>
Index: xen-4.1.2-testing/docs/src/user.tex
===================================================================
--- xen-4.1.2-testing.orig/docs/src/user.tex
+++ xen-4.1.2-testing/docs/src/user.tex
@@ -4197,7 +4197,7 @@ writing to the VGA console after domain
\item [ vcpu\_migration\_delay=$<$minimum\_time$>$] Set minimum time of
vcpu migration in microseconds (default 0). This parameter avoids agressive
vcpu migration. For example, the linux kernel uses 0.5ms by default.
-\item [ irq_vector_map=xxx ] Enable irq vector non-sharing maps. Setting
'global'
+\item [ irq\_vector\_map=xxx ] Enable irq vector non-sharing maps. Setting
'global'
will ensure that no IRQs will share vectors. Setting 'per-device' will
ensure
that no IRQs from the same device will share vectors. Setting to 'none' will
disable it entirely, overriding any defaults the IOMMU code may set.
++++++ 23827-xenpaging_use_batch_of_pages_during_final_page-in.patch ++++++
changeset: 23827:d1d6abc1db20
user: Olaf Hering <olaf@xxxxxxxxx>
date: Tue Sep 13 10:25:32 2011 +0100
files: tools/xenpaging/pagein.c tools/xenpaging/xenpaging.c
tools/xenpaging/xenpaging.h
description:
xenpaging: use batch of pages during final page-in
Map up to RING_SIZE pages in exit path to fill the ring instead of
populating one page at a time.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
tools/xenpaging/pagein.c | 36 ++++++++++++++++++++++++------------
tools/xenpaging/xenpaging.c | 18 +++++++++++++-----
tools/xenpaging/xenpaging.h | 7 +++++--
3 files changed, 42 insertions(+), 19 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/pagein.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/pagein.c
+++ xen-4.1.2-testing/tools/xenpaging/pagein.c
@@ -1,14 +1,16 @@
/* Trigger a page-in in a separate thread-of-execution to avoid deadlock */
#include <pthread.h>
-#include "xc_private.h"
+#include <xc_private.h>
+#include "xenpaging.h"
struct page_in_args {
domid_t dom;
+ unsigned long *pagein_queue;
xc_interface *xch;
};
static struct page_in_args page_in_args;
-static unsigned long page_in_gfn;
+static unsigned long page_in_request;
static unsigned int page_in_possible;
static pthread_t page_in_thread;
@@ -19,19 +21,28 @@ static void *page_in(void *arg)
{
struct page_in_args *pia = arg;
void *page;
- xen_pfn_t gfn;
+ int i, num;
+ xen_pfn_t gfns[XENPAGING_PAGEIN_QUEUE_SIZE];
while (1)
{
pthread_mutex_lock(&page_in_mutex);
- while (!page_in_gfn)
+ while (!page_in_request)
pthread_cond_wait(&page_in_cond, &page_in_mutex);
- gfn = page_in_gfn;
- page_in_gfn = 0;
+ num = 0;
+ for (i = 0; i < XENPAGING_PAGEIN_QUEUE_SIZE; i++)
+ {
+ if (!pia->pagein_queue[i])
+ continue;
+ gfns[num] = pia->pagein_queue[i];
+ pia->pagein_queue[i] = 0;
+ num++;
+ }
+ page_in_request = 0;
pthread_mutex_unlock(&page_in_mutex);
/* Ignore errors */
- page = xc_map_foreign_pages(pia->xch, pia->dom, PROT_READ, &gfn, 1);
+ page = xc_map_foreign_pages(pia->xch, pia->dom, PROT_READ, gfns, num);
if (page)
munmap(page, PAGE_SIZE);
}
@@ -39,21 +50,22 @@ static void *page_in(void *arg)
pthread_exit(NULL);
}
-void page_in_trigger(unsigned long gfn)
+void page_in_trigger(void)
{
if (!page_in_possible)
return;
pthread_mutex_lock(&page_in_mutex);
- page_in_gfn = gfn;
+ page_in_request = 1;
pthread_mutex_unlock(&page_in_mutex);
pthread_cond_signal(&page_in_cond);
}
-void create_page_in_thread(domid_t domain_id, xc_interface *xch)
+void create_page_in_thread(xenpaging_t *paging)
{
- page_in_args.dom = domain_id;
- page_in_args.xch = xch;
+ page_in_args.dom = paging->mem_event.domain_id;
+ page_in_args.pagein_queue = paging->pagein_queue;
+ page_in_args.xch = paging->xc_handle;
if (pthread_create(&page_in_thread, NULL, page_in, &page_in_args) == 0)
page_in_possible = 1;
}
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -648,7 +648,7 @@ int main(int argc, char *argv[])
sigaction(SIGALRM, &act, NULL);
/* listen for page-in events to stop pager */
- create_page_in_thread(paging->mem_event.domain_id, xch);
+ create_page_in_thread(paging);
/* Evict pages */
for ( i = 0; i < paging->num_pages; i++ )
@@ -764,16 +764,24 @@ int main(int argc, char *argv[])
/* Write all pages back into the guest */
if ( interrupted == SIGTERM || interrupted == SIGINT )
{
+ int num = 0;
for ( i = 0; i < paging->domain_info->max_pages; i++ )
{
if ( test_bit(i, paging->bitmap) )
{
- page_in_trigger(i);
- break;
+ paging->pagein_queue[num] = i;
+ num++;
+ if ( num == XENPAGING_PAGEIN_QUEUE_SIZE )
+ break;
}
}
- /* If no more pages to process, exit loop */
- if ( i == paging->domain_info->max_pages )
+ /*
+ * One more round if there are still pages to process.
+ * If no more pages to process, exit loop.
+ */
+ if ( num )
+ page_in_trigger();
+ else if ( i == paging->domain_info->max_pages )
break;
}
else
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.h
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.h
@@ -29,6 +29,8 @@
#include <xen/event_channel.h>
#include <xen/mem_event.h>
+#define XENPAGING_PAGEIN_QUEUE_SIZE 64
+
typedef struct mem_event {
domid_t domain_id;
xc_evtchn *xce_handle;
@@ -49,6 +51,7 @@ typedef struct xenpaging {
mem_event_t mem_event;
int num_pages;
int policy_mru_size;
+ unsigned long pagein_queue[XENPAGING_PAGEIN_QUEUE_SIZE];
} xenpaging_t;
@@ -58,8 +61,8 @@ typedef struct xenpaging_victim {
} xenpaging_victim_t;
-extern void create_page_in_thread(domid_t domain_id, xc_interface *xch);
-extern void page_in_trigger(unsigned long gfn);
+extern void create_page_in_thread(xenpaging_t *paging);
+extern void page_in_trigger(void);
#endif // __XEN_PAGING_H__
++++++
23841-mem_event_pass_mem_event_domain_pointer_to_mem_event_functions.patch
++++++
changeset: 23841:ed7586b1d515
user: Olaf Hering <olaf@xxxxxxxxx>
date: Fri Sep 16 12:13:31 2011 +0100
files: xen/arch/x86/hvm/hvm.c xen/arch/x86/mm/mem_event.c
xen/arch/x86/mm/mem_sharing.c xen/arch/x86/mm/p2m.c
xen/include/asm-x86/mem_event.h
description:
mem_event: pass mem_event_domain pointer to mem_event functions
Pass a struct mem_event_domain pointer to the various mem_event
functions. This will be used in a subsequent patch which creates
different ring buffers for the memshare, xenpaging and memaccess
functionality.
Remove the struct domain argument from some functions.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
---
xen/arch/x86/hvm/hvm.c | 4 -
xen/arch/x86/mm/mem_event.c | 95 ++++++++++++++++++++--------------------
xen/arch/x86/mm/mem_sharing.c | 6 +-
xen/arch/x86/mm/p2m.c | 18 +++----
xen/include/asm-x86/mem_event.h | 8 +--
5 files changed, 66 insertions(+), 65 deletions(-)
Index: xen-4.1.2-testing/xen/arch/x86/hvm/hvm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/hvm.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/hvm.c
@@ -3909,7 +3909,7 @@ static int hvm_memory_event_traps(long p
if ( (p & HVMPME_onchangeonly) && (value == old) )
return 1;
- rc = mem_event_check_ring(d);
+ rc = mem_event_check_ring(d, &d->mem_event);
if ( rc )
return rc;
@@ -3932,7 +3932,7 @@ static int hvm_memory_event_traps(long p
req.gla_valid = 1;
}
- mem_event_put_request(d, &req);
+ mem_event_put_request(d, &d->mem_event, &req);
return 1;
}
Index: xen-4.1.2-testing/xen/arch/x86/mm/mem_event.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/mem_event.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/mem_event.c
@@ -33,21 +33,21 @@
#define xen_rmb() rmb()
#define xen_wmb() wmb()
-#define mem_event_ring_lock_init(_d)
spin_lock_init(&(_d)->mem_event.ring_lock)
-#define mem_event_ring_lock(_d) spin_lock(&(_d)->mem_event.ring_lock)
-#define mem_event_ring_unlock(_d) spin_unlock(&(_d)->mem_event.ring_lock)
+#define mem_event_ring_lock_init(_med) spin_lock_init(&(_med)->ring_lock)
+#define mem_event_ring_lock(_med) spin_lock(&(_med)->ring_lock)
+#define mem_event_ring_unlock(_med) spin_unlock(&(_med)->ring_lock)
-static int mem_event_enable(struct domain *d, mfn_t ring_mfn, mfn_t shared_mfn)
+static int mem_event_enable(struct domain *d, struct mem_event_domain *med,
mfn_t ring_mfn, mfn_t shared_mfn)
{
int rc;
/* Map ring and shared pages */
- d->mem_event.ring_page = map_domain_page(mfn_x(ring_mfn));
- if ( d->mem_event.ring_page == NULL )
+ med->ring_page = map_domain_page(mfn_x(ring_mfn));
+ if ( med->ring_page == NULL )
goto err;
- d->mem_event.shared_page = map_domain_page(mfn_x(shared_mfn));
- if ( d->mem_event.shared_page == NULL )
+ med->shared_page = map_domain_page(mfn_x(shared_mfn));
+ if ( med->shared_page == NULL )
goto err_ring;
/* Allocate event channel */
@@ -56,15 +56,15 @@ static int mem_event_enable(struct domai
if ( rc < 0 )
goto err_shared;
- ((mem_event_shared_page_t *)d->mem_event.shared_page)->port = rc;
- d->mem_event.xen_port = rc;
+ ((mem_event_shared_page_t *)med->shared_page)->port = rc;
+ med->xen_port = rc;
/* Prepare ring buffer */
- FRONT_RING_INIT(&d->mem_event.front_ring,
- (mem_event_sring_t *)d->mem_event.ring_page,
+ FRONT_RING_INIT(&med->front_ring,
+ (mem_event_sring_t *)med->ring_page,
PAGE_SIZE);
- mem_event_ring_lock_init(d);
+ mem_event_ring_lock_init(med);
/* Wake any VCPUs paused for memory events */
mem_event_unpause_vcpus(d);
@@ -72,34 +72,34 @@ static int mem_event_enable(struct domai
return 0;
err_shared:
- unmap_domain_page(d->mem_event.shared_page);
- d->mem_event.shared_page = NULL;
+ unmap_domain_page(med->shared_page);
+ med->shared_page = NULL;
err_ring:
- unmap_domain_page(d->mem_event.ring_page);
- d->mem_event.ring_page = NULL;
+ unmap_domain_page(med->ring_page);
+ med->ring_page = NULL;
err:
return 1;
}
-static int mem_event_disable(struct domain *d)
+static int mem_event_disable(struct mem_event_domain *med)
{
- unmap_domain_page(d->mem_event.ring_page);
- d->mem_event.ring_page = NULL;
+ unmap_domain_page(med->ring_page);
+ med->ring_page = NULL;
- unmap_domain_page(d->mem_event.shared_page);
- d->mem_event.shared_page = NULL;
+ unmap_domain_page(med->shared_page);
+ med->shared_page = NULL;
return 0;
}
-void mem_event_put_request(struct domain *d, mem_event_request_t *req)
+void mem_event_put_request(struct domain *d, struct mem_event_domain *med,
mem_event_request_t *req)
{
mem_event_front_ring_t *front_ring;
RING_IDX req_prod;
- mem_event_ring_lock(d);
+ mem_event_ring_lock(med);
- front_ring = &d->mem_event.front_ring;
+ front_ring = &med->front_ring;
req_prod = front_ring->req_prod_pvt;
/* Copy request */
@@ -107,23 +107,23 @@ void mem_event_put_request(struct domain
req_prod++;
/* Update ring */
- d->mem_event.req_producers--;
+ med->req_producers--;
front_ring->req_prod_pvt = req_prod;
RING_PUSH_REQUESTS(front_ring);
- mem_event_ring_unlock(d);
+ mem_event_ring_unlock(med);
- notify_via_xen_event_channel(d, d->mem_event.xen_port);
+ notify_via_xen_event_channel(d, med->xen_port);
}
-void mem_event_get_response(struct domain *d, mem_event_response_t *rsp)
+void mem_event_get_response(struct mem_event_domain *med, mem_event_response_t
*rsp)
{
mem_event_front_ring_t *front_ring;
RING_IDX rsp_cons;
- mem_event_ring_lock(d);
+ mem_event_ring_lock(med);
- front_ring = &d->mem_event.front_ring;
+ front_ring = &med->front_ring;
rsp_cons = front_ring->rsp_cons;
/* Copy response */
@@ -134,7 +134,7 @@ void mem_event_get_response(struct domai
front_ring->rsp_cons = rsp_cons;
front_ring->sring->rsp_event = rsp_cons + 1;
- mem_event_ring_unlock(d);
+ mem_event_ring_unlock(med);
}
void mem_event_unpause_vcpus(struct domain *d)
@@ -152,35 +152,35 @@ void mem_event_mark_and_pause(struct vcp
vcpu_sleep_nosync(v);
}
-void mem_event_put_req_producers(struct domain *d)
+void mem_event_put_req_producers(struct mem_event_domain *med)
{
- mem_event_ring_lock(d);
- d->mem_event.req_producers--;
- mem_event_ring_unlock(d);
+ mem_event_ring_lock(med);
+ med->req_producers--;
+ mem_event_ring_unlock(med);
}
-int mem_event_check_ring(struct domain *d)
+int mem_event_check_ring(struct domain *d, struct mem_event_domain *med)
{
struct vcpu *curr = current;
int free_requests;
int ring_full = 1;
- if ( !d->mem_event.ring_page )
+ if ( !med->ring_page )
return -1;
- mem_event_ring_lock(d);
+ mem_event_ring_lock(med);
- free_requests = RING_FREE_REQUESTS(&d->mem_event.front_ring);
- if ( d->mem_event.req_producers < free_requests )
+ free_requests = RING_FREE_REQUESTS(&med->front_ring);
+ if ( med->req_producers < free_requests )
{
- d->mem_event.req_producers++;
+ med->req_producers++;
ring_full = 0;
}
- if ( (curr->domain->domain_id == d->domain_id) && ring_full )
+ if ( ring_full && (curr->domain == d) )
mem_event_mark_and_pause(curr);
- mem_event_ring_unlock(d);
+ mem_event_ring_unlock(med);
return ring_full;
}
@@ -230,6 +230,7 @@ int mem_event_domctl(struct domain *d, x
{
struct domain *dom_mem_event = current->domain;
struct vcpu *v = current;
+ struct mem_event_domain *med = &d->mem_event;
unsigned long ring_addr = mec->ring_addr;
unsigned long shared_addr = mec->shared_addr;
l1_pgentry_t l1e;
@@ -242,7 +243,7 @@ int mem_event_domctl(struct domain *d, x
* the cache is in an undefined state and so is the guest
*/
rc = -EBUSY;
- if ( d->mem_event.ring_page )
+ if ( med->ring_page )
break;
/* Currently only EPT is supported */
@@ -270,7 +271,7 @@ int mem_event_domctl(struct domain *d, x
break;
rc = -EINVAL;
- if ( mem_event_enable(d, ring_mfn, shared_mfn) != 0 )
+ if ( mem_event_enable(d, med, ring_mfn, shared_mfn) != 0 )
break;
rc = 0;
@@ -279,7 +280,7 @@ int mem_event_domctl(struct domain *d, x
case XEN_DOMCTL_MEM_EVENT_OP_DISABLE:
{
- rc = mem_event_disable(d);
+ rc = mem_event_disable(&d->mem_event);
}
break;
Index: xen-4.1.2-testing/xen/arch/x86/mm/mem_sharing.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/mem_sharing.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/mem_sharing.c
@@ -322,12 +322,12 @@ static struct page_info* mem_sharing_all
req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
}
- if(mem_event_check_ring(d)) return page;
+ if(mem_event_check_ring(d, &d->mem_event)) return page;
req.gfn = gfn;
req.p2mt = p2m_ram_shared;
req.vcpu_id = v->vcpu_id;
- mem_event_put_request(d, &req);
+ mem_event_put_request(d, &d->mem_event, &req);
return page;
}
@@ -342,7 +342,7 @@ int mem_sharing_sharing_resume(struct do
mem_event_response_t rsp;
/* Get request off the ring */
- mem_event_get_response(d, &rsp);
+ mem_event_get_response(&d->mem_event, &rsp);
/* Unpause domain/vcpu */
if( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
@@ -2923,7 +2923,7 @@ void p2m_mem_paging_drop_page(struct p2m
struct domain *d = p2m->domain;
/* Check that there's space on the ring for this request */
- if ( mem_event_check_ring(d) == 0)
+ if ( mem_event_check_ring(d, &d->mem_event) == 0)
{
/* Send release notification to pager */
memset(&req, 0, sizeof(req));
@@ -2931,7 +2931,7 @@ void p2m_mem_paging_drop_page(struct p2m
req.gfn = gfn;
req.vcpu_id = v->vcpu_id;
- mem_event_put_request(d, &req);
+ mem_event_put_request(d, &d->mem_event, &req);
}
}
@@ -2943,7 +2943,7 @@ void p2m_mem_paging_populate(struct p2m_
struct domain *d = p2m->domain;
/* Check that there's space on the ring for this request */
- if ( mem_event_check_ring(d) )
+ if ( mem_event_check_ring(d, &d->mem_event) )
return;
memset(&req, 0, sizeof(req));
@@ -2970,7 +2970,7 @@ void p2m_mem_paging_populate(struct p2m_
else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
{
/* gfn is already on its way back and vcpu is not paused */
- mem_event_put_req_producers(d);
+ mem_event_put_req_producers(&d->mem_event);
return;
}
@@ -2979,7 +2979,7 @@ void p2m_mem_paging_populate(struct p2m_
req.p2mt = p2mt;
req.vcpu_id = v->vcpu_id;
- mem_event_put_request(d, &req);
+ mem_event_put_request(d, &d->mem_event, &req);
}
int p2m_mem_paging_prep(struct p2m_domain *p2m, unsigned long gfn)
@@ -3008,7 +3008,7 @@ void p2m_mem_paging_resume(struct p2m_do
mfn_t mfn;
/* Pull the response off the ring */
- mem_event_get_response(d, &rsp);
+ mem_event_get_response(&d->mem_event, &rsp);
/* Fix p2m entry if the page was not dropped */
if ( !(rsp.flags & MEM_EVENT_FLAG_DROP_PAGE) )
@@ -3055,7 +3055,7 @@ void p2m_mem_access_check(unsigned long
p2m_unlock(p2m);
/* Otherwise, check if there is a memory event listener, and send the
message along */
- res = mem_event_check_ring(d);
+ res = mem_event_check_ring(d, &d->mem_event);
if ( res < 0 )
{
/* No listener */
@@ -3099,7 +3099,7 @@ void p2m_mem_access_check(unsigned long
req.vcpu_id = v->vcpu_id;
- mem_event_put_request(d, &req);
+ mem_event_put_request(d, &d->mem_event, &req);
/* VCPU paused, mem event request sent */
}
@@ -3109,7 +3109,7 @@ void p2m_mem_access_resume(struct p2m_do
struct domain *d = p2m->domain;
mem_event_response_t rsp;
- mem_event_get_response(d, &rsp);
+ mem_event_get_response(&d->mem_event, &rsp);
/* Unpause domain */
if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
Index: xen-4.1.2-testing/xen/include/asm-x86/mem_event.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/asm-x86/mem_event.h
+++ xen-4.1.2-testing/xen/include/asm-x86/mem_event.h
@@ -26,10 +26,10 @@
/* Pauses VCPU while marking pause flag for mem event */
void mem_event_mark_and_pause(struct vcpu *v);
-int mem_event_check_ring(struct domain *d);
-void mem_event_put_req_producers(struct domain *d);
-void mem_event_put_request(struct domain *d, mem_event_request_t *req);
-void mem_event_get_response(struct domain *d, mem_event_response_t *rsp);
+int mem_event_check_ring(struct domain *d, struct mem_event_domain *med);
+void mem_event_put_req_producers(struct mem_event_domain *med);
+void mem_event_put_request(struct domain *d, struct mem_event_domain *med,
mem_event_request_t *req);
+void mem_event_get_response(struct mem_event_domain *med, mem_event_response_t
*rsp);
void mem_event_unpause_vcpus(struct domain *d);
int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
++++++
23842-mem_event_use_different_ringbuffers_for_share_paging_and_access.patch
++++++
++++ 868 lines (skipped)
++++++ 23853-x86-pv-cpuid-xsave.patch ++++++
# HG changeset patch
# User Shan Haitao <haitao.shan@xxxxxxxxx>
# Date 1316300518 -3600
# Node ID b78235de5c6407023759f9bbf723dd83887fedf0
# Parent c944e82bb092925f31403a129087e9d40e0fa06a
Fix PV CPUID virtualization of XSave
The patch will fix XSave CPUID virtualization for PV guests. The XSave
area size returned by CPUID leaf D is changed dynamically depending on
the XCR0. Tools/libxc only assigns a static value. The fix will adjust
xsave area size during runtime.
Note: This fix is already in HVM cpuid virtualization. And Dom0 is not
affected, either.
Signed-off-by: Shan Haitao <haitao.shan@xxxxxxxxx>
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2310,7 +2310,7 @@ void hvm_cpuid(unsigned int input, unsig
{
/* reset EBX to default value first */
*ebx = XSAVE_AREA_MIN_SIZE;
- for ( sub_leaf = 2; sub_leaf < 64; sub_leaf++ )
+ for ( sub_leaf = 2; sub_leaf < 63; sub_leaf++ )
{
if ( !(v->arch.xcr0 & (1ULL << sub_leaf)) )
continue;
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -768,6 +768,30 @@ static void pv_cpuid(struct cpu_user_reg
{
if ( !cpuid_hypervisor_leaves(a, c, &a, &b, &c, &d) )
domain_cpuid(current->domain, a, c, &a, &b, &c, &d);
+
+ switch ( a )
+ {
+ case 0xd:
+ {
+ unsigned int sub_leaf, _eax, _ebx, _ecx, _edx;
+ /* EBX value of main leaf 0 depends on enabled xsave features */
+ if ( c == 0 && current->arch.xcr0 )
+ {
+ /* reset EBX to default value first */
+ b = XSAVE_AREA_MIN_SIZE;
+ for ( sub_leaf = 2; sub_leaf < 63; sub_leaf++ )
+ {
+ if ( !(current->arch.xcr0 & (1ULL << sub_leaf)) )
+ continue;
+ domain_cpuid(current->domain, a, c, &_eax, &_ebx, &_ecx,
+ &_edx);
+ if ( (_eax + _ebx) > b )
+ b = _eax + _ebx;
+ }
+ }
+ break;
+ }
+ }
goto out;
}
++++++ 23874-xenpaging_track_number_of_paged_pages_in_struct_domain.patch ++++++
changeset: 23874:651aed73b39c
user: Olaf Hering <olafiaepfle.de>
date: Mon Sep 26 22:19:42 2011 +0100
files: tools/libxc/xc_domain.c tools/libxc/xenctrl.h
xen/arch/x86/mm/p2m.c xen/common/domctl.c xen/include/public/domctl.h
xen/include/xen/sched.h
description:
xenpaging: track number of paged pages in struct domain
The toolstack should know how many pages are paged-out at a given point
in time so it could make smarter decisions about how many pages should
be paged or ballooned.
Add a new member to xen_domctl_getdomaininfo and bump interface version.
Use the new member in xc_dominfo_t.
The SONAME of libxc should be changed if this patch gets applied.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
---
tools/libxc/xc_domain.c | 1 +
tools/libxc/xenctrl.h | 1 +
xen/arch/x86/mm/p2m.c | 5 +++++
xen/common/domctl.c | 1 +
xen/include/public/domctl.h | 3 ++-
xen/include/xen/sched.h | 1 +
6 files changed, 11 insertions(+), 1 deletion(-)
Index: xen-4.1.2-testing/tools/libxc/xc_domain.c
===================================================================
--- xen-4.1.2-testing.orig/tools/libxc/xc_domain.c
+++ xen-4.1.2-testing/tools/libxc/xc_domain.c
@@ -235,6 +235,7 @@ int xc_domain_getinfo(xc_interface *xch,
info->ssidref = domctl.u.getdomaininfo.ssidref;
info->nr_pages = domctl.u.getdomaininfo.tot_pages;
info->nr_shared_pages = domctl.u.getdomaininfo.shr_pages;
+ info->nr_paged_pages = domctl.u.getdomaininfo.paged_pages;
info->max_memkb = domctl.u.getdomaininfo.max_pages << (PAGE_SHIFT-10);
info->shared_info_frame = domctl.u.getdomaininfo.shared_info_frame;
info->cpu_time = domctl.u.getdomaininfo.cpu_time;
Index: xen-4.1.2-testing/tools/libxc/xenctrl.h
===================================================================
--- xen-4.1.2-testing.orig/tools/libxc/xenctrl.h
+++ xen-4.1.2-testing/tools/libxc/xenctrl.h
@@ -353,6 +353,7 @@ typedef struct xc_dominfo {
unsigned int shutdown_reason; /* only meaningful if shutdown==1 */
unsigned long nr_pages; /* current number, not maximum */
unsigned long nr_shared_pages;
+ unsigned long nr_paged_pages;
unsigned long shared_info_frame;
uint64_t cpu_time;
unsigned long max_memkb;
Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
@@ -2913,6 +2913,9 @@ int p2m_mem_paging_evict(struct p2m_doma
/* Put the page back so it gets freed */
put_page(page);
+ /* Track number of paged gfns */
+ atomic_inc(&p2m->domain->paged_pages);
+
return 0;
}
@@ -2997,6 +3000,8 @@ int p2m_mem_paging_prep(struct p2m_domai
audit_p2m(p2m, 1);
p2m_unlock(p2m);
+ atomic_dec(&p2m->domain->paged_pages);
+
return 0;
}
Index: xen-4.1.2-testing/xen/common/domctl.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/domctl.c
+++ xen-4.1.2-testing/xen/common/domctl.c
@@ -139,6 +139,7 @@ void getdomaininfo(struct domain *d, str
info->tot_pages = d->tot_pages;
info->max_pages = d->max_pages;
info->shr_pages = atomic_read(&d->shr_pages);
+ info->paged_pages = atomic_read(&d->paged_pages);
info->shared_info_frame = mfn_to_gmfn(d, __pa(d->shared_info)>>PAGE_SHIFT);
BUG_ON(SHARED_M2P(info->shared_info_frame));
Index: xen-4.1.2-testing/xen/include/public/domctl.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/public/domctl.h
+++ xen-4.1.2-testing/xen/include/public/domctl.h
@@ -35,7 +35,7 @@
#include "xen.h"
#include "grant_table.h"
-#define XEN_DOMCTL_INTERFACE_VERSION 0x00000007
+#define XEN_DOMCTL_INTERFACE_VERSION 0x00000008
/*
* NB. xen_domctl.domain is an IN/OUT parameter for this operation.
@@ -95,6 +95,7 @@ struct xen_domctl_getdomaininfo {
uint64_aligned_t tot_pages;
uint64_aligned_t max_pages;
uint64_aligned_t shr_pages;
+ uint64_aligned_t paged_pages;
uint64_aligned_t shared_info_frame; /* GMFN of shared_info struct */
uint64_aligned_t cpu_time;
uint32_t nr_online_vcpus; /* Number of VCPUs currently online. */
Index: xen-4.1.2-testing/xen/include/xen/sched.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/xen/sched.h
+++ xen-4.1.2-testing/xen/include/xen/sched.h
@@ -215,6 +215,7 @@ struct domain
unsigned int tot_pages; /* number of pages currently possesed */
unsigned int max_pages; /* maximum value for tot_pages */
atomic_t shr_pages; /* number of shared pages */
+ atomic_t paged_pages; /* number of paged-out pages */
unsigned int xenheap_pages; /* # pages allocated from Xen heap */
unsigned int max_vcpus;
++++++ 23897-x86-mce-offline-again.patch ++++++
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1317413803 -3600
# Node ID 2215d7d7382617adbe97831fe35752a027917d1d
# Parent d568e2313fd6f055b66a6c3cb2bca6372b77692e
X86 MCE: Prevent malicious guest access broken page again
To avoid recursive mce.
Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/cpu/mcheck/mce_intel.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/cpu/mcheck/mce_intel.c
+++ xen-4.1.2-testing/xen/arch/x86/cpu/mcheck/mce_intel.c
@@ -639,6 +639,8 @@ static void intel_memerr_dhandler(int bn
/* This is free page */
if (status & PG_OFFLINE_OFFLINED)
result->result = MCA_RECOVERED;
+ else if (status & PG_OFFLINE_AGAIN)
+ result->result = MCA_NO_ACTION;
else if (status & PG_OFFLINE_PENDING) {
/* This page has owner */
if (status & PG_OFFLINE_OWNED) {
Index: xen-4.1.2-testing/xen/common/page_alloc.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/page_alloc.c
+++ xen-4.1.2-testing/xen/common/page_alloc.c
@@ -38,6 +38,7 @@
#include <xen/tmem.h>
#include <xen/tmem_xen.h>
#include <public/sysctl.h>
+#include <public/sched.h>
#include <asm/page.h>
#include <asm/numa.h>
#include <asm/flushtlb.h>
@@ -708,6 +709,19 @@ int offline_page(unsigned long mfn, int
return -EINVAL;
}
+ /*
+ * NB. When broken page belong to guest, usually hypervisor will
+ * notify the guest to handle the broken page. However, hypervisor
+ * need to prevent malicious guest access the broken page again.
+ * Under such case, hypervisor shutdown guest, preventing recursive mce.
+ */
+ if ( (pg->count_info & PGC_broken) && (owner = page_get_owner(pg)) )
+ {
+ *status = PG_OFFLINE_AGAIN;
+ domain_shutdown(owner, SHUTDOWN_crash);
+ return 0;
+ }
+
spin_lock(&heap_lock);
old_info = mark_page_offline(pg, broken);
Index: xen-4.1.2-testing/xen/include/public/sysctl.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/public/sysctl.h
+++ xen-4.1.2-testing/xen/include/public/sysctl.h
@@ -399,6 +399,7 @@ struct xen_sysctl_page_offline_op {
#define PG_OFFLINE_OFFLINED (0x1UL << 1)
#define PG_OFFLINE_PENDING (0x1UL << 2)
#define PG_OFFLINE_FAILED (0x1UL << 3)
+#define PG_OFFLINE_AGAIN (0x1UL << 4)
#define PG_ONLINE_FAILED PG_OFFLINE_FAILED
#define PG_ONLINE_ONLINED PG_OFFLINE_OFFLINED
++++++ 23900-xzalloc.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1317730526 -7200
# Node ID e09ebf7a31f55bb26c3cce7695a435ed20adf05b
# Parent a99d75671a911f9c0d5d11e0fe88a0a65863cb44
introduce xzalloc() & Co
Rather than having to match a call to one of the xmalloc() flavors with
a subsequent memset(), introduce a zeroing variant of each of those
flavors.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
--- a/xen/common/xmalloc_tlsf.c
+++ b/xen/common/xmalloc_tlsf.c
@@ -585,6 +585,13 @@ void *_xmalloc(unsigned long size, unsig
return p;
}
+void *_xzalloc(unsigned long size, unsigned long align)
+{
+ void *p = _xmalloc(size, align);
+
+ return p ? memset(p, 0, size) : p;
+}
+
void xfree(void *p)
{
struct bhdr *b;
--- a/xen/include/acpi/platform/aclinux.h
+++ b/xen/include/acpi/platform/aclinux.h
@@ -77,10 +77,7 @@
#define acpi_thread_id struct vcpu *
#define ACPI_ALLOCATE(a) xmalloc_bytes(a)
-#define ACPI_ALLOCATE_ZEROED(a) ({ \
- void *p = xmalloc_bytes(a); \
- if ( p ) memset(p, 0, a); \
- p; })
+#define ACPI_ALLOCATE_ZEROED(a) xzalloc_bytes(a)
#define ACPI_FREE(a) xfree(a)
#endif /* __ACLINUX_H__ */
--- a/xen/include/xen/xmalloc.h
+++ b/xen/include/xen/xmalloc.h
@@ -8,19 +8,25 @@
/* Allocate space for typed object. */
#define xmalloc(_type) ((_type *)_xmalloc(sizeof(_type), __alignof__(_type)))
+#define xzalloc(_type) ((_type *)_xzalloc(sizeof(_type), __alignof__(_type)))
/* Allocate space for array of typed objects. */
#define xmalloc_array(_type, _num) \
((_type *)_xmalloc_array(sizeof(_type), __alignof__(_type), _num))
+#define xzalloc_array(_type, _num) \
+ ((_type *)_xzalloc_array(sizeof(_type), __alignof__(_type), _num))
/* Allocate untyped storage. */
-#define xmalloc_bytes(_bytes) (_xmalloc(_bytes, SMP_CACHE_BYTES))
+#define xmalloc_bytes(_bytes) _xmalloc(_bytes, SMP_CACHE_BYTES)
+#define xzalloc_bytes(_bytes) _xzalloc(_bytes, SMP_CACHE_BYTES)
/* Free any of the above. */
extern void xfree(void *);
/* Underlying functions */
extern void *_xmalloc(unsigned long size, unsigned long align);
+extern void *_xzalloc(unsigned long size, unsigned long align);
+
static inline void *_xmalloc_array(
unsigned long size, unsigned long align, unsigned long num)
{
@@ -30,6 +36,15 @@ static inline void *_xmalloc_array(
return _xmalloc(size * num, align);
}
+static inline void *_xzalloc_array(
+ unsigned long size, unsigned long align, unsigned long num)
+{
+ /* Check for overflow. */
+ if (size && num > UINT_MAX / size)
+ return NULL;
+ return _xzalloc(size * num, align);
+}
+
/*
* Pooled allocator interface.
*/
++++++ 23904-xenpaging_use_p2m-get_entry_in_p2m_mem_paging_functions.patch
++++++
changeset: 23904:ecab267b85ef
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu Oct 06 12:33:17 2011 +0100
files: xen/arch/x86/mm/p2m.c
description:
xenpaging: use p2m->get_entry() in p2m_mem_paging functions
Use p2m->get_entry() in the p2m_mem_paging functions. This preserves the
p2m_access type when gfn is updated with set_p2m_entry().
Its also a preparation for locking fixes in a subsequent patch.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
---
xen/arch/x86/mm/p2m.c | 25 ++++++++++++++++---------
1 file changed, 16 insertions(+), 9 deletions(-)
Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
@@ -2839,10 +2839,11 @@ int p2m_mem_paging_nominate(struct p2m_d
{
struct page_info *page;
p2m_type_t p2mt;
+ p2m_access_t a;
mfn_t mfn;
int ret;
- mfn = gfn_to_mfn(p2m, gfn, &p2mt);
+ mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
/* Check if mfn is valid */
ret = -EINVAL;
@@ -2869,7 +2870,7 @@ int p2m_mem_paging_nominate(struct p2m_d
/* Fix p2m entry */
p2m_lock(p2m);
- set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_paging_out, p2m->default_access);
+ set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_paging_out, a);
audit_p2m(p2m, 1);
p2m_unlock(p2m);
@@ -2883,11 +2884,12 @@ int p2m_mem_paging_evict(struct p2m_doma
{
struct page_info *page;
p2m_type_t p2mt;
+ p2m_access_t a;
mfn_t mfn;
struct domain *d = p2m->domain;
/* Get mfn */
- mfn = gfn_to_mfn(p2m, gfn, &p2mt);
+ mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
if ( unlikely(!mfn_valid(mfn)) )
return -EINVAL;
@@ -2906,7 +2908,7 @@ int p2m_mem_paging_evict(struct p2m_doma
/* Remove mapping from p2m table */
p2m_lock(p2m);
- set_p2m_entry(p2m, gfn, _mfn(PAGING_MFN), 0, p2m_ram_paged,
p2m->default_access);
+ set_p2m_entry(p2m, gfn, _mfn(PAGING_MFN), 0, p2m_ram_paged, a);
audit_p2m(p2m, 1);
p2m_unlock(p2m);
@@ -2943,6 +2945,7 @@ void p2m_mem_paging_populate(struct p2m_
struct vcpu *v = current;
mem_event_request_t req;
p2m_type_t p2mt;
+ p2m_access_t a;
struct domain *d = p2m->domain;
/* Check that there's space on the ring for this request */
@@ -2955,11 +2958,11 @@ void p2m_mem_paging_populate(struct p2m_
/* Fix p2m mapping */
/* XXX: It seems inefficient to have this here, as it's only needed
* in one case (ept guest accessing paging out page) */
- gfn_to_mfn(p2m, gfn, &p2mt);
+ p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
if ( p2mt == p2m_ram_paged )
{
p2m_lock(p2m);
- set_p2m_entry(p2m, gfn, _mfn(PAGING_MFN), 0, p2m_ram_paging_in_start,
p2m->default_access);
+ set_p2m_entry(p2m, gfn, _mfn(PAGING_MFN), 0, p2m_ram_paging_in_start,
a);
audit_p2m(p2m, 1);
p2m_unlock(p2m);
}
@@ -2988,7 +2991,10 @@ void p2m_mem_paging_populate(struct p2m_
int p2m_mem_paging_prep(struct p2m_domain *p2m, unsigned long gfn)
{
struct page_info *page;
+ p2m_type_t p2mt;
+ p2m_access_t a;
+ p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
/* Get a free page */
page = alloc_domheap_page(p2m->domain, 0);
if ( unlikely(page == NULL) )
@@ -2996,7 +3002,7 @@ int p2m_mem_paging_prep(struct p2m_domai
/* Fix p2m mapping */
p2m_lock(p2m);
- set_p2m_entry(p2m, gfn, page_to_mfn(page), 0, p2m_ram_paging_in,
p2m->default_access);
+ set_p2m_entry(p2m, gfn, page_to_mfn(page), 0, p2m_ram_paging_in, a);
audit_p2m(p2m, 1);
p2m_unlock(p2m);
@@ -3010,6 +3016,7 @@ void p2m_mem_paging_resume(struct p2m_do
struct domain *d = p2m->domain;
mem_event_response_t rsp;
p2m_type_t p2mt;
+ p2m_access_t a;
mfn_t mfn;
/* Pull the response off the ring */
@@ -3018,9 +3025,9 @@ void p2m_mem_paging_resume(struct p2m_do
/* Fix p2m entry if the page was not dropped */
if ( !(rsp.flags & MEM_EVENT_FLAG_DROP_PAGE) )
{
- mfn = gfn_to_mfn(p2m, rsp.gfn, &p2mt);
+ mfn = p2m->get_entry(p2m, rsp.gfn, &p2mt, &a, p2m_query);
p2m_lock(p2m);
- set_p2m_entry(p2m, rsp.gfn, mfn, 0, p2m_ram_rw, p2m->default_access);
+ set_p2m_entry(p2m, rsp.gfn, mfn, 0, p2m_ram_rw, a);
set_gpfn_from_mfn(mfn_x(mfn), rsp.gfn);
audit_p2m(p2m, 1);
p2m_unlock(p2m);
++++++ 23905-xenpaging_fix_locking_in_p2m_mem_paging_functions.patch ++++++
changeset: 23905:50ee6be56460
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu Oct 06 12:33:17 2011 +0100
files: xen/arch/x86/mm/p2m.c
description:
xenpaging: fix locking in p2m_mem_paging functions
As suggested by <hongkaixing@xxxxxxxxxx>, query and adjust the p2mt
under the p2m_lock to prevent races with PoD.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
---
xen/arch/x86/mm/p2m.c | 42 ++++++++++++++++++++++++++----------------
1 file changed, 26 insertions(+), 16 deletions(-)
Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
@@ -2843,6 +2843,8 @@ int p2m_mem_paging_nominate(struct p2m_d
mfn_t mfn;
int ret;
+ p2m_lock(p2m);
+
mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
/* Check if mfn is valid */
@@ -2869,14 +2871,12 @@ int p2m_mem_paging_nominate(struct p2m_d
goto out;
/* Fix p2m entry */
- p2m_lock(p2m);
set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_paging_out, a);
audit_p2m(p2m, 1);
- p2m_unlock(p2m);
-
ret = 0;
out:
+ p2m_unlock(p2m);
return ret;
}
@@ -2887,30 +2887,31 @@ int p2m_mem_paging_evict(struct p2m_doma
p2m_access_t a;
mfn_t mfn;
struct domain *d = p2m->domain;
+ int ret = -EINVAL;
+
+ p2m_lock(p2m);
/* Get mfn */
mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
if ( unlikely(!mfn_valid(mfn)) )
- return -EINVAL;
+ goto out;
if ( (p2mt == p2m_ram_paged) || (p2mt == p2m_ram_paging_in) ||
(p2mt == p2m_ram_paging_in_start) )
- return -EINVAL;
+ goto out;
/* Get the page so it doesn't get modified under Xen's feet */
page = mfn_to_page(mfn);
if ( unlikely(!get_page(page, d)) )
- return -EINVAL;
+ goto out;
/* Decrement guest domain's ref count of the page */
if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
put_page(page);
/* Remove mapping from p2m table */
- p2m_lock(p2m);
set_p2m_entry(p2m, gfn, _mfn(PAGING_MFN), 0, p2m_ram_paged, a);
audit_p2m(p2m, 1);
- p2m_unlock(p2m);
/* Put the page back so it gets freed */
put_page(page);
@@ -2918,7 +2919,11 @@ int p2m_mem_paging_evict(struct p2m_doma
/* Track number of paged gfns */
atomic_inc(&p2m->domain->paged_pages);
- return 0;
+ ret = 0;
+
+ out:
+ p2m_unlock(p2m);
+ return ret;
}
void p2m_mem_paging_drop_page(struct p2m_domain *p2m, unsigned long gfn)
@@ -2958,14 +2963,14 @@ void p2m_mem_paging_populate(struct p2m_
/* Fix p2m mapping */
/* XXX: It seems inefficient to have this here, as it's only needed
* in one case (ept guest accessing paging out page) */
+ p2m_lock(p2m);
p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
if ( p2mt == p2m_ram_paged )
{
- p2m_lock(p2m);
set_p2m_entry(p2m, gfn, _mfn(PAGING_MFN), 0, p2m_ram_paging_in_start,
a);
audit_p2m(p2m, 1);
- p2m_unlock(p2m);
}
+ p2m_unlock(p2m);
/* Pause domain */
if ( v->domain->domain_id == d->domain_id )
@@ -2993,22 +2998,27 @@ int p2m_mem_paging_prep(struct p2m_domai
struct page_info *page;
p2m_type_t p2mt;
p2m_access_t a;
+ int ret = -ENOMEM;
+
+ p2m_lock(p2m);
p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
+
/* Get a free page */
page = alloc_domheap_page(p2m->domain, 0);
if ( unlikely(page == NULL) )
- return -ENOMEM;
+ goto out;
/* Fix p2m mapping */
- p2m_lock(p2m);
set_p2m_entry(p2m, gfn, page_to_mfn(page), 0, p2m_ram_paging_in, a);
audit_p2m(p2m, 1);
- p2m_unlock(p2m);
atomic_dec(&p2m->domain->paged_pages);
- return 0;
+ ret = 0;
+ out:
+ p2m_unlock(p2m);
+ return ret;
}
void p2m_mem_paging_resume(struct p2m_domain *p2m)
@@ -3025,8 +3035,8 @@ void p2m_mem_paging_resume(struct p2m_do
/* Fix p2m entry if the page was not dropped */
if ( !(rsp.flags & MEM_EVENT_FLAG_DROP_PAGE) )
{
- mfn = p2m->get_entry(p2m, rsp.gfn, &p2mt, &a, p2m_query);
p2m_lock(p2m);
+ mfn = p2m->get_entry(p2m, rsp.gfn, &p2mt, &a, p2m_query);
set_p2m_entry(p2m, rsp.gfn, mfn, 0, p2m_ram_rw, a);
set_gpfn_from_mfn(mfn_x(mfn), rsp.gfn);
audit_p2m(p2m, 1);
++++++
23906-xenpaging_remove_confusing_comment_from_p2m_mem_paging_populate.patch
++++++
changeset: 23906:7bf85c3fd9f0
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu Oct 06 12:33:17 2011 +0100
files: xen/arch/x86/mm/p2m.c
description:
xenpaging: remove confusing comment from p2m_mem_paging_populate
Currently there is no way to avoid the double check of the p2mt
because p2m_mem_paging_populate() is called from many places without
the p2m_lock held. Upcoming changes will move the function into
gfn_to_mfn(), so its interface could be changed and the extra
p2m_lock/get_entry can be removed.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
---
xen/arch/x86/mm/p2m.c | 2 --
1 file changed, 2 deletions(-)
Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
@@ -2961,8 +2961,6 @@ void p2m_mem_paging_populate(struct p2m_
req.type = MEM_EVENT_TYPE_PAGING;
/* Fix p2m mapping */
- /* XXX: It seems inefficient to have this here, as it's only needed
- * in one case (ept guest accessing paging out page) */
p2m_lock(p2m);
p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
if ( p2mt == p2m_ram_paged )
++++++ 23908-p2m_query-modify_p2mt_with_p2m_lock_held.patch ++++++
changeset: 23908:88b6e08b8aa8
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu Oct 06 14:15:43 2011 +0100
files: xen/arch/x86/mm/p2m.c
description:
p2m: query/modify p2mt with p2m_lock held
Query and update the p2mt in set_mmio_p2m_entry, clear_mmio_p2m_entry
and set_shared_p2m_entry with the p2m_lock held.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
---
xen/arch/x86/mm/p2m.c | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)
Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
@@ -2751,9 +2751,11 @@ set_mmio_p2m_entry(struct p2m_domain *p2
if ( !paging_mode_translate(p2m->domain) )
return 0;
+ p2m_lock(p2m);
omfn = gfn_to_mfn_query(p2m, gfn, &ot);
if ( p2m_is_grant(ot) )
{
+ p2m_unlock(p2m);
domain_crash(p2m->domain);
return 0;
}
@@ -2764,7 +2766,6 @@ set_mmio_p2m_entry(struct p2m_domain *p2
}
P2M_DEBUG("set mmio %lx %lx\n", gfn, mfn_x(mfn));
- p2m_lock(p2m);
rc = set_p2m_entry(p2m, gfn, mfn, 0, p2m_mmio_direct, p2m->default_access);
audit_p2m(p2m, 1);
p2m_unlock(p2m);
@@ -2785,18 +2786,20 @@ clear_mmio_p2m_entry(struct p2m_domain *
if ( !paging_mode_translate(p2m->domain) )
return 0;
- mfn = gfn_to_mfn(p2m, gfn, &t);
+ p2m_lock(p2m);
+ mfn = gfn_to_mfn_query(p2m, gfn, &t);
/* Do not use mfn_valid() here as it will usually fail for MMIO pages. */
if ( (INVALID_MFN == mfn_x(mfn)) || (t != p2m_mmio_direct) )
{
gdprintk(XENLOG_ERR,
"clear_mmio_p2m_entry: gfn_to_mfn failed! gfn=%08lx\n", gfn);
- return 0;
+ goto out;
}
- p2m_lock(p2m);
rc = set_p2m_entry(p2m, gfn, _mfn(INVALID_MFN), 0, p2m_invalid,
p2m->default_access);
audit_p2m(p2m, 1);
+
+out:
p2m_unlock(p2m);
return rc;
@@ -2813,6 +2816,8 @@ set_shared_p2m_entry(struct p2m_domain *
if ( !paging_mode_translate(p2m->domain) )
return 0;
+ if ( need_lock )
+ p2m_lock(p2m);
omfn = gfn_to_mfn_query(p2m, gfn, &ot);
/* At the moment we only allow p2m change if gfn has already been made
* sharable first */
@@ -2822,8 +2827,6 @@ set_shared_p2m_entry(struct p2m_domain *
set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
P2M_DEBUG("set shared %lx %lx\n", gfn, mfn_x(mfn));
- if ( need_lock )
- p2m_lock(p2m);
rc = set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_shared, p2m->default_access);
if ( need_lock )
p2m_unlock(p2m);
++++++ 23925-x86-AMD-ARAT-Fam12.patch ++++++
# HG changeset patch
# User Boris Ostrovsky <boris.ostrovsky@xxxxxxx>
# Date 1317976335 -7200
# Node ID 08d6ba4e447d6c13c6dfac5c23e84b73961cb109
# Parent 159be83e5fe9111bb30d8b1f83127f5724d44424
x86/AMD: Do not enable ARAT feature on AMD processors below family 0x12
Determining whether an AMD processor is affected by erratum 400 may
have some corner cases and handling these cases is somewhat complicated.
In the interest of simplicity we won't claim ARAT support on processor
families below 0x12.
Mirrors Linux commit e9cdd343a5e42c43bcda01e609fa23089e026470
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxx>
Committed-by: Jan Beulich <jbeulich@xxxxxxxx>
--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -632,8 +632,11 @@ static void __devinit init_amd(struct cp
}
#endif
- /* As a rule processors have APIC timer running in deep C states */
- if (c->x86 > 0xf && !cpu_has_amd_erratum(c, AMD_ERRATUM_400))
+ /*
+ * Family 0x12 and above processors have APIC timer
+ * running in deep C states.
+ */
+ if (c->x86 > 0x11)
set_bit(X86_FEATURE_ARAT, c->x86_capability);
/* Prevent TSC drift in non single-processor, single-core platforms. */
++++++ 23933-pt-bus2bridge-update.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1318231292 -7200
# Node ID 0b81515b8e982e8012c28e5f8d9e965c63b6503d
# Parent 0c2bfd1f9c6822fbd23af0043f83d93be976323c
passthrough: update bus2bridge mapping as PCI devices get added/removed
This deals with two limitations at once: On device removal, the
mapping did not get updated so far at all, and hotplugged devices as
well as such not discoverable by Xen's initial bus scan (including the
case where a non-zero PCI segment wasn't accessible during Xen boot,
but became accessible after Dom0 validated access information against
ACPI data) wouldn't cause updates to the mapping either.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: "Kay, Allen M" <allen.m.kay@xxxxxxxxx>
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -63,11 +63,67 @@ static struct pci_dev *alloc_pdev(u8 bus
list_add(&pdev->alldevs_list, &alldevs_list);
spin_lock_init(&pdev->msix_table_lock);
+ /* update bus2bridge */
+ switch ( pdev_type(bus, devfn) )
+ {
+ u8 sec_bus, sub_bus;
+
+ case DEV_TYPE_PCIe_BRIDGE:
+ break;
+
+ case DEV_TYPE_PCIe2PCI_BRIDGE:
+ case DEV_TYPE_LEGACY_PCI_BRIDGE:
+ sec_bus = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ PCI_SECONDARY_BUS);
+ sub_bus = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ PCI_SUBORDINATE_BUS);
+
+ spin_lock(&bus2bridge_lock);
+ for ( ; sec_bus <= sub_bus; sec_bus++ )
+ {
+ bus2bridge[sec_bus].map = 1;
+ bus2bridge[sec_bus].bus = bus;
+ bus2bridge[sec_bus].devfn = devfn;
+ }
+ spin_unlock(&bus2bridge_lock);
+ break;
+
+ case DEV_TYPE_PCIe_ENDPOINT:
+ case DEV_TYPE_PCI:
+ break;
+
+ default:
+ printk(XENLOG_WARNING "%s: unknown type: %02x:%02x.%u\n",
+ __func__, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+ break;
+ }
+
return pdev;
}
static void free_pdev(struct pci_dev *pdev)
{
+ /* update bus2bridge */
+ switch ( pdev_type(pdev->bus, pdev->devfn) )
+ {
+ u8 dev, func, sec_bus, sub_bus;
+
+ case DEV_TYPE_PCIe2PCI_BRIDGE:
+ case DEV_TYPE_LEGACY_PCI_BRIDGE:
+ dev = PCI_SLOT(pdev->devfn);
+ func = PCI_FUNC(pdev->devfn);
+ sec_bus = pci_conf_read8(pdev->bus, dev, func,
+ PCI_SECONDARY_BUS);
+ sub_bus = pci_conf_read8(pdev->bus, dev, func,
+ PCI_SUBORDINATE_BUS);
+
+ spin_lock(&bus2bridge_lock);
+ for ( ; sec_bus <= sub_bus; sec_bus++ )
+ bus2bridge[sec_bus] = bus2bridge[pdev->bus];
+ spin_unlock(&bus2bridge_lock);
+ break;
+ }
+
list_del(&pdev->alldevs_list);
xfree(pdev);
}
@@ -432,8 +488,6 @@ int __init scan_pci_devices(void)
{
struct pci_dev *pdev;
int bus, dev, func;
- u8 sec_bus, sub_bus;
- int type;
spin_lock(&pcidevs_lock);
for ( bus = 0; bus < 256; bus++ )
@@ -453,41 +507,6 @@ int __init scan_pci_devices(void)
return -ENOMEM;
}
- /* build bus2bridge */
- type = pdev_type(bus, PCI_DEVFN(dev, func));
- switch ( type )
- {
- case DEV_TYPE_PCIe_BRIDGE:
- break;
-
- case DEV_TYPE_PCIe2PCI_BRIDGE:
- case DEV_TYPE_LEGACY_PCI_BRIDGE:
- sec_bus = pci_conf_read8(bus, dev, func,
- PCI_SECONDARY_BUS);
- sub_bus = pci_conf_read8(bus, dev, func,
- PCI_SUBORDINATE_BUS);
-
- spin_lock(&bus2bridge_lock);
- for ( sub_bus &= 0xff; sec_bus <= sub_bus; sec_bus++ )
- {
- bus2bridge[sec_bus].map = 1;
- bus2bridge[sec_bus].bus = bus;
- bus2bridge[sec_bus].devfn = PCI_DEVFN(dev, func);
- }
- spin_unlock(&bus2bridge_lock);
- break;
-
- case DEV_TYPE_PCIe_ENDPOINT:
- case DEV_TYPE_PCI:
- break;
-
- default:
- printk("%s: unknown type: bdf = %x:%x.%x\n",
- __func__, bus, dev, func);
- spin_unlock(&pcidevs_lock);
- return -EINVAL;
- }
-
if ( !func && !(pci_conf_read8(bus, dev, func,
PCI_HEADER_TYPE) & 0x80) )
break;
++++++ 23943-xenpaging_clear_page_content_after_evict.patch ++++++
changeset: 23943:1185ae04b5aa
user: Olaf Hering <olaf@xxxxxxxxx>
date: Tue Oct 11 10:46:28 2011 +0100
files: tools/xenpaging/xenpaging.c xen/arch/x86/mm/p2m.c
description:
xenpaging: clear page content after evict
If the guest happens to read from the gfn while xenpaging is in the process of
evicting the page, the guest may read zeros instead of actual data.
Also if eviction fails the page content will be corrupted and xenpaging wont
attempt to restore the page.
Remove page scrubbing from pager and do it after successful eviction.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
---
tools/xenpaging/xenpaging.c | 3 ---
xen/arch/x86/mm/p2m.c | 3 +++
2 files changed, 3 insertions(+), 3 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -455,9 +455,6 @@ static int xenpaging_evict_page(xenpagin
goto out;
}
- /* Clear page */
- memset(page, 0, PAGE_SIZE);
-
munmap(page, PAGE_SIZE);
/* Tell Xen to evict page */
Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
@@ -2916,6 +2916,9 @@ int p2m_mem_paging_evict(struct p2m_doma
set_p2m_entry(p2m, gfn, _mfn(PAGING_MFN), 0, p2m_ram_paged, a);
audit_p2m(p2m, 1);
+ /* Clear content before returning the page to Xen */
+ scrub_one_page(page);
+
/* Put the page back so it gets freed */
put_page(page);
++++++ 23949-constify_vcpu_set_affinitys_second_parameter.patch ++++++
changeset: 23949:39df16923958
user: Jan Beulich <jbeulich@xxxxxxxx>
date: Thu Oct 13 10:00:13 2011 +0200
files: xen/arch/x86/cpu/mcheck/vmce.c xen/arch/x86/traps.c
xen/common/schedule.c xen/include/xen/sched.h
description:
constify vcpu_set_affinity()'s second parameter
None of the callers actually make use of the function's returning of
the old affinity through its second parameter, and eliminating this
capability allows some callers to no longer use a local variable here,
reducing their stack footprint significantly when building with large
NR_CPUS.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
---
xen/arch/x86/cpu/mcheck/vmce.c | 5 +----
xen/arch/x86/traps.c | 10 ++--------
xen/common/schedule.c | 6 ++----
xen/include/xen/sched.h | 2 +-
4 files changed, 6 insertions(+), 17 deletions(-)
Index: xen-4.1.2-testing/xen/arch/x86/cpu/mcheck/vmce.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/cpu/mcheck/vmce.c
+++ xen-4.1.2-testing/xen/arch/x86/cpu/mcheck/vmce.c
@@ -304,7 +304,6 @@ int vmce_wrmsr(u32 msr, u64 val)
int inject_vmce(struct domain *d)
{
int cpu = smp_processor_id();
- cpumask_t affinity;
/* PV guest and HVM guest have different vMCE# injection methods. */
if ( !test_and_set_bool(d->vcpu[0]->mce_pending) )
@@ -323,11 +322,9 @@ int inject_vmce(struct domain *d)
{
d->vcpu[0]->cpu_affinity_tmp =
d->vcpu[0]->cpu_affinity;
- cpus_clear(affinity);
- cpu_set(cpu, affinity);
mce_printk(MCE_VERBOSE, "MCE: CPU%d set affinity, old %d\n",
cpu, d->vcpu[0]->processor);
- vcpu_set_affinity(d->vcpu[0], &affinity);
+ vcpu_set_affinity(d->vcpu[0], cpumask_of(cpu));
vcpu_kick(d->vcpu[0]);
}
else
Index: xen-4.1.2-testing/xen/arch/x86/traps.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/traps.c
+++ xen-4.1.2-testing/xen/arch/x86/traps.c
@@ -3104,7 +3104,6 @@ static void nmi_mce_softirq(void)
{
int cpu = smp_processor_id();
struct softirq_trap *st = &per_cpu(softirq_trap, cpu);
- cpumask_t affinity;
BUG_ON(st == NULL);
BUG_ON(st->vcpu == NULL);
@@ -3120,9 +3119,7 @@ static void nmi_mce_softirq(void)
* Make sure to wakeup the vcpu on the
* specified processor.
*/
- cpus_clear(affinity);
- cpu_set(st->processor, affinity);
- vcpu_set_affinity(st->vcpu, &affinity);
+ vcpu_set_affinity(st->vcpu, cpumask_of(st->processor));
/* Affinity is restored in the iret hypercall. */
}
@@ -3192,14 +3189,11 @@ void async_exception_cleanup(struct vcpu
!test_and_set_bool(curr->mce_pending) )
{
int cpu = smp_processor_id();
- cpumask_t affinity;
curr->cpu_affinity_tmp = curr->cpu_affinity;
- cpus_clear(affinity);
- cpu_set(cpu, affinity);
printk(XENLOG_DEBUG "MCE: CPU%d set affinity, old %d\n",
cpu, curr->processor);
- vcpu_set_affinity(curr, &affinity);
+ vcpu_set_affinity(curr, cpumask_of(cpu));
}
}
}
Index: xen-4.1.2-testing/xen/common/schedule.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/schedule.c
+++ xen-4.1.2-testing/xen/common/schedule.c
@@ -593,9 +593,9 @@ int cpu_disable_scheduler(unsigned int c
return ret;
}
-int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity)
+int vcpu_set_affinity(struct vcpu *v, const cpumask_t *affinity)
{
- cpumask_t online_affinity, old_affinity;
+ cpumask_t online_affinity;
cpumask_t *online;
if ( v->domain->is_pinned )
@@ -607,9 +607,7 @@ int vcpu_set_affinity(struct vcpu *v, cp
vcpu_schedule_lock_irq(v);
- old_affinity = v->cpu_affinity;
v->cpu_affinity = *affinity;
- *affinity = old_affinity;
if ( !cpu_isset(v->processor, v->cpu_affinity) )
set_bit(_VPF_migrating, &v->pause_flags);
Index: xen-4.1.2-testing/xen/include/xen/sched.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/xen/sched.h
+++ xen-4.1.2-testing/xen/include/xen/sched.h
@@ -623,7 +623,7 @@ void scheduler_free(struct scheduler *sc
int schedule_cpu_switch(unsigned int cpu, struct cpupool *c);
void vcpu_force_reschedule(struct vcpu *v);
int cpu_disable_scheduler(unsigned int cpu);
-int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
+int vcpu_set_affinity(struct vcpu *v, const cpumask_t *affinity);
void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
uint64_t get_cpu_idle_time(unsigned int cpu);
++++++ 23953-xenpaging_handle_evict_failures.patch ++++++
changeset: 23953:eda18b27de6e
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu Oct 13 12:21:10 2011 +0100
files: tools/xenpaging/xenpaging.c xen/arch/x86/mm.c
xen/arch/x86/mm/p2m.c xen/include/public/mem_event.h
description:
xenpaging: handle evict failures
Evict of a nominated gfn must fail if some other process mapped the
page without checking the p2mt of that gfn first.
Add a check to cancel eviction if the page usage count is not 1.
Handle the possible eviction failure in the page-in paths.
After nominate and before evict, something may check the p2mt and call
populate. Handle this case and let the gfn enter the page-in path. The
gfn may still be connected to a mfn, so there is no need to allocate a
new page in prep.
Adjust do_mmu_update to return -ENOENT only if the gfn has entered the
page-in path and if it is not yet connected to a mfn. Otherwise
linux_privcmd_map_foreign_bulk() may loop forever.
Add MEM_EVENT_FLAG_EVICT_FAIL to inform pager that a page-in request for
a possible not-evicted page was sent. xenpaging does currently not need
that flag because failure to evict a gfn will be caught.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
---
tools/xenpaging/xenpaging.c | 10 ++++---
xen/arch/x86/mm.c | 8 ++---
xen/arch/x86/mm/p2m.c | 55 +++++++++++++++++++++++++++++------------
xen/include/public/mem_event.h | 1
4 files changed, 50 insertions(+), 24 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -734,10 +734,12 @@ int main(int argc, char *argv[])
}
else
{
- DPRINTF("page already populated (domain = %d; vcpu = %d;"
- " gfn = %"PRIx64"; paused = %d)\n",
- paging->mem_event.domain_id, req.vcpu_id,
- req.gfn, req.flags & MEM_EVENT_FLAG_VCPU_PAUSED);
+ DPRINTF("page %s populated (domain = %d; vcpu = %d;"
+ " gfn = %"PRIx64"; paused = %d; evict_fail = %d)\n",
+ req.flags & MEM_EVENT_FLAG_EVICT_FAIL ? "not" :
"already",
+ paging->mem_event.domain_id, req.vcpu_id, req.gfn,
+ !!(req.flags & MEM_EVENT_FLAG_VCPU_PAUSED) ,
+ !!(req.flags & MEM_EVENT_FLAG_EVICT_FAIL) );
/* Tell Xen to resume the vcpu */
/* XXX: Maybe just check if the vcpu was paused? */
Index: xen-4.1.2-testing/xen/arch/x86/mm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm.c
+++ xen-4.1.2-testing/xen/arch/x86/mm.c
@@ -3502,7 +3502,7 @@ int do_mmu_update(
rc = -ENOENT;
break;
}
- else if ( p2m_ram_paging_in_start == l1e_p2mt )
+ else if ( p2m_ram_paging_in_start == l1e_p2mt &&
!mfn_valid(mfn) )
{
rc = -ENOENT;
break;
@@ -3543,7 +3543,7 @@ int do_mmu_update(
rc = -ENOENT;
break;
}
- else if ( p2m_ram_paging_in_start == l2e_p2mt )
+ else if ( p2m_ram_paging_in_start == l2e_p2mt &&
!mfn_valid(mfn) )
{
rc = -ENOENT;
break;
@@ -3572,7 +3572,7 @@ int do_mmu_update(
rc = -ENOENT;
break;
}
- else if ( p2m_ram_paging_in_start == l3e_p2mt )
+ else if ( p2m_ram_paging_in_start == l3e_p2mt &&
!mfn_valid(mfn) )
{
rc = -ENOENT;
break;
@@ -3602,7 +3602,7 @@ int do_mmu_update(
rc = -ENOENT;
break;
}
- else if ( p2m_ram_paging_in_start == l4e_p2mt )
+ else if ( p2m_ram_paging_in_start == l4e_p2mt &&
!mfn_valid(mfn) )
{
rc = -ENOENT;
break;
Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
@@ -2899,15 +2899,24 @@ int p2m_mem_paging_evict(struct p2m_doma
if ( unlikely(!mfn_valid(mfn)) )
goto out;
- if ( (p2mt == p2m_ram_paged) || (p2mt == p2m_ram_paging_in) ||
- (p2mt == p2m_ram_paging_in_start) )
+ /* Allow only nominated pages */
+ if ( p2mt != p2m_ram_paging_out )
goto out;
+ ret = -EBUSY;
/* Get the page so it doesn't get modified under Xen's feet */
page = mfn_to_page(mfn);
if ( unlikely(!get_page(page, d)) )
goto out;
+ /* Check page count and type once more */
+ if ( (page->count_info & (PGC_count_mask | PGC_allocated)) !=
+ (2 | PGC_allocated) )
+ goto out_put;
+
+ if ( (page->u.inuse.type_info & PGT_type_mask) != PGT_none )
+ goto out_put;
+
/* Decrement guest domain's ref count of the page */
if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
put_page(page);
@@ -2919,14 +2928,15 @@ int p2m_mem_paging_evict(struct p2m_doma
/* Clear content before returning the page to Xen */
scrub_one_page(page);
- /* Put the page back so it gets freed */
- put_page(page);
-
/* Track number of paged gfns */
atomic_inc(&p2m->domain->paged_pages);
ret = 0;
+ out_put:
+ /* Put the page back so it gets freed */
+ put_page(page);
+
out:
p2m_unlock(p2m);
return ret;
@@ -2957,6 +2967,7 @@ void p2m_mem_paging_populate(struct p2m_
mem_event_request_t req;
p2m_type_t p2mt;
p2m_access_t a;
+ mfn_t mfn;
struct domain *d = p2m->domain;
/* Check that there's space on the ring for this request */
@@ -2968,20 +2979,26 @@ void p2m_mem_paging_populate(struct p2m_
/* Fix p2m mapping */
p2m_lock(p2m);
- p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
- if ( p2mt == p2m_ram_paged )
+ mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
+ /* Allow only nominated or evicted pages to enter page-in path */
+ if ( p2mt == p2m_ram_paging_out || p2mt == p2m_ram_paged )
{
- set_p2m_entry(p2m, gfn, _mfn(PAGING_MFN), 0, p2m_ram_paging_in_start,
a);
+ /* Evict will fail now, tag this request for pager */
+ if ( p2mt == p2m_ram_paging_out )
+ req.flags |= MEM_EVENT_FLAG_EVICT_FAIL;
+
+ set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_paging_in_start, a);
audit_p2m(p2m, 1);
}
p2m_unlock(p2m);
- /* Pause domain */
- if ( v->domain->domain_id == d->domain_id )
+ /* Pause domain if request came from guest and gfn has paging type */
+ if ( p2m_is_paging(p2mt) && v->domain->domain_id == d->domain_id )
{
vcpu_pause_nosync(v);
req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
}
+ /* No need to inform pager if the gfn is not in the page-out path */
else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
{
/* gfn is already on its way back and vcpu is not paused */
@@ -3002,19 +3019,25 @@ int p2m_mem_paging_prep(struct p2m_domai
struct page_info *page;
p2m_type_t p2mt;
p2m_access_t a;
+ mfn_t mfn;
int ret = -ENOMEM;
p2m_lock(p2m);
- p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
+ mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
- /* Get a free page */
- page = alloc_domheap_page(p2m->domain, 0);
- if ( unlikely(page == NULL) )
- goto out;
+ /* Allocate a page if the gfn does not have one yet */
+ if ( !mfn_valid(mfn) )
+ {
+ /* Get a free page */
+ page = alloc_domheap_page(p2m->domain, 0);
+ if ( unlikely(page == NULL) )
+ goto out;
+ mfn = page_to_mfn(page);
+ }
/* Fix p2m mapping */
- set_p2m_entry(p2m, gfn, page_to_mfn(page), 0, p2m_ram_paging_in, a);
+ set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_paging_in, a);
audit_p2m(p2m, 1);
atomic_dec(&p2m->domain->paged_pages);
Index: xen-4.1.2-testing/xen/include/public/mem_event.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/public/mem_event.h
+++ xen-4.1.2-testing/xen/include/public/mem_event.h
@@ -38,6 +38,7 @@
/* Memory event flags */
#define MEM_EVENT_FLAG_VCPU_PAUSED (1 << 0)
#define MEM_EVENT_FLAG_DROP_PAGE (1 << 1)
+#define MEM_EVENT_FLAG_EVICT_FAIL (1 << 2)
/* Reasons for the memory event request */
#define MEM_EVENT_REASON_UNKNOWN 0 /* typical reason */
++++++ 23955-x86-pv-cpuid-xsave.patch ++++++
# HG changeset patch
# User Shan Haitao <haitao.shan@xxxxxxxxx>
# Date 1318517935 -3600
# Node ID bbde1453cbd95d6d36febe9c552f9cfa26b5c49e
# Parent c1bd53fac3d5e5868352894d62dbacd6de0b0e7a
x86: Further fixes for xsave leaf in pv_cpuid().
Signed-off-by: Shan Haitao <haitao.shan@xxxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -766,16 +766,18 @@ static void pv_cpuid(struct cpu_user_reg
if ( current->domain->domain_id != 0 )
{
+ unsigned int cpuid_leaf = a, sub_leaf = c;
+
if ( !cpuid_hypervisor_leaves(a, c, &a, &b, &c, &d) )
domain_cpuid(current->domain, a, c, &a, &b, &c, &d);
- switch ( a )
+ switch ( cpuid_leaf )
{
case 0xd:
{
- unsigned int sub_leaf, _eax, _ebx, _ecx, _edx;
+ unsigned int _eax, _ebx, _ecx, _edx;
/* EBX value of main leaf 0 depends on enabled xsave features */
- if ( c == 0 && current->arch.xcr0 )
+ if ( sub_leaf == 0 && current->arch.xcr0 )
{
/* reset EBX to default value first */
b = XSAVE_AREA_MIN_SIZE;
@@ -783,8 +785,8 @@ static void pv_cpuid(struct cpu_user_reg
{
if ( !(current->arch.xcr0 & (1ULL << sub_leaf)) )
continue;
- domain_cpuid(current->domain, a, c, &_eax, &_ebx, &_ecx,
- &_edx);
+ domain_cpuid(current->domain, cpuid_leaf, sub_leaf,
+ &_eax, &_ebx, &_ecx, &_edx);
if ( (_eax + _ebx) > b )
b = _eax + _ebx;
}
++++++ 23957-cpufreq-error-paths.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1318580154 -7200
# Node ID de316831471a8e0f11f615e7bf336dee2ba811e7
# Parent a65693f9fb1250ff4819774a70284693705db9e7
cpufreq: error path fixes
This fixes an actual bug (failure to exit from a function after an
allocation failure), an inconsistency (not removing the cpufreq_dom
list member upon failure), and a latent bug (not clearing the current
governor upon governor initialization failure when there was no old
one; latent because the only current code path leading to this
situation frees the policy upon failure and hence the governor not
getting cleared is benign).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
--- a/xen/drivers/cpufreq/cpufreq.c
+++ b/xen/drivers/cpufreq/cpufreq.c
@@ -195,8 +195,10 @@ int cpufreq_add_cpu(unsigned int cpu)
if (!domexist || hw_all) {
policy = xmalloc(struct cpufreq_policy);
- if (!policy)
+ if (!policy) {
ret = -ENOMEM;
+ goto err0;
+ }
memset(policy, 0, sizeof(struct cpufreq_policy));
policy->cpu = cpu;
@@ -206,7 +208,7 @@ int cpufreq_add_cpu(unsigned int cpu)
if (ret) {
xfree(policy);
per_cpu(cpufreq_cpu_policy, cpu) = NULL;
- return ret;
+ goto err0;
}
if (cpufreq_verbose)
printk("CPU %u initialization completed\n", cpu);
@@ -263,7 +265,7 @@ err1:
cpufreq_driver->exit(policy);
xfree(policy);
}
-
+err0:
if (cpus_empty(cpufreq_dom->map)) {
list_del(&cpufreq_dom->node);
xfree(cpufreq_dom);
--- a/xen/drivers/cpufreq/utility.c
+++ b/xen/drivers/cpufreq/utility.c
@@ -462,8 +462,8 @@ int __cpufreq_set_policy(struct cpufreq_
data->governor->name);
/* new governor failed, so re-start old one */
+ data->governor = old_gov;
if (old_gov) {
- data->governor = old_gov;
__cpufreq_governor(data, CPUFREQ_GOV_START);
printk(KERN_WARNING "Still stay at %s governor\n",
data->governor->name);
++++++ 23978-xenpaging_check_p2mt_in_p2m_mem_paging_functions.patch ++++++
changeset: 23978:fd3fa0a85020
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu Oct 20 11:25:55 2011 +0100
files: xen/arch/x86/mm/p2m.c
description:
xenpaging: check p2mt in p2m_mem_paging functions
Add checks to forward the p2m_ram_paging* state properly during page-in.
Resume can be called several times if several vcpus called populate for
the gfn. Finish resume only once.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
---
xen/arch/x86/mm/p2m.c | 20 ++++++++++++++++----
1 file changed, 16 insertions(+), 4 deletions(-)
Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
@@ -3020,16 +3020,22 @@ int p2m_mem_paging_prep(struct p2m_domai
p2m_type_t p2mt;
p2m_access_t a;
mfn_t mfn;
- int ret = -ENOMEM;
+ int ret;
p2m_lock(p2m);
mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
+ ret = -ENOENT;
+ /* Allow only missing pages */
+ if ( p2mt != p2m_ram_paging_in_start )
+ goto out;
+
/* Allocate a page if the gfn does not have one yet */
if ( !mfn_valid(mfn) )
{
/* Get a free page */
+ ret = -ENOMEM;
page = alloc_domheap_page(p2m->domain, 0);
if ( unlikely(page == NULL) )
goto out;
@@ -3064,9 +3070,15 @@ void p2m_mem_paging_resume(struct p2m_do
{
p2m_lock(p2m);
mfn = p2m->get_entry(p2m, rsp.gfn, &p2mt, &a, p2m_query);
- set_p2m_entry(p2m, rsp.gfn, mfn, 0, p2m_ram_rw, a);
- set_gpfn_from_mfn(mfn_x(mfn), rsp.gfn);
- audit_p2m(p2m, 1);
+ /* Allow only pages which were prepared properly, or pages which
+ * were nominated but not evicted */
+ if ( mfn_valid(mfn) &&
+ (p2mt == p2m_ram_paging_in || p2mt == p2m_ram_paging_in_start) )
+ {
+ set_p2m_entry(p2m, rsp.gfn, mfn, 0, p2m_ram_rw, a);
+ set_gpfn_from_mfn(mfn_x(mfn), rsp.gfn);
+ audit_p2m(p2m, 1);
+ }
p2m_unlock(p2m);
}
++++++ 23979-xenpaging_document_p2m_mem_paging_functions.patch ++++++
changeset: 23979:18306b054799
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu Oct 20 11:25:58 2011 +0100
files: xen/arch/x86/mm/p2m.c
description:
xenpaging: document p2m_mem_paging functions
Add some documentation for each of the p2m_mem_paging functions to describe
what they ought to do.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
---
xen/arch/x86/mm/p2m.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 93 insertions(+)
Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
@@ -2838,6 +2838,24 @@ set_shared_p2m_entry(struct p2m_domain *
}
#ifdef __x86_64__
+/**
+ * p2m_mem_paging_nominate - Mark a guest page as to-be-paged-out
+ * @d: guest domain
+ * @gfn: guest page to nominate
+ *
+ * Returns 0 for success or negative errno values if gfn is not pageable.
+ *
+ * p2m_mem_paging_nominate() is called by the pager and checks if a guest page
+ * can be paged out. If the following conditions are met the p2mt will be
+ * changed:
+ * - the gfn is backed by a mfn
+ * - the p2mt of the gfn is pageable
+ * - the mfn is not used for IO
+ * - the mfn has exactly one user and has no special meaning
+ *
+ * Once the p2mt is changed the page is readonly for the guest. On success the
+ * pager can write the page contents to disk and later evict the page.
+ */
int p2m_mem_paging_nominate(struct p2m_domain *p2m, unsigned long gfn)
{
struct page_info *page;
@@ -2883,6 +2901,25 @@ int p2m_mem_paging_nominate(struct p2m_d
return ret;
}
+/**
+ * p2m_mem_paging_evict - Mark a guest page as paged-out
+ * @d: guest domain
+ * @gfn: guest page to evict
+ *
+ * Returns 0 for success or negative errno values if eviction is not possible.
+ *
+ * p2m_mem_paging_evict() is called by the pager and will free a guest page and
+ * release it back to Xen. If the following conditions are met the page can be
+ * freed:
+ * - the gfn is backed by a mfn
+ * - the gfn was nominated
+ * - the mfn has still exactly one user and has no special meaning
+ *
+ * After successful nomination some other process could have mapped the page.
In
+ * this case eviction can not be done. If the gfn was populated before the
pager
+ * could evict it, eviction can not be done either. In this case the gfn is
+ * still backed by a mfn.
+ */
int p2m_mem_paging_evict(struct p2m_domain *p2m, unsigned long gfn)
{
struct page_info *page;
@@ -2942,6 +2979,15 @@ int p2m_mem_paging_evict(struct p2m_doma
return ret;
}
+/**
+ * p2m_mem_paging_drop_page - Tell pager to drop its reference to a paged page
+ * @d: guest domain
+ * @gfn: guest page to drop
+ *
+ * p2m_mem_paging_drop_page() will notify the pager that a paged-out gfn was
+ * released by the guest. The pager is supposed to drop its reference of the
+ * gfn.
+ */
void p2m_mem_paging_drop_page(struct p2m_domain *p2m, unsigned long gfn)
{
struct vcpu *v = current;
@@ -2961,6 +3007,27 @@ void p2m_mem_paging_drop_page(struct p2m
}
}
+/**
+ * p2m_mem_paging_populate - Tell pager to populete a paged page
+ * @d: guest domain
+ * @gfn: guest page in paging state
+ *
+ * p2m_mem_paging_populate() will notify the pager that a page in any of the
+ * paging states needs to be written back into the guest.
+ * This function needs to be called whenever gfn_to_mfn() returns any of the
p2m
+ * paging types because the gfn may not be backed by a mfn.
+ *
+ * The gfn can be in any of the paging states, but the pager needs only be
+ * notified when the gfn is in the paging-out path (paging_out or paged). This
+ * function may be called more than once from several vcpus. If the vcpu
belongs
+ * to the guest, the vcpu must be stopped and the pager notified that the vcpu
+ * was stopped. The pager needs to handle several requests for the same gfn.
+ *
+ * If the gfn is not in the paging-out path and the vcpu does not belong to the
+ * guest, nothing needs to be done and the function assumes that a request was
+ * already sent to the pager. In this case the caller has to try again until
the
+ * gfn is fully paged in again.
+ */
void p2m_mem_paging_populate(struct p2m_domain *p2m, unsigned long gfn)
{
struct vcpu *v = current;
@@ -3014,6 +3081,17 @@ void p2m_mem_paging_populate(struct p2m_
mem_event_put_request(d, &d->mem_paging, &req);
}
+/**
+ * p2m_mem_paging_prep - Allocate a new page for the guest
+ * @d: guest domain
+ * @gfn: guest page in paging state
+ *
+ * p2m_mem_paging_prep() will allocate a new page for the guest if the gfn is
+ * not backed by a mfn. It is called by the pager.
+ * It is required that the gfn was already populated. The gfn may already have
a
+ * mfn if populate was called for gfn which was nominated but not evicted. In
+ * this case only the p2mt needs to be forwarded.
+ */
int p2m_mem_paging_prep(struct p2m_domain *p2m, unsigned long gfn)
{
struct page_info *page;
@@ -3054,6 +3132,21 @@ int p2m_mem_paging_prep(struct p2m_domai
return ret;
}
+/**
+ * p2m_mem_paging_resume - Resume guest gfn and vcpus
+ * @d: guest domain
+ * @gfn: guest page in paging state
+ *
+ * p2m_mem_paging_resume() will forward the p2mt of a gfn to ram_rw and all
+ * waiting vcpus will be unpaused again. It is called by the pager.
+ *
+ * The gfn was previously either evicted and populated, or nominated and
+ * populated. If the page was evicted the p2mt will be p2m_ram_paging_in. If
+ * the page was just nominated the p2mt will be p2m_ram_paging_in_start because
+ * the pager did not call p2m_mem_paging_prep().
+ *
+ * If the gfn was dropped the vcpu needs to be unpaused.
+ */
void p2m_mem_paging_resume(struct p2m_domain *p2m)
{
struct domain *d = p2m->domain;
++++++ 23980-xenpaging_disallow_paging_in_a_PoD_guest.patch ++++++
changeset: 23980:a06609840ff1
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu Oct 20 11:25:58 2011 +0100
files: tools/xenpaging/xenpaging.c xen/arch/x86/mm/mem_event.c
description:
xenpaging: disallow paging in a PoD guest
Disallow xenpaging in a PoD guest until coexistance between the two features
is properly implemented.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
---
tools/xenpaging/xenpaging.c | 3 +++
xen/arch/x86/mm/mem_event.c | 6 ++++++
2 files changed, 9 insertions(+)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -246,6 +246,9 @@ static xenpaging_t *xenpaging_init(domid
case ENODEV:
ERROR("EPT not supported for this guest");
break;
+ case EXDEV:
+ ERROR("xenpaging not supported in a PoD guest");
+ break;
default:
ERROR("Error initialising shared page: %s", strerror(errno));
break;
Index: xen-4.1.2-testing/xen/arch/x86/mm/mem_event.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/mem_event.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/mem_event.c
@@ -253,6 +253,7 @@ int mem_event_domctl(struct domain *d, x
case XEN_DOMCTL_MEM_EVENT_OP_PAGING:
{
struct mem_event_domain *med = &d->mem_paging;
+ struct p2m_domain *p2m = p2m_get_hostp2m(d);
rc = -ENODEV;
/* Only HAP is supported */
if ( !hap_enabled(d) )
@@ -262,6 +263,11 @@ int mem_event_domctl(struct domain *d, x
if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
break;
+ rc = -EXDEV;
+ /* Disallow paging in a PoD guest */
+ if ( p2m->pod.entry_count )
+ break;
+
switch( mec->op )
{
case XEN_DOMCTL_MEM_EVENT_OP_PAGING_ENABLE:
++++++ 23993-x86-microcode-amd-fix-23871.patch ++++++
References: bnc#725169
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxxxx>
# Date 1319475620 -3600
# Node ID e458dfc35b8d3be04a9b72c30ff97163e27a7314
# Parent ffe861c1d5dfa8f4485052e5600e06124105033f
x86/ucode-amd: fix regression from c/s 23871:503ee256fecf
microcode_fits() must return distinct values for the success and
no-fit-but-no-error cases, so the caller can react accordingly. Make
it return 1 in the success case, and adjust its single caller.
Also remove an impossible code path - install_equiv_cpu_table(), which
gets called prior to microcode_fits(), never leaves equiv_cpu_table
being NULL without also returning an error.
Note that this is still awaiting testing on a system where the
regression was actually observed (which also requires a new enough
microcode_ctl package). Note also that this will need to be
backported to 4.0 and 4.1 (or the broken c/s that got backported
there reverted).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>
Index: xen-4.1.2-testing/xen/arch/x86/microcode_amd.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/microcode_amd.c
+++ xen-4.1.2-testing/xen/arch/x86/microcode_amd.c
@@ -76,14 +76,6 @@ static int microcode_fits(void *mc, int
/* We should bind the task to the CPU */
BUG_ON(cpu != raw_smp_processor_id());
- if ( equiv_cpu_table == NULL )
- {
- printk(KERN_INFO "microcode: CPU%d microcode update with "
- "version 0x%x (current=0x%x)\n",
- cpu, mc_header->patch_id, uci->cpu_sig.rev);
- goto out;
- }
-
current_cpu_id = cpuid_eax(0x00000001);
for ( i = 0; equiv_cpu_table[i].installed_cpu != 0; i++ )
@@ -96,7 +88,7 @@ static int microcode_fits(void *mc, int
}
if ( !equiv_cpu_id )
- return 0;
+ return 0;
if ( (mc_header->processor_rev_id) != equiv_cpu_id )
{
@@ -113,8 +105,7 @@ static int microcode_fits(void *mc, int
"update with version 0x%x (current=0x%x)\n",
cpu, mc_header->patch_id, uci->cpu_sig.rev);
-out:
- return 0;
+ return 1;
}
static int apply_microcode(int cpu)
@@ -289,7 +280,7 @@ static int cpu_request_microcode(int cpu
while ( (ret = get_next_ucode_from_buffer_amd(mc, buf, size, &offset)) ==
0)
{
error = microcode_fits(mc, cpu);
- if (error != 0)
+ if (error <= 0)
continue;
error = apply_microcode(cpu);
++++++ 24104-waitqueue_Double_size_of_x86_shadow_stack..patch ++++++
changeset: 24104:4daa4ad90f12
user: Keir Fraser <keir@xxxxxxx>
date: Tue Nov 08 19:33:46 2011 +0000
files: xen/common/wait.c
description:
waitqueue: Double size of x86 shadow stack.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
---
xen/common/wait.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: xen-4.1.2-testing/xen/common/wait.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/wait.c
+++ xen-4.1.2-testing/xen/common/wait.c
@@ -33,7 +33,7 @@ struct waitqueue_vcpu {
* hypervisor context before sleeping (descheduling), setjmp/longjmp-style.
*/
void *esp;
- char stack[1500];
+ char stack[3000];
#endif
};
++++++ 24105-xenpaging_compare_domain_pointer_in_p2m_mem_paging_populate.patch
++++++
changeset: 24105:89efd82620ec
user: Olaf Hering <olaf@xxxxxxxxx>
date: Tue Nov 08 19:35:01 2011 +0000
files: xen/arch/x86/mm/p2m.c
description:
xenpaging: compare domain pointer in p2m_mem_paging_populate
Compare just the domain pointer instead of the domain_id number.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>
---
xen/arch/x86/mm/p2m.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
@@ -3060,7 +3060,7 @@ void p2m_mem_paging_populate(struct p2m_
p2m_unlock(p2m);
/* Pause domain if request came from guest and gfn has paging type */
- if ( p2m_is_paging(p2mt) && v->domain->domain_id == d->domain_id )
+ if ( p2m_is_paging(p2mt) && v->domain == d )
{
vcpu_pause_nosync(v);
req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
++++++ 24106-mem_event_check_capabilities_only_once.patch ++++++
changeset: 24106:2af5bfbc9fde
user: Olaf Hering <olaf@xxxxxxxxx>
date: Tue Nov 08 19:35:42 2011 +0000
files: xen/arch/x86/mm/mem_event.c
description:
mem_event: check capabilities only once
It is not required to check the system capabilities during every domctl.
Rearrange the code to check them only once.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>
---
xen/arch/x86/mm/mem_event.c | 54 ++++++++++++++++++++++++--------------------
1 file changed, 30 insertions(+), 24 deletions(-)
Index: xen-4.1.2-testing/xen/arch/x86/mm/mem_event.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/mem_event.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/mem_event.c
@@ -253,32 +253,35 @@ int mem_event_domctl(struct domain *d, x
case XEN_DOMCTL_MEM_EVENT_OP_PAGING:
{
struct mem_event_domain *med = &d->mem_paging;
- struct p2m_domain *p2m = p2m_get_hostp2m(d);
- rc = -ENODEV;
- /* Only HAP is supported */
- if ( !hap_enabled(d) )
- break;
-
- /* Currently only EPT is supported */
- if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
- break;
-
- rc = -EXDEV;
- /* Disallow paging in a PoD guest */
- if ( p2m->pod.entry_count )
- break;
+ rc = -EINVAL;
switch( mec->op )
{
case XEN_DOMCTL_MEM_EVENT_OP_PAGING_ENABLE:
{
+ struct p2m_domain *p2m = p2m_get_hostp2m(d);
+ rc = -ENODEV;
+ /* Only HAP is supported */
+ if ( !hap_enabled(d) )
+ break;
+
+ /* Currently only EPT is supported */
+ if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
+ break;
+
+ rc = -EXDEV;
+ /* Disallow paging in a PoD guest */
+ if ( p2m->pod.entry_count )
+ break;
+
rc = mem_event_enable(d, mec, med);
}
break;
case XEN_DOMCTL_MEM_EVENT_OP_PAGING_DISABLE:
{
- rc = mem_event_disable(med);
+ if ( med->ring_page )
+ rc = mem_event_disable(med);
}
break;
@@ -295,26 +298,29 @@ int mem_event_domctl(struct domain *d, x
case XEN_DOMCTL_MEM_EVENT_OP_ACCESS:
{
struct mem_event_domain *med = &d->mem_access;
- rc = -ENODEV;
- /* Only HAP is supported */
- if ( !hap_enabled(d) )
- break;
-
- /* Currently only EPT is supported */
- if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
- break;
+ rc = -EINVAL;
switch( mec->op )
{
case XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE:
{
+ rc = -ENODEV;
+ /* Only HAP is supported */
+ if ( !hap_enabled(d) )
+ break;
+
+ /* Currently only EPT is supported */
+ if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
+ break;
+
rc = mem_event_enable(d, mec, med);
}
break;
case XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE:
{
- rc = mem_event_disable(&d->mem_access);
+ if ( med->ring_page )
+ rc = mem_event_disable(&d->mem_access);
}
break;
++++++ 24116-x86-continuation-cancel.patch ++++++
# HG changeset patch
# User Jean Guyader <jean.guyader@xxxxxxxxxxxxx>
# Date 1321002862 -3600
# Node ID a095cf28f2b6eeb8f5873c18eb18d4d7e5544e2c
# Parent 6534da595d695a4f2af12a64e46fb06219a0e4bc
Hypercall continuation cancelation in compat mode for XENMEM_get/set_pod_target
If copy_to_guest failed in the compat code after a continuation as been
done in the native code we need to cancel it so we won't reexecute the
hypercall but return from the hypercall with the appropriate error.
Signed-off-by: Jean Guyader <jean.guyader@xxxxxxxxxxxxx>
Acked-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
Committed-by: Jan Beulich <jbeulich@xxxxxxxx>
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1585,6 +1585,24 @@ void sync_vcpu_execstate(struct vcpu *v)
__arg; \
})
+void hypercall_cancel_continuation(void)
+{
+ struct cpu_user_regs *regs = guest_cpu_user_regs();
+ struct mc_state *mcs = ¤t->mc_state;
+
+ if ( test_bit(_MCSF_in_multicall, &mcs->flags) )
+ {
+ __clear_bit(_MCSF_call_preempted, &mcs->flags);
+ }
+ else
+ {
+ if ( !is_hvm_vcpu(current) )
+ regs->eip += 2; /* skip re-execute 'syscall' / 'int $xx' */
+ else
+ current->arch.hvm_vcpu.hcall_preempted = 0;
+ }
+}
+
unsigned long hypercall_create_continuation(
unsigned int op, const char *format, ...)
{
--- a/xen/arch/x86/x86_64/compat/mm.c
+++ b/xen/arch/x86/x86_64/compat/mm.c
@@ -133,7 +133,11 @@ int compat_arch_memory_op(int op, XEN_GU
XLAT_pod_target(&cmp, nat);
if ( copy_to_guest(arg, &cmp, 1) )
+ {
+ if ( rc == __HYPERVISOR_memory_op )
+ hypercall_cancel_continuation();
rc = -EFAULT;
+ }
break;
}
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -560,6 +560,7 @@ void startup_cpu_idle_loop(void);
*/
unsigned long hypercall_create_continuation(
unsigned int op, const char *format, ...);
+void hypercall_cancel_continuation(void);
#define hypercall_preempt_check() (unlikely( \
softirq_pending(smp_processor_id()) | \
++++++ 24123-x86-cpuidle-quiesce.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1321017916 -3600
# Node ID 8b08b2166aa82e7df0b1fa620ed57078810f8c12
# Parent 4699decb8424a00447c466205be3cb4d0fb95a76
x86: quiesce cpuidle code
So far these messages got pointlessly (as the code in other places
assumes symmetric configuration) emitted once per CPU. Hide the debug
one behind opt_cpu_info, and issue the info one just once (if the code
gets adjusted to support assymtric configurations, this would need to
be revisited, but ideally without producing per-CPU messages again).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -659,6 +659,8 @@ static int acpi_processor_ffh_cstate_pro
unsigned int edx_part;
unsigned int cstate_type; /* C-state type and not ACPI C-state type */
unsigned int num_cstate_subtype;
+ int ret = 0;
+ static unsigned long printed;
if ( c->cpuid_level < CPUID_MWAIT_LEAF )
{
@@ -667,8 +669,9 @@ static int acpi_processor_ffh_cstate_pro
}
cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
- printk(XENLOG_DEBUG "cpuid.MWAIT[.eax=%x, .ebx=%x, .ecx=%x, .edx=%x]\n",
- eax, ebx, ecx, edx);
+ if ( opt_cpu_info )
+ printk(XENLOG_DEBUG "cpuid.MWAIT[eax=%x ebx=%x ecx=%x edx=%x]\n",
+ eax, ebx, ecx, edx);
/* Check whether this particular cx_type (in CST) is supported or not */
cstate_type = (cx->reg.address >> MWAIT_SUBSTATE_SIZE) + 1;
@@ -676,15 +679,16 @@ static int acpi_processor_ffh_cstate_pro
num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK;
if ( num_cstate_subtype < (cx->reg.address & MWAIT_SUBSTATE_MASK) )
- return -EFAULT;
-
+ ret = -ERANGE;
/* mwait ecx extensions INTERRUPT_BREAK should be supported for C2/C3 */
- if ( !(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
- !(ecx & CPUID5_ECX_INTERRUPT_BREAK) )
- return -EFAULT;
-
- printk(XENLOG_INFO "Monitor-Mwait will be used to enter C-%d state\n",
cx->type);
- return 0;
+ else if ( !(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
+ !(ecx & CPUID5_ECX_INTERRUPT_BREAK) )
+ ret = -ENODEV;
+ else if ( opt_cpu_info || cx->type >= BITS_PER_LONG ||
+ !test_and_set_bit(cx->type, &printed) )
+ printk(XENLOG_INFO "Monitor-Mwait will be used to enter C%d state\n",
+ cx->type);
+ return ret;
}
/*
++++++ 24124-x86-microcode-amd-quiesce.patch ++++++
References: bnc#719700
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1321018008 -3600
# Node ID 69f8b6f4c29cb2fb2d11e27c391090f543e6b393
# Parent 8b08b2166aa82e7df0b1fa620ed57078810f8c12
x86/amd-ucode: further turn down verbosity
Turn up the log level on various (mostly debug-only) messages.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
--- a/xen/arch/x86/microcode_amd.c
+++ b/xen/arch/x86/microcode_amd.c
@@ -59,7 +59,7 @@ static int collect_cpu_info(int cpu, str
rdmsrl(MSR_AMD_PATCHLEVEL, csig->rev);
- printk(KERN_INFO "microcode: collect_cpu_info: patch_id=0x%x\n",
+ printk(KERN_DEBUG "microcode: collect_cpu_info: patch_id=0x%x\n",
csig->rev);
return 0;
@@ -92,7 +92,7 @@ static int microcode_fits(void *mc, int
if ( (mc_header->processor_rev_id) != equiv_cpu_id )
{
- printk(KERN_INFO "microcode: CPU%d patch does not match "
+ printk(KERN_DEBUG "microcode: CPU%d patch does not match "
"(patch is %x, cpu base id is %x) \n",
cpu, mc_header->processor_rev_id, equiv_cpu_id);
return -EINVAL;
@@ -101,7 +101,7 @@ static int microcode_fits(void *mc, int
if ( mc_header->patch_id <= uci->cpu_sig.rev )
return -EINVAL;
- printk(KERN_INFO "microcode: CPU%d found a matching microcode "
+ printk(KERN_DEBUG "microcode: CPU%d found a matching microcode "
"update with version 0x%x (current=0x%x)\n",
cpu, mc_header->patch_id, uci->cpu_sig.rev);
@@ -139,8 +139,7 @@ static int apply_microcode(int cpu)
return -EIO;
}
- printk("microcode: CPU%d updated from revision "
- "0x%x to 0x%x \n",
+ printk(KERN_INFO "microcode: CPU%d updated from revision %#x to %#x\n",
cpu, uci->cpu_sig.rev, mc_amd->hdr.patch_id);
uci->cpu_sig.rev = rev;
@@ -173,7 +172,7 @@ static int get_next_ucode_from_buffer_am
total_size = (unsigned long) (bufp[off+4] + (bufp[off+5] << 8));
- printk(KERN_INFO "microcode: size %lu, total_size %lu, offset %ld\n",
+ printk(KERN_DEBUG "microcode: size %lu, total_size %lu, offset %ld\n",
(unsigned long)size, total_size, off);
if ( (off + total_size) > size )
++++++ 24137-revert-23666.patch ++++++
# HG changeset patch
# User Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
# Date 1321035275 0
# Node ID 0844b17df7a9dd885e98e505f14fc99c1951b483
# Parent 3622d7fae14dfc2d00f378738ace3b65ee65b6cc
Revert c/s 23666:b96f8bdcaa15 KEXEC: disconnect all PCI devices from the PCI
bus on crash
It turns out that this causes all mannor of problems on certain
motherboards (so far with no pattern I can discern)
Problems include:
* Hanging forever checking hlt instruction.
* Panics when trying to change switch root device
* Drivers hanging when trying to check for interrupts.
From: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Signed-off-by: Keir Fraser <keir@xxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>
--- a/xen/arch/x86/crash.c
+++ b/xen/arch/x86/crash.c
@@ -28,7 +28,6 @@
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <xen/iommu.h>
-#include <xen/pci.h>
#include <asm/hpet.h>
static atomic_t waiting_for_crash_ipi;
@@ -83,8 +82,6 @@ static void nmi_shootdown_cpus(void)
msecs--;
}
- disconnect_pci_devices();
-
/* Crash shutdown any IOMMU functionality as the crashdump kernel is not
* happy when booting if interrupt/dma remapping is still enabled */
iommu_crash_shutdown();
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -518,25 +518,6 @@ int __init scan_pci_devices(void)
return 0;
}
-/* Disconnect all PCI devices from the PCI buses. From the PCI spec:
- * "When a 0 is written to [the COMMAND] register, the device is
- * logically disconnected from the PCI bus for all accesses except
- * configuration accesses. All devices are required to support
- * this base level of functionality."
- */
-void disconnect_pci_devices(void)
-{
- struct pci_dev *pdev;
-
- spin_lock(&pcidevs_lock);
-
- list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
- pci_conf_write16(pdev->bus, PCI_SLOT(pdev->devfn),
- PCI_FUNC(pdev->devfn), PCI_COMMAND, 0);
-
- spin_unlock(&pcidevs_lock);
-}
-
#ifdef SUPPORT_MSI_REMAPPING
static void dump_pci_devices(unsigned char ch)
{
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -94,8 +94,6 @@ int pci_remove_device(u8 bus, u8 devfn);
struct pci_dev *pci_get_pdev(int bus, int devfn);
struct pci_dev *pci_get_pdev_by_domain(struct domain *d, int bus, int devfn);
-void disconnect_pci_devices(void);
-
uint8_t pci_conf_read8(
unsigned int bus, unsigned int dev, unsigned int func, unsigned int reg);
uint16_t pci_conf_read16(
++++++ 24138-xenpaging_munmap_all_pages_after_page-in.patch ++++++
changeset: 24138:e2cc58b85b6e
user: Olaf Hering <olaf@xxxxxxxxx>
date: Mon Nov 14 17:49:14 2011 +0000
files: tools/xenpaging/pagein.c
description:
xenpaging: munmap all pages after page-in
Do munmap() on all mapped pages, not just the first one. Without this
change the gfns backing the remaining pages can not be paged out again
because the page count does not go down to 1. This change was missing
from changeset 23827:d1d6abc1db20.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
---
tools/xenpaging/pagein.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: xen-4.1.2-testing/tools/xenpaging/pagein.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/pagein.c
+++ xen-4.1.2-testing/tools/xenpaging/pagein.c
@@ -44,7 +44,7 @@ static void *page_in(void *arg)
/* Ignore errors */
page = xc_map_foreign_pages(pia->xch, pia->dom, PROT_READ, gfns, num);
if (page)
- munmap(page, PAGE_SIZE);
+ munmap(page, PAGE_SIZE * num);
}
page_in_possible = 0;
pthread_exit(NULL);
++++++ 24144-cpufreq-turbo-crash.patch ++++++
# HG changeset patch
# User Ian Campbell <ian.campbell@xxxxxxxxxx>
# Date 1321363478 -3600
# Node ID cd3ef25f207a3925f1f8650c227e24f839da13ad
# Parent 848049b14ec7fbd28bd4cd756e01609698b60c7a
xen: avoid crash enabling turbo mode
On a system which has not had P-state information pushed down into the
hypervisor running "xenpm enable-turbo-mode" will reliably crash the host.
(XEN) PM OP 38 on CPU0
(XEN) ----[ Xen-4.2-unstable x86_64 debug=y Not tainted ]----
(XEN) CPU: 0
(XEN) RIP: e008:[<ffff82c48013ceed>] cpufreq_enable_turbo+0x1d/0x29
(XEN) RFLAGS: 0000000000010297 CONTEXT: hypervisor
(XEN) rax: 0000000000000000 rbx: ffff82c48029fe40 rcx: 0000000000000000
(XEN) rdx: 0000000000000000 rsi: 000000000000000a rdi: 0000000000000000
(XEN) rbp: ffff82c48029fd08 rsp: ffff82c48029fd08 r8: 0000000000000004
(XEN) r9: 0000000000000000 r10: 00000000fffffffd r11: ffff82c480218f20
(XEN) r12: ffff830106e720b0 r13: 0000000000000000 r14: ffff82c4802bff80
(XEN) r15: ffff82c48025c0e4 cr0: 000000008005003b cr4: 00000000000026f0
(XEN) cr3: 000000011f459000 cr2: 0000000000000051
(XEN) ds: 007b es: 007b fs: 00d8 gs: 0033 ss: 0000 cs: e008
(XEN) Xen stack trace from rsp=ffff82c48029fd08:
(XEN) ffff82c48029fdb8 ffff82c48014c427 ffff82c48029fd98 ffff82c48016b2d6
(XEN) ffff82c48029fdb8 ffff82c48029fd60 00000000001196c5 0000000116bbe025
(XEN) 0000000116bbe025 ffff8301196c5338 ffff82f6022d77c0 0000000000000000
(XEN) ffff82f60205edf0 0000000000000000 0000000000000000 ffff8300dffba000
(XEN) ffff82c48029fdb8 ffff82c48029ff18 0000000008050004 0000000000000000
(XEN) ffff82c4802bff80 ffff82c48025c0e4 ffff82c48029fef8 ffff82c480124fd8
(XEN) 0000000000000000 ffff83011f48c000 0000000116bbe025 0000000000000025
(XEN) ffff82c48029fe28 0000000080167722 ffff82c48029ff08 ffff83011f48c000
(XEN) ffff82f60232d8a0 ffff8300dffba000 ffff83011f48c000 ffff82f60232d8a0
(XEN) ffff82c48029fed8 ffff82c48017296a 000000080000000c 0000000000000026
(XEN) bfb338b000000000 bfb33874bfb33868 b78988f800000000 0000008800000000
(XEN) b787a6e0b78533a0 ffffffff00000001 080487aeb7897ff4 bfb3389000000001
(XEN) b7898ab0b7889626 0000000100000000 0000000000000001 0804867800000001
(XEN) 000000000804e998 00000000b78533a0 bfb33e70bfb33a7c 0000000000000000
(XEN) 0000000000000000 ffff8300dffba000 0000000000000000 0000000000000000
(XEN) 0000000000000000 0000000000000000 00007d3b7fd600c7 ffff82c48021511e
(XEN) 00000000c1002467 0000000000000023 0000000000000000 0000000000000000
(XEN) 0000000000000000 0000000000000000 00000000dbf5bef8 0000000008050004
(XEN) 0000000000000000 0000000000000000 0000000000000000 0000000000000000
(XEN) 0000000000000023 00000000bfb33874 00000000bfb33868 0000000000000000
(XEN) Xen call trace:
(XEN) [<ffff82c48013ceed>] cpufreq_enable_turbo+0x1d/0x29
(XEN) [<ffff82c48014c427>] do_pm_op+0x884/0x8e7
(XEN) [<ffff82c480124fd8>] do_sysctl+0x6d8/0x9f0
(XEN) [<ffff82c48021511e>] compat_hypercall+0xae/0x107
(XEN)
(XEN) Pagetable walk from 0000000000000051:
(XEN) L4[0x000] = 0000000116dbc027 000000000001bd22
(XEN) L3[0x000] = 0000000119ba8027 000000000001eb36
(XEN) L2[0x000] = 0000000000000000 ffffffffffffffff
(XEN)
(XEN) ****************************************
(XEN) Panic on CPU 0:
(XEN) FATAL PAGE FAULT
(XEN) [error_code=0000]
(XEN) Faulting linear address: 0000000000000051
(XEN) ****************************************
(XEN)
Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Committed-by: Jan Beulich <jbeulich@xxxxxxxx>
--- a/xen/drivers/cpufreq/utility.c
+++ b/xen/drivers/cpufreq/utility.c
@@ -400,7 +400,7 @@ void cpufreq_enable_turbo(int cpuid)
struct cpufreq_policy *policy;
policy = per_cpu(cpufreq_cpu_policy, cpuid);
- if (policy->turbo != CPUFREQ_TURBO_UNSUPPORTED)
+ if (policy && policy->turbo != CPUFREQ_TURBO_UNSUPPORTED)
policy->turbo = CPUFREQ_TURBO_ENABLED;
}
@@ -409,7 +409,7 @@ void cpufreq_disable_turbo(int cpuid)
struct cpufreq_policy *policy;
policy = per_cpu(cpufreq_cpu_policy, cpuid);
- if (policy->turbo != CPUFREQ_TURBO_UNSUPPORTED)
+ if (policy && policy->turbo != CPUFREQ_TURBO_UNSUPPORTED)
policy->turbo = CPUFREQ_TURBO_DISABLED;
}
@@ -418,7 +418,7 @@ int cpufreq_get_turbo_status(int cpuid)
struct cpufreq_policy *policy;
policy = per_cpu(cpufreq_cpu_policy, cpuid);
- return policy->turbo;
+ return policy && policy->turbo;
}
/*********************************************************************
++++++ 24148-shadow-pgt-dying-op-performance.patch ++++++
References: bnc#726332
# HG changeset patch
# User Gianluca Guida <gianluca.guida@xxxxxxxxxx>
# Date 1321456773 0
# Node ID 3ecc8fef428138e4304ef11b47498981e63626b3
# Parent a5f1d3b1612bb48e1cb09dc66b0909e3613dc855
[shadow] Disable higher level pagetables early unshadow only when the "process
dying" hypercall is used.
This patch fixes a performance problem in fully virtualized guests.
Signed-off-by: Gianluca Guida <gianluca.guida@xxxxxxxxxx>
Tested-by: Jan Beulich <jbeulich@xxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -2743,8 +2743,9 @@ static inline void check_for_early_unsha
|| ( !v->domain->arch.paging.shadow.pagetable_dying_op
&& v->arch.paging.shadow.last_emulated_mfn_for_unshadow ==
mfn_x(gmfn) ) )
&& sh_mfn_is_a_page_table(gmfn)
- && !(mfn_to_page(gmfn)->shadow_flags
- & (SHF_L2_32|SHF_L2_PAE|SHF_L2H_PAE|SHF_L4_64)) )
+ && (!v->domain->arch.paging.shadow.pagetable_dying_op ||
+ !(mfn_to_page(gmfn)->shadow_flags
+ & (SHF_L2_32|SHF_L2_PAE|SHF_L2H_PAE|SHF_L4_64))) )
{
perfc_incr(shadow_early_unshadow);
sh_remove_shadows(v, gmfn, 1, 0 /* Fast, can fail to unshadow */ );
++++++ 24153-x86-emul-feature-checks.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1321459471 0
# Node ID 644ca5d3ec435f3372ce88a4de86909bd4033819
# Parent 1cbb3c1dfb3203f5344a6c1c52507b9e75af6742
x86/emulator: add feature checks for newer instructions
Certain instructions were introduced only after the i686 or original
x86-64 architecture, so we should not try to emulate them if the guest
is not seeing the respective feature enabled (or, worse, if the
underlying hardware doesn't support them). This affects fisttp,
movnti, and cmpxchg16b.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Signed-off-by: Keir Fraser <keir@xxxxxxx>
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -955,6 +955,47 @@ in_protmode(
return !(in_realmode(ctxt, ops) || (ctxt->regs->eflags & EFLG_VM));
}
+#define EAX 0
+#define ECX 1
+#define EDX 2
+#define EBX 3
+
+static bool_t vcpu_has(
+ unsigned int eax,
+ unsigned int reg,
+ unsigned int bit,
+ struct x86_emulate_ctxt *ctxt,
+ const struct x86_emulate_ops *ops)
+{
+ unsigned int ebx = 0, ecx = 0, edx = 0;
+ int rc;
+
+ fail_if(!ops->cpuid);
+ rc = ops->cpuid(&eax, &ebx, &ecx, &edx, ctxt);
+ if ( rc == X86EMUL_OKAY )
+ {
+ switch ( reg )
+ {
+ case EAX: reg = eax; break;
+ case EBX: reg = ebx; break;
+ case ECX: reg = ecx; break;
+ case EDX: reg = edx; break;
+ default: BUG();
+ }
+ if ( !(reg & (1U << bit)) )
+ rc = ~X86EMUL_OKAY;
+ }
+
+ done:
+ return rc == X86EMUL_OKAY;
+}
+
+#define vcpu_must_have(leaf, reg, bit) \
+ generate_exception_if(!vcpu_has(leaf, reg, bit, ctxt, ops), EXC_UD, -1)
+#define vcpu_must_have_sse2() vcpu_must_have(0x00000001, EDX, 26)
+#define vcpu_must_have_sse3() vcpu_must_have(0x00000001, ECX, 0)
+#define vcpu_must_have_cx16() vcpu_must_have(0x00000001, ECX, 13)
+
static int
in_longmode(
struct x86_emulate_ctxt *ctxt,
@@ -2738,6 +2779,7 @@ x86_emulate(
emulate_fpu_insn_memsrc("fildl", src.val);
break;
case 1: /* fisttp m32i */
+ vcpu_must_have_sse3();
ea.bytes = 4;
dst = ea;
dst.type = OP_MEM;
@@ -2846,6 +2888,7 @@ x86_emulate(
emulate_fpu_insn_memsrc("fldl", src.val);
break;
case 1: /* fisttp m64i */
+ vcpu_must_have_sse3();
ea.bytes = 8;
dst = ea;
dst.type = OP_MEM;
@@ -2953,6 +2996,7 @@ x86_emulate(
emulate_fpu_insn_memsrc("fild", src.val);
break;
case 1: /* fisttp m16i */
+ vcpu_must_have_sse3();
ea.bytes = 2;
dst = ea;
dst.type = OP_MEM;
@@ -4141,6 +4185,7 @@ x86_emulate(
case 0xc3: /* movnti */
/* Ignore the non-temporal hint for now. */
+ vcpu_must_have_sse2();
generate_exception_if(dst.bytes <= 2, EXC_UD, -1);
dst.val = src.val;
break;
@@ -4151,6 +4196,8 @@ x86_emulate(
generate_exception_if((modrm_reg & 7) != 1, EXC_UD, -1);
generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
+ if ( op_bytes == 8 )
+ vcpu_must_have_cx16();
op_bytes *= 2;
/* Get actual old value. */
++++++ 24155-x86-ioapic-EOI-after-migration.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1321604321 -3600
# Node ID 0d50e704834fb53c6c86b8b0badd19d88e73c4ed
# Parent dbdc840f8f62db58321b5009e5e0f7833066386f
x86/IO-APIC: refine EOI-ing of migrating level interrupts
Rather than going through all IO-APICs and calling io_apic_eoi_vector()
for the vector in question, just use eoi_IO_APIC_irq().
This in turn allows to eliminate quite a bit of other code.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Tested-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Acked-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -69,10 +69,6 @@ int __read_mostly nr_ioapics;
#define ioapic_has_eoi_reg(apic) (mp_ioapics[(apic)].mpc_apicver >= 0x20)
-#define io_apic_eoi_vector(apic, vector) io_apic_eoi((apic), (vector), -1)
-#define io_apic_eoi_pin(apic, pin) io_apic_eoi((apic), -1, (pin))
-
-
/*
* This is performance-critical, we want to do it O(1)
*
@@ -208,21 +204,18 @@ static void ioapic_write_entry(int apic,
spin_unlock_irqrestore(&ioapic_lock, flags);
}
-/* EOI an IO-APIC entry. One of vector or pin may be -1, indicating that
- * it should be worked out using the other. This function expect that the
- * ioapic_lock is taken, and interrupts are disabled (or there is a good reason
- * not to), and that if both pin and vector are passed, that they refer to the
+/* EOI an IO-APIC entry. Vector may be zero, indicating that it should be
+ * worked out using the pin. This function expects that the ioapic_lock is
+ * being held, and interrupts are disabled (or there is a good reason not
+ * to), and that if both pin and vector are passed, that they refer to the
* same redirection entry in the IO-APIC. */
static void __io_apic_eoi(unsigned int apic, unsigned int vector, unsigned int
pin)
{
- /* Ensure some useful information is passed in */
- BUG_ON( (vector == -1 && pin == -1) );
-
/* Prefer the use of the EOI register if available */
if ( ioapic_has_eoi_reg(apic) )
{
/* If vector is unknown, read it from the IO-APIC */
- if ( vector == -1 )
+ if ( !vector )
vector = __ioapic_read_entry(apic, pin, TRUE).vector;
*(IO_APIC_BASE(apic)+16) = vector;
@@ -234,42 +227,6 @@ static void __io_apic_eoi(unsigned int a
struct IO_APIC_route_entry entry;
bool_t need_to_unmask = 0;
- /* If pin is unknown, search for it */
- if ( pin == -1 )
- {
- unsigned int p;
- for ( p = 0; p < nr_ioapic_registers[apic]; ++p )
- {
- entry = __ioapic_read_entry(apic, p, TRUE);
- if ( entry.vector == vector )
- {
- pin = p;
- /* break; */
-
- /* Here should be a break out of the loop, but at the
- * Xen code doesn't actually prevent multiple IO-APIC
- * entries being assigned the same vector, so EOI all
- * pins which have the correct vector.
- *
- * Remove the following code when the above assertion
- * is fulfilled. */
- __io_apic_eoi(apic, vector, p);
- }
- }
-
- /* If search fails, nothing to do */
-
- /* if ( pin == -1 ) */
-
- /* Because the loop wasn't broken out of (see comment above),
- * all relevant pins have been EOI, so we can always return.
- *
- * Re-instate the if statement above when the Xen logic has been
- * fixed.*/
-
- return;
- }
-
entry = __ioapic_read_entry(apic, pin, TRUE);
if ( ! entry.mask )
@@ -296,17 +253,6 @@ static void __io_apic_eoi(unsigned int a
}
}
-/* EOI an IO-APIC entry. One of vector or pin may be -1, indicating that
- * it should be worked out using the other. This function disables interrupts
- * and takes the ioapic_lock */
-static void io_apic_eoi(unsigned int apic, unsigned int vector, unsigned int
pin)
-{
- unsigned int flags;
- spin_lock_irqsave(&ioapic_lock, flags);
- __io_apic_eoi(apic, vector, pin);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
/*
* Saves all the IO-APIC RTE's
*/
@@ -1830,11 +1776,7 @@ static void end_level_ioapic_irq (unsign
/* Manually EOI the old vector if we are moving to the new */
if ( vector && i != vector )
- {
- int ioapic;
- for (ioapic = 0; ioapic < nr_ioapics; ioapic++)
- io_apic_eoi_vector(ioapic, i);
- }
+ eoi_IO_APIC_irq(irq);
v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
++++++ 24156-x86-ioapic-shared-vectors.patch ++++++
References: bnc#713503
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1321604484 -3600
# Node ID f29b5bd6e25fd78409baa5461914c67065f7579f
# Parent 0d50e704834fb53c6c86b8b0badd19d88e73c4ed
x86/IRQ: prevent vector sharing within IO-APICs
Following the prevention of vector sharing for MSIs, this change
enforces the same within IO-APICs: Pin based interrupts use the IO-APIC
as their identifying device under the AMD IOMMU (and just like for
MSIs, only the identifying device is used to remap interrupts here,
with no regard to an interrupt's destination).
Additionally, LAPIC initiated EOIs (for level triggered interrupts) too
use only the vector for identifying which interrupts to end. While this
generally causes no significant problem (at worst an interrupt would be
re-raised without a new interrupt event actually having occurred), it
still seems better to avoid the situation.
For this second aspect, a distinction is being made between the
traditional and the directed-EOI cases: In the former, vectors should
not be shared throughout all IO-APICs in the system, while in the
latter case only individual IO-APICs need to be contrained (or, if the
firmware indicates so, sub- groups of them having the same GSI appear
at multiple pins).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -69,6 +69,34 @@ int __read_mostly nr_ioapics;
#define ioapic_has_eoi_reg(apic) (mp_ioapics[(apic)].mpc_apicver >= 0x20)
+static int apic_pin_2_gsi_irq(int apic, int pin);
+
+static vmask_t *__read_mostly vector_map[MAX_IO_APICS];
+
+static void share_vector_maps(unsigned int src, unsigned int dst)
+{
+ unsigned int pin;
+
+ if (vector_map[src] == vector_map[dst])
+ return;
+
+ bitmap_or(vector_map[src]->_bits, vector_map[src]->_bits,
+ vector_map[dst]->_bits, NR_VECTORS);
+
+ for (pin = 0; pin < nr_ioapic_registers[dst]; ++pin) {
+ int irq = apic_pin_2_gsi_irq(dst, pin);
+ struct irq_cfg *cfg;
+
+ if (irq < 0)
+ continue;
+ cfg = irq_cfg(irq);
+ if (cfg->used_vectors == vector_map[dst])
+ cfg->used_vectors = vector_map[src];
+ }
+
+ vector_map[dst] = vector_map[src];
+}
+
/*
* This is performance-critical, we want to do it O(1)
*
@@ -109,6 +137,7 @@ static void add_pin_to_irq(unsigned int
}
entry->apic = apic;
entry->pin = pin;
+ share_vector_maps(irq_2_pin[irq].apic, apic);
}
/*
@@ -124,6 +153,7 @@ static void __init replace_pin_at_irq(un
if (entry->apic == oldapic && entry->pin == oldpin) {
entry->apic = newapic;
entry->pin = newpin;
+ share_vector_maps(oldapic, newapic);
}
if (!entry->next)
break;
@@ -131,6 +161,16 @@ static void __init replace_pin_at_irq(un
}
}
+vmask_t *io_apic_get_used_vector_map(unsigned int irq)
+{
+ struct irq_pin_list *entry = irq_2_pin + irq;
+
+ if (entry->pin == -1)
+ return NULL;
+
+ return vector_map[entry->apic];
+}
+
struct IO_APIC_route_entry **alloc_ioapic_entries(void)
{
int apic;
@@ -1314,6 +1354,18 @@ static void __init enable_IO_APIC(void)
for (i = irq_2_pin_free_entry = nr_irqs_gsi; i < PIN_MAP_SIZE; i++)
irq_2_pin[i].next = i + 1;
+ if (directed_eoi_enabled) {
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ vector_map[apic] = xzalloc(vmask_t);
+ BUG_ON(!vector_map[apic]);
+ }
+ } else {
+ vector_map[0] = xzalloc(vmask_t);
+ BUG_ON(!vector_map[0]);
+ for (apic = 1; apic < nr_ioapics; apic++)
+ vector_map[apic] = vector_map[0];
+ }
+
for(apic = 0; apic < nr_ioapics; apic++) {
int pin;
/* See if any of the pins is in ExtINT mode */
@@ -2479,13 +2531,12 @@ int ioapic_guest_write(unsigned long phy
}
if ( cfg->vector <= 0 || cfg->vector > LAST_DYNAMIC_VECTOR ) {
+ add_pin_to_irq(irq, apic, pin);
vector = assign_irq_vector(irq);
if ( vector < 0 )
return vector;
printk(XENLOG_INFO "allocated vector %02x for irq %d\n", vector, irq);
-
- add_pin_to_irq(irq, apic, pin);
}
spin_lock(&pcidevs_lock);
spin_lock(&dom0->event_lock);
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -403,6 +403,11 @@ static vmask_t *irq_get_used_vector_mask
}
}
}
+ else if ( IO_APIC_IRQ(irq) &&
+ opt_irq_vector_map != OPT_IRQ_VECTOR_MAP_NONE )
+ {
+ ret = io_apic_get_used_vector_map(irq);
+ }
return ret;
}
--- a/xen/include/asm-x86/irq.h
+++ b/xen/include/asm-x86/irq.h
@@ -113,6 +113,7 @@ void setup_IO_APIC(void);
void disable_IO_APIC(void);
void print_IO_APIC(void);
void setup_ioapic_dest(void);
+vmask_t *io_apic_get_used_vector_map(unsigned int irq);
extern unsigned long io_apic_irqs;
++++++ 24157-x86-xstate-init.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1321604565 -3600
# Node ID 7b5e1cb94bfa43a9268479b9a255fb88c07e4ce2
# Parent f29b5bd6e25fd78409baa5461914c67065f7579f
x86/xsave: provide guests with finit-like environment
Without the use of xsave, guests get their initial floating point
environment set up with finit. At least NetWare actually depends on
this (in particular on all exceptions being masked), so to be
consistent set the same environment also when using xsave. This is
also in line with all SSE exceptions getting masked initially.
To avoid further fragile casts in xstate_alloc_save_area() the patch
also changes xsave_struct's fpu_see member to have actually usable
fields.
The patch was tested in its technically identical, but modified-file-
wise different 4.1.2 version.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Tested-by: Charles Arnold <carnold@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -92,11 +92,14 @@ void setup_fpu(struct vcpu *v)
v->fpu_dirtied = 1;
}
+#define FCW_DEFAULT 0x037f
+#define MXCSR_DEFAULT 0x1f80
+
static void init_fpu(void)
{
asm volatile ( "fninit" );
if ( cpu_has_xmm )
- load_mxcsr(0x1f80);
+ load_mxcsr(MXCSR_DEFAULT);
}
void save_init_fpu(struct vcpu *v)
@@ -287,7 +290,7 @@ void xsave_init(void)
int xsave_alloc_save_area(struct vcpu *v)
{
- void *save_area;
+ struct xsave_struct *save_area;
if ( !cpu_has_xsave || is_idle_vcpu(v) )
return 0;
@@ -300,8 +303,9 @@ int xsave_alloc_save_area(struct vcpu *v
return -ENOMEM;
memset(save_area, 0, xsave_cntxt_size);
- ((u32 *)save_area)[6] = 0x1f80; /* MXCSR */
- *(uint64_t *)(save_area + 512) = XSTATE_FP_SSE; /* XSETBV */
+ save_area->fpu_sse.fcw = FCW_DEFAULT;
+ save_area->fpu_sse.mxcsr = MXCSR_DEFAULT;
+ save_area->xsave_hdr.xstate_bv = XSTATE_FP_SSE;
v->arch.xsave_area = save_area;
v->arch.xcr0 = XSTATE_FP_SSE;
--- a/xen/include/asm-x86/i387.h
+++ b/xen/include/asm-x86/i387.h
@@ -37,7 +37,29 @@ bool_t xsave_enabled(const struct vcpu *
struct xsave_struct
{
- struct { char x[512]; } fpu_sse; /* FPU/MMX, SSE */
+ union { /* FPU/MMX, SSE */
+ char x[512];
+ struct {
+ uint16_t fcw;
+ uint16_t fsw;
+ uint8_t ftw;
+ uint8_t rsvd1;
+ uint16_t fop;
+ union {
+#ifdef __x86_64__
+ uint64_t addr;
+#endif
+ struct {
+ uint32_t offs;
+ uint16_t sel;
+ uint16_t rsvd;
+ };
+ } fip, fdp;
+ uint32_t mxcsr;
+ uint32_t mxcsr_mask;
+ /* data registers follow here */
+ };
+ } fpu_sse;
struct {
u64 xstate_bv;
++++++ 24168-x86-vioapic-clear-remote_irr.patch ++++++
References: bnc#694863
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1321864171 -3600
# Node ID 9c350ab8d3ea64866421de756ab2bf3daaf63187
# Parent 335e8273a3f34a5e2972643a028f83684609f1c1
x86/vioapic: clear remote IRR when switching RTE to edge triggered mode
Xen itself (as much as Linux) relies on this behavior, so it should
also emulate it properly. Not doing so reportedly gets in the way of
kexec inside a HVM guest.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Tested-by: Olaf Hering <olaf@xxxxxxxxx>
--- a/xen/arch/x86/hvm/vioapic.c
+++ b/xen/arch/x86/hvm/vioapic.c
@@ -154,8 +154,9 @@ static void vioapic_write_redirent(
{
vlapic_adjust_i8259_target(d);
}
- else if ( (ent.fields.trig_mode == VIOAPIC_LEVEL_TRIG) &&
- !ent.fields.mask &&
+ else if ( ent.fields.trig_mode == VIOAPIC_EDGE_TRIG )
+ pent->fields.remote_irr = 0;
+ else if ( !ent.fields.mask &&
!ent.fields.remote_irr &&
hvm_irq->gsi_assert_count[idx] )
{
++++++ 24171-x86waitqueue_Allocate_whole_page_for_shadow_stack..patch ++++++
changeset: 24171:fe80909663c1
user: Keir Fraser <keir@xxxxxxx>
date: Tue Nov 22 13:00:21 2011 +0000
files: xen/common/wait.c
description:
x86,waitqueue: Allocate whole page for shadow stack.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
---
xen/common/wait.c | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
Index: xen-4.1.2-testing/xen/common/wait.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/wait.c
+++ xen-4.1.2-testing/xen/common/wait.c
@@ -33,7 +33,7 @@ struct waitqueue_vcpu {
* hypervisor context before sleeping (descheduling), setjmp/longjmp-style.
*/
void *esp;
- char stack[3000];
+ char *stack;
#endif
};
@@ -41,11 +41,19 @@ int init_waitqueue_vcpu(struct vcpu *v)
{
struct waitqueue_vcpu *wqv;
- wqv = xmalloc(struct waitqueue_vcpu);
+ wqv = xzalloc(struct waitqueue_vcpu);
if ( wqv == NULL )
return -ENOMEM;
- memset(wqv, 0, sizeof(*wqv));
+#ifdef CONFIG_X86
+ wqv->stack = alloc_xenheap_page();
+ if ( wqv->stack == NULL )
+ {
+ xfree(wqv);
+ return -ENOMEM;
+ }
+#endif
+
INIT_LIST_HEAD(&wqv->list);
wqv->vcpu = v;
@@ -63,6 +71,9 @@ void destroy_waitqueue_vcpu(struct vcpu
return;
BUG_ON(!list_empty(&wqv->list));
+#ifdef CONFIG_X86
+ free_xenheap_page(wqv->stack);
+#endif
xfree(wqv);
v->waitqueue_vcpu = NULL;
@@ -115,7 +126,7 @@ static void __prepare_to_wait(struct wai
: "=S" (wqv->esp)
: "c" (cpu_info), "D" (wqv->stack)
: "memory" );
- BUG_ON((cpu_info - (char *)wqv->esp) > sizeof(wqv->stack));
+ BUG_ON((cpu_info - (char *)wqv->esp) > PAGE_SIZE);
}
static void __finish_wait(struct waitqueue_vcpu *wqv)
++++++ 24178-debug_Add_domain-vcpu_pause_count_info_to_d_key..patch ++++++
changeset: 24178:1f2a06dbbb69
user: Keir Fraser <keir@xxxxxxx>
date: Tue Nov 22 15:35:26 2011 +0000
files: xen/common/keyhandler.c
description:
debug: Add domain/vcpu pause_count info to 'd' key.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
---
xen/common/keyhandler.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
Index: xen-4.1.2-testing/xen/common/keyhandler.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/keyhandler.c
+++ xen-4.1.2-testing/xen/common/keyhandler.c
@@ -244,9 +244,10 @@ static void dump_domains(unsigned char k
unsigned int i;
printk("General information for domain %u:\n", d->domain_id);
cpuset_print(tmpstr, sizeof(tmpstr), d->domain_dirty_cpumask);
- printk(" refcnt=%d dying=%d nr_pages=%d xenheap_pages=%d "
- "dirty_cpus=%s max_pages=%u\n",
+ printk(" refcnt=%d dying=%d pause_count=%d\n",
atomic_read(&d->refcnt), d->is_dying,
+ atomic_read(&d->pause_count));
+ printk(" nr_pages=%d xenheap_pages=%d dirty_cpus=%s max_pages=%u\n",
d->tot_pages, d->xenheap_pages, tmpstr, d->max_pages);
printk(" handle=%02x%02x%02x%02x-%02x%02x-%02x%02x-"
"%02x%02x-%02x%02x%02x%02x%02x%02x vm_assist=%08lx\n",
@@ -270,17 +271,18 @@ static void dump_domains(unsigned char k
d->domain_id);
for_each_vcpu ( d, v )
{
- printk(" VCPU%d: CPU%d [has=%c] flags=%lx poll=%d "
+ printk(" VCPU%d: CPU%d [has=%c] poll=%d "
"upcall_pend = %02x, upcall_mask = %02x ",
v->vcpu_id, v->processor,
- v->is_running ? 'T':'F',
- v->pause_flags, v->poll_evtchn,
+ v->is_running ? 'T':'F', v->poll_evtchn,
vcpu_info(v, evtchn_upcall_pending),
vcpu_info(v, evtchn_upcall_mask));
cpuset_print(tmpstr, sizeof(tmpstr), v->vcpu_dirty_cpumask);
printk("dirty_cpus=%s ", tmpstr);
cpuset_print(tmpstr, sizeof(tmpstr), v->cpu_affinity);
printk("cpu_affinity=%s\n", tmpstr);
+ printk(" pause_count=%d pause_flags=%lx\n",
+ atomic_read(&v->pause_count), v->pause_flags);
arch_dump_vcpu_info(v);
periodic_timer_print(tmpstr, sizeof(tmpstr), v->periodic_period);
printk(" %s\n", tmpstr);
++++++ 24189-x86-p2m-pod-locking.patch ++++++
# HG changeset patch
# User Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
# Date 1322148057 0
# Node ID 7da681c490e0a8a2b3f1fb311d254dc7ce618a43
# Parent b082fdc52ad7607d93b59148fb289aafe21f294b
x86/mm/p2m: fix pod locking
The path p2m-lookup -> p2m-pt->get_entry -> 1GB PoD superpage ->
pod_demand_populate ends in the pod code performing a p2m_set_entry with
no locks held (in order to split the 1GB superpage into 512 2MB ones)
Further, it calls p2m_unlock after that, which will break the spinlock.
This patch attempts to fix that.
Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -1244,7 +1244,6 @@ p2m_pod_demand_populate(struct p2m_domai
set_p2m_entry(p2m, gfn_aligned, _mfn(POPULATE_ON_DEMAND_MFN), 9,
p2m_populate_on_demand, p2m->default_access);
audit_p2m(p2m, 1);
- p2m_unlock(p2m);
return 0;
}
@@ -1602,7 +1601,8 @@ pod_retry_l3:
{
if ( q != p2m_query )
{
- if ( !p2m_pod_demand_populate(p2m, gfn, 18, q) )
+ if ( !p2m_pod_check_and_populate(p2m, gfn,
+ (l1_pgentry_t *) &l3e, 18, q) )
goto pod_retry_l3;
}
else
@@ -1733,7 +1733,8 @@ static mfn_t p2m_gfn_to_mfn_current(stru
/* The read has succeeded, so we know that mapping exists */
if ( q != p2m_query )
{
- if ( !p2m_pod_demand_populate(p2m, gfn, 18, q) )
+ if ( !p2m_pod_check_and_populate(p2m, gfn,
+ (l1_pgentry_t *) &l3e, 18, q) )
goto pod_retry_l3;
p2mt = p2m_invalid;
printk("%s: Allocate 1GB failed!\n", __func__);
++++++ 24190-hap-log-dirty-disable-rc.patch ++++++
# HG changeset patch
# User Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
# Date 1322148057 0
# Node ID 6b3d8250ee2c63d90680c142549123a4b1559f55
# Parent 7da681c490e0a8a2b3f1fb311d254dc7ce618a43
x86/mm: change return code for log-dirty disabling
Disabling log dirty mode in HAP always returns -EINVAL. Make it
return the correct rc on success.
Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
Signed-off-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -710,6 +710,8 @@ int hap_domctl(struct domain *d, xen_dom
return rc;
case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION:
sc->mb = hap_get_allocation(d);
+ /* Fall through... */
+ case XEN_DOMCTL_SHADOW_OP_OFF:
return 0;
default:
HAP_ERROR("Bad hap domctl op %u\n", sc->op);
++++++ 24193-hap-track-dirty-vram-rc.patch ++++++
# HG changeset patch
# User Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
# Date 1322149491 0
# Node ID 67d2ac426defedad9c10eb339019f9dc9f02d2ae
# Parent 3c864e04c2ad060ca1cac6579994777993fef6e6
Trivial fix for rc val in hap track dirty vram
Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -143,7 +143,7 @@ int hap_track_dirty_vram(struct domain *
}
else if ( !paging_mode_log_dirty(d) && !dirty_vram )
{
- rc -ENOMEM;
+ rc = -ENOMEM;
if ( (dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
goto param_fail;
++++++ 24195-waitqueue_Detect_saved-stack_overflow_and_crash_the_guest..patch
++++++
changeset: 24195:9b65336f688f
user: Keir Fraser <keir@xxxxxxx>
date: Thu Nov 24 15:48:10 2011 +0000
files: xen/common/wait.c
description:
waitqueue: Detect saved-stack overflow and crash the guest.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
---
xen/common/wait.c | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
Index: xen-4.1.2-testing/xen/common/wait.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/wait.c
+++ xen-4.1.2-testing/xen/common/wait.c
@@ -106,13 +106,16 @@ void wake_up(struct waitqueue_head *wq)
static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
{
char *cpu_info = (char *)get_cpu_info();
+
asm volatile (
#ifdef CONFIG_X86_64
"push %%rax; push %%rbx; push %%rcx; push %%rdx; push %%rdi; "
"push %%rbp; push %%r8; push %%r9; push %%r10; push %%r11; "
"push %%r12; push %%r13; push %%r14; push %%r15; call 1f; "
"1: mov 80(%%rsp),%%rdi; mov 96(%%rsp),%%rcx; mov %%rsp,%%rsi; "
- "sub %%rsi,%%rcx; rep movsb; mov %%rsp,%%rsi; pop %%rax; "
+ "sub %%rsi,%%rcx; cmp %3,%%rcx; jbe 2f; "
+ "xor %%esi,%%esi; jmp 3f; "
+ "2: rep movsb; mov %%rsp,%%rsi; 3: pop %%rax; "
"pop %%r15; pop %%r14; pop %%r13; pop %%r12; "
"pop %%r11; pop %%r10; pop %%r9; pop %%r8; "
"pop %%rbp; pop %%rdi; pop %%rdx; pop %%rcx; pop %%rbx; pop %%rax"
@@ -120,13 +123,20 @@ static void __prepare_to_wait(struct wai
"push %%eax; push %%ebx; push %%ecx; push %%edx; push %%edi; "
"push %%ebp; call 1f; "
"1: mov 8(%%esp),%%edi; mov 16(%%esp),%%ecx; mov %%esp,%%esi; "
- "sub %%esi,%%ecx; rep movsb; mov %%esp,%%esi; pop %%eax; "
+ "sub %%esi,%%ecx; cmp %3,%%ecx; jbe 2f; "
+ "xor %%esi,%%esi; jmp 3f; "
+ "2: rep movsb; mov %%esp,%%esi; 3: pop %%eax; "
"pop %%ebp; pop %%edi; pop %%edx; pop %%ecx; pop %%ebx; pop %%eax"
#endif
: "=S" (wqv->esp)
- : "c" (cpu_info), "D" (wqv->stack)
+ : "c" (cpu_info), "D" (wqv->stack), "i" (PAGE_SIZE)
: "memory" );
- BUG_ON((cpu_info - (char *)wqv->esp) > PAGE_SIZE);
+
+ if ( unlikely(wqv->esp == 0) )
+ {
+ gdprintk(XENLOG_ERR, "Stack too large in %s\n", __FUNCTION__);
+ domain_crash_synchronous();
+ }
}
static void __finish_wait(struct waitqueue_vcpu *wqv)
@@ -162,6 +172,7 @@ void prepare_to_wait(struct waitqueue_he
struct vcpu *curr = current;
struct waitqueue_vcpu *wqv = curr->waitqueue_vcpu;
+ ASSERT(!in_atomic());
ASSERT(list_empty(&wqv->list));
spin_lock(&wq->lock);
++++++
24196-waitqueue_Reorder_prepare_to_wait_so_that_vcpu_is_definitely_on_the.patch
++++++
changeset: 24196:de4fe05fe887
user: Keir Fraser <keir@xxxxxxx>
date: Thu Nov 24 15:49:25 2011 +0000
files: xen/common/wait.c
description:
waitqueue: Reorder prepare_to_wait() so that vcpu is definitely on the
queue on exit, even after a wakeup.
Otherwise, when we go round the loop in wait_event(), we may not
actually sleep after the first iteration, as we do not put ourselves
back on the queue on wakeup.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
---
xen/common/wait.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
Index: xen-4.1.2-testing/xen/common/wait.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/wait.c
+++ xen-4.1.2-testing/xen/common/wait.c
@@ -107,6 +107,8 @@ static void __prepare_to_wait(struct wai
{
char *cpu_info = (char *)get_cpu_info();
+ ASSERT(wqv->esp == 0);
+
asm volatile (
#ifdef CONFIG_X86_64
"push %%rax; push %%rbx; push %%rcx; push %%rdx; push %%rdi; "
@@ -173,14 +175,13 @@ void prepare_to_wait(struct waitqueue_he
struct waitqueue_vcpu *wqv = curr->waitqueue_vcpu;
ASSERT(!in_atomic());
- ASSERT(list_empty(&wqv->list));
+ __prepare_to_wait(wqv);
+ ASSERT(list_empty(&wqv->list));
spin_lock(&wq->lock);
list_add_tail(&wqv->list, &wq->list);
vcpu_pause_nosync(curr);
spin_unlock(&wq->lock);
-
- __prepare_to_wait(wqv);
}
void finish_wait(struct waitqueue_head *wq)
++++++
24197-x86-waitqueue_Because_we_have_per-cpu_stacks_we_must_wake_up_on_teh.patch
++++++
changeset: 24197:ca92c4a8b31f
user: Keir Fraser <keir@xxxxxxx>
date: Thu Nov 24 15:50:08 2011 +0000
files: xen/common/wait.c
description:
x86/waitqueue: Because we have per-cpu stacks, we must wake up on teh
same cpu that we slept on. Otherwise stack references are bogus on
wakeup.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
---
xen/common/wait.c | 27 +++++++++++++++++++++++++++
1 file changed, 27 insertions(+)
Index: xen-4.1.2-testing/xen/common/wait.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/wait.c
+++ xen-4.1.2-testing/xen/common/wait.c
@@ -34,6 +34,8 @@ struct waitqueue_vcpu {
*/
void *esp;
char *stack;
+ cpumask_t saved_affinity;
+ unsigned int wakeup_cpu;
#endif
};
@@ -106,9 +108,19 @@ void wake_up(struct waitqueue_head *wq)
static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
{
char *cpu_info = (char *)get_cpu_info();
+ struct vcpu *curr = current;
ASSERT(wqv->esp == 0);
+ /* Save current VCPU affinity; force wakeup on *this* CPU only. */
+ wqv->wakeup_cpu = smp_processor_id();
+ wqv->saved_affinity = curr->cpu_affinity;
+ if ( vcpu_set_affinity(curr, cpumask_of(wqv->wakeup_cpu)) )
+ {
+ gdprintk(XENLOG_ERR, "Unable to set vcpu affinity\n");
+ domain_crash_synchronous();
+ }
+
asm volatile (
#ifdef CONFIG_X86_64
"push %%rax; push %%rbx; push %%rcx; push %%rdx; push %%rdi; "
@@ -144,6 +156,7 @@ static void __prepare_to_wait(struct wai
static void __finish_wait(struct waitqueue_vcpu *wqv)
{
wqv->esp = NULL;
+ (void)vcpu_set_affinity(current, &wqv->saved_affinity);
}
void check_wakeup_from_wait(void)
@@ -155,6 +168,20 @@ void check_wakeup_from_wait(void)
if ( likely(wqv->esp == NULL) )
return;
+ /* Check if we woke up on the wrong CPU. */
+ if ( unlikely(smp_processor_id() != wqv->wakeup_cpu) )
+ {
+ /* Re-set VCPU affinity and re-enter the scheduler. */
+ struct vcpu *curr = current;
+ wqv->saved_affinity = curr->cpu_affinity;
+ if ( vcpu_set_affinity(curr, cpumask_of(wqv->wakeup_cpu)) )
+ {
+ gdprintk(XENLOG_ERR, "Unable to set vcpu affinity\n");
+ domain_crash_synchronous();
+ }
+ wait(); /* takes us back into the scheduler */
+ }
+
asm volatile (
"mov %1,%%"__OP"sp; rep movsb; jmp *(%%"__OP"sp)"
: : "S" (wqv->stack), "D" (wqv->esp),
++++++ 24201-x86-pcpu-platform-op.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1322153786 -3600
# Node ID 9c6bea25f71233787a36893deaf0e811f2dcb8d8
# Parent 480531cab3f4468b1ec9b549bc84d66e420ce685
x86: small fixes to pcpu platform op handling
XENPF_get_cpuinfo should init the flags output field rather than only
modify it.
XENPF_cpu_online must check for the input CPU number to be in range.
XENPF_cpu_offline must also do that, and should also reject attempts to
offline CPU 0 (this fails in cpu_down() too, but preventing this here
appears more correct given that the code here calls
continue_hypercall_on_cpu(0, ...), which would be flawed if cpu_down()
would ever allow bringing down CPU 0 (and a distinct error code is
easier to deal with when debugging issues).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -449,13 +449,14 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
if ( (g_info->xen_cpuid >= NR_CPUS) ||
!cpu_present(g_info->xen_cpuid) )
{
- g_info->flags |= XEN_PCPU_FLAGS_INVALID;
+ g_info->flags = XEN_PCPU_FLAGS_INVALID;
}
else
{
g_info->apic_id = x86_cpu_to_apicid[g_info->xen_cpuid];
g_info->acpi_id = acpi_get_processor_id(g_info->xen_cpuid);
ASSERT(g_info->apic_id != BAD_APICID);
+ g_info->flags = 0;
if (cpu_online(g_info->xen_cpuid))
g_info->flags |= XEN_PCPU_FLAGS_ONLINE;
}
@@ -472,7 +473,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
{
int cpu = op->u.cpu_ol.cpuid;
- if ( !cpu_present(cpu) )
+ if ( cpu >= NR_CPUS || !cpu_present(cpu) )
{
ret = -EINVAL;
break;
@@ -493,7 +494,13 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
{
int cpu = op->u.cpu_ol.cpuid;
- if ( !cpu_present(cpu) )
+ if ( cpu == 0 )
+ {
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ if ( cpu >= NR_CPUS || !cpu_present(cpu) )
{
ret = -EINVAL;
break;
++++++ 24208-xenpaging_remove_filename_from_comment.patch ++++++
changeset: 24208:31fce41fc2b2
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:01:15 2011 +0100
files: tools/xenpaging/file_ops.c tools/xenpaging/policy_default.c
tools/xenpaging/xenpaging.c
description:
xenpaging: remove filename from comment
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/file_ops.c | 1 -
tools/xenpaging/policy_default.c | 1 -
tools/xenpaging/xenpaging.c | 1 -
3 files changed, 3 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/file_ops.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/file_ops.c
+++ xen-4.1.2-testing/tools/xenpaging/file_ops.c
@@ -1,5 +1,4 @@
/******************************************************************************
- * tools/xenpaging/file_ops.c
*
* Common file operations.
*
Index: xen-4.1.2-testing/tools/xenpaging/policy_default.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/policy_default.c
+++ xen-4.1.2-testing/tools/xenpaging/policy_default.c
@@ -1,5 +1,4 @@
/******************************************************************************
- * tools/xenpaging/policy.c
*
* Xen domain paging default policy.
*
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -1,5 +1,4 @@
/******************************************************************************
- * tools/xenpaging/xenpaging.c
*
* Domain paging.
* Copyright (c) 2009 by Citrix Systems, Inc. (Patrick Colp)
++++++ 24209-xenpaging_remove_obsolete_comment_in_resume_path.patch ++++++
changeset: 24209:fe8946916512
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:01:20 2011 +0100
files: tools/xenpaging/xenpaging.c
description:
xenpaging: remove obsolete comment in resume path
Remove stale comment.
If a page was populated several times the vcpu is paused and
xenpaging has to unpause it again.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 1 -
1 file changed, 1 deletion(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -744,7 +744,6 @@ int main(int argc, char *argv[])
!!(req.flags & MEM_EVENT_FLAG_EVICT_FAIL) );
/* Tell Xen to resume the vcpu */
- /* XXX: Maybe just check if the vcpu was paused? */
if ( req.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
{
/* Prepare the response */
++++++ 24210-xenpaging_use_PERROR_to_print_errno.patch ++++++
changeset: 24210:d1d54cdc4a7b
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:01:32 2011 +0100
files: tools/xenpaging/xenpaging.c
description:
xenpaging: use PERROR to print errno
v3:
- adjust arguments for xc_mem_paging_enable() failures
v2:
- move changes to file_op() to different patch
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 58 ++++++++++++++++++++++----------------------
1 file changed, 29 insertions(+), 29 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -90,7 +90,7 @@ static int xenpaging_wait_for_event_or_t
if (errno == EINTR)
return 0;
- ERROR("Poll exited with an error");
+ PERROR("Poll exited with an error");
return -errno;
}
@@ -121,7 +121,7 @@ static int xenpaging_wait_for_event_or_t
port = xc_evtchn_pending(xce);
if ( port == -1 )
{
- ERROR("Failed to read port from event channel");
+ PERROR("Failed to read port from event channel");
rc = -1;
goto err;
}
@@ -129,7 +129,7 @@ static int xenpaging_wait_for_event_or_t
rc = xc_evtchn_unmask(xce, port);
if ( rc < 0 )
{
- ERROR("Failed to unmask event channel port");
+ PERROR("Failed to unmask event channel port");
}
}
err:
@@ -185,7 +185,7 @@ static xenpaging_t *xenpaging_init(domid
paging->xs_handle = xs_open(0);
if ( paging->xs_handle == NULL )
{
- ERROR("Error initialising xenstore connection");
+ PERROR("Error initialising xenstore connection");
goto err;
}
@@ -193,7 +193,7 @@ static xenpaging_t *xenpaging_init(domid
snprintf(watch_token, sizeof(watch_token), "%u", domain_id);
if ( xs_watch(paging->xs_handle, "@releaseDomain", watch_token) == false )
{
- ERROR("Could not bind to shutdown watch\n");
+ PERROR("Could not bind to shutdown watch\n");
goto err;
}
@@ -214,7 +214,7 @@ static xenpaging_t *xenpaging_init(domid
paging->mem_event.shared_page = init_page();
if ( paging->mem_event.shared_page == NULL )
{
- ERROR("Error initialising shared page");
+ PERROR("Error initialising shared page");
goto err;
}
@@ -222,7 +222,7 @@ static xenpaging_t *xenpaging_init(domid
paging->mem_event.ring_page = init_page();
if ( paging->mem_event.ring_page == NULL )
{
- ERROR("Error initialising ring page");
+ PERROR("Error initialising ring page");
goto err;
}
@@ -249,7 +249,7 @@ static xenpaging_t *xenpaging_init(domid
ERROR("xenpaging not supported in a PoD guest");
break;
default:
- ERROR("Error initialising shared page: %s", strerror(errno));
+ PERROR("Error initialising shared page");
break;
}
goto err;
@@ -259,7 +259,7 @@ static xenpaging_t *xenpaging_init(domid
paging->mem_event.xce_handle = xc_evtchn_open(NULL, 0);
if ( paging->mem_event.xce_handle == NULL )
{
- ERROR("Failed to open event channel");
+ PERROR("Failed to open event channel");
goto err;
}
@@ -269,7 +269,7 @@ static xenpaging_t *xenpaging_init(domid
paging->mem_event.shared_page->port);
if ( rc < 0 )
{
- ERROR("Failed to bind event channel");
+ PERROR("Failed to bind event channel");
goto err;
}
@@ -279,7 +279,7 @@ static xenpaging_t *xenpaging_init(domid
paging->domain_info = malloc(sizeof(xc_domaininfo_t));
if ( paging->domain_info == NULL )
{
- ERROR("Error allocating memory for domain info");
+ PERROR("Error allocating memory for domain info");
goto err;
}
@@ -287,7 +287,7 @@ static xenpaging_t *xenpaging_init(domid
paging->domain_info);
if ( rc != 1 )
{
- ERROR("Error getting domain info");
+ PERROR("Error getting domain info");
goto err;
}
@@ -295,7 +295,7 @@ static xenpaging_t *xenpaging_init(domid
paging->bitmap = bitmap_alloc(paging->domain_info->max_pages);
if ( !paging->bitmap )
{
- ERROR("Error allocating bitmap");
+ PERROR("Error allocating bitmap");
goto err;
}
DPRINTF("max_pages = %"PRIx64"\n", paging->domain_info->max_pages);
@@ -311,7 +311,7 @@ static xenpaging_t *xenpaging_init(domid
rc = policy_init(paging);
if ( rc != 0 )
{
- ERROR("Error initialising policy");
+ PERROR("Error initialising policy");
goto err;
}
@@ -358,14 +358,14 @@ static int xenpaging_teardown(xenpaging_
rc = xc_mem_paging_disable(xch, paging->mem_event.domain_id);
if ( rc != 0 )
{
- ERROR("Error tearing down domain paging in xen");
+ PERROR("Error tearing down domain paging in xen");
}
/* Unbind VIRQ */
rc = xc_evtchn_unbind(paging->mem_event.xce_handle,
paging->mem_event.port);
if ( rc != 0 )
{
- ERROR("Error unbinding event port");
+ PERROR("Error unbinding event port");
}
paging->mem_event.port = -1;
@@ -373,7 +373,7 @@ static int xenpaging_teardown(xenpaging_
rc = xc_evtchn_close(paging->mem_event.xce_handle);
if ( rc != 0 )
{
- ERROR("Error closing event channel");
+ PERROR("Error closing event channel");
}
paging->mem_event.xce_handle = NULL;
@@ -384,7 +384,7 @@ static int xenpaging_teardown(xenpaging_
rc = xc_interface_close(xch);
if ( rc != 0 )
{
- ERROR("Error closing connection to xen");
+ PERROR("Error closing connection to xen");
}
return 0;
@@ -444,7 +444,7 @@ static int xenpaging_evict_page(xenpagin
PROT_READ | PROT_WRITE, &gfn, 1);
if ( page == NULL )
{
- ERROR("Error mapping page");
+ PERROR("Error mapping page");
goto out;
}
@@ -452,8 +452,8 @@ static int xenpaging_evict_page(xenpagin
ret = write_page(fd, page, i);
if ( ret != 0 )
{
+ PERROR("Error copying page");
munmap(page, PAGE_SIZE);
- ERROR("Error copying page");
goto out;
}
@@ -464,7 +464,7 @@ static int xenpaging_evict_page(xenpagin
victim->gfn);
if ( ret != 0 )
{
- ERROR("Error evicting page");
+ PERROR("Error evicting page");
goto out;
}
@@ -520,7 +520,7 @@ static int xenpaging_populate_page(xenpa
sleep(1);
continue;
}
- ERROR("Error preparing for page in");
+ PERROR("Error preparing for page in");
goto out_map;
}
}
@@ -532,7 +532,7 @@ static int xenpaging_populate_page(xenpa
PROT_READ | PROT_WRITE, &gfn, 1);
if ( page == NULL )
{
- ERROR("Error mapping page: page is null");
+ PERROR("Error mapping page: page is null");
goto out_map;
}
@@ -540,7 +540,7 @@ static int xenpaging_populate_page(xenpa
ret = read_page(fd, page, i);
if ( ret != 0 )
{
- ERROR("Error reading page");
+ PERROR("Error reading page");
goto out;
}
@@ -579,7 +579,7 @@ static int evict_victim(xenpaging_t *pag
{
if ( j++ % 1000 == 0 )
if ( xenpaging_mem_paging_flush_ioemu_cache(paging) )
- ERROR("Error flushing ioemu cache");
+ PERROR("Error flushing ioemu cache");
}
}
while ( ret );
@@ -670,7 +670,7 @@ int main(int argc, char *argv[])
rc = xenpaging_wait_for_event_or_timeout(paging);
if ( rc < 0 )
{
- ERROR("Error getting event");
+ PERROR("Error getting event");
goto out;
}
else if ( rc != 0 )
@@ -710,7 +710,7 @@ int main(int argc, char *argv[])
rc = xenpaging_populate_page(paging, req.gfn, fd, i);
if ( rc != 0 )
{
- ERROR("Error populating page");
+ PERROR("Error populating page");
goto out;
}
}
@@ -723,7 +723,7 @@ int main(int argc, char *argv[])
rc = xenpaging_resume_page(paging, &rsp, 1);
if ( rc != 0 )
{
- ERROR("Error resuming page");
+ PERROR("Error resuming page");
goto out;
}
@@ -754,7 +754,7 @@ int main(int argc, char *argv[])
rc = xenpaging_resume_page(paging, &rsp, 0);
if ( rc != 0 )
{
- ERROR("Error resuming");
+ PERROR("Error resuming");
goto out;
}
}
++++++ 24211-xenpaging_simplify_file_op.patch ++++++
changeset: 24211:8ddac056a89e
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:01:39 2011 +0100
files: tools/xenpaging/file_ops.c
description:
xenpaging: simplify file_op
Catch lseek() errors.
Use -1 as return value and let caller read errno.
Remove const casts from buffer pointers, the page is writeable.
Use wrapper for write() which matches the read() prototype.
Remove unused stdarg.h inclusion.
Remove unused macro.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/file_ops.c | 29 +++++++++--------------------
1 file changed, 9 insertions(+), 20 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/file_ops.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/file_ops.c
+++ xen-4.1.2-testing/tools/xenpaging/file_ops.c
@@ -21,55 +21,44 @@
#include <unistd.h>
-#include <stdarg.h>
#include <xc_private.h>
-
-#define page_offset(_pfn) (((off_t)(_pfn)) << PAGE_SHIFT)
-
-
static int file_op(int fd, void *page, int i,
- ssize_t (*fn)(int, const void *, size_t))
+ ssize_t (*fn)(int, void *, size_t))
{
off_t seek_ret;
- int total;
+ int total = 0;
int bytes;
- int ret;
seek_ret = lseek(fd, i << PAGE_SHIFT, SEEK_SET);
+ if ( seek_ret == (off_t)-1 )
+ return -1;
- total = 0;
while ( total < PAGE_SIZE )
{
bytes = fn(fd, page + total, PAGE_SIZE - total);
if ( bytes <= 0 )
- {
- ret = -errno;
- goto err;
- }
+ return -1;
total += bytes;
}
return 0;
-
- err:
- return ret;
}
-static ssize_t my_read(int fd, const void *buf, size_t count)
+static ssize_t my_write(int fd, void *buf, size_t count)
{
- return read(fd, (void *)buf, count);
+ return write(fd, buf, count);
}
int read_page(int fd, void *page, int i)
{
- return file_op(fd, page, i, &my_read);
+ return file_op(fd, page, i, &read);
}
int write_page(int fd, void *page, int i)
{
- return file_op(fd, page, i, &write);
+ return file_op(fd, page, i, &my_write);
}
++++++ 24212-xenpaging_print_gfn_in_failure_case.patch ++++++
changeset: 24212:fbc31627adde
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:01:41 2011 +0100
files: tools/xenpaging/xenpaging.c
description:
xenpaging: print gfn in failure case
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -444,7 +444,7 @@ static int xenpaging_evict_page(xenpagin
PROT_READ | PROT_WRITE, &gfn, 1);
if ( page == NULL )
{
- PERROR("Error mapping page");
+ PERROR("Error mapping page %lx", victim->gfn);
goto out;
}
@@ -452,7 +452,7 @@ static int xenpaging_evict_page(xenpagin
ret = write_page(fd, page, i);
if ( ret != 0 )
{
- PERROR("Error copying page");
+ PERROR("Error copying page %lx", victim->gfn);
munmap(page, PAGE_SIZE);
goto out;
}
@@ -464,7 +464,7 @@ static int xenpaging_evict_page(xenpagin
victim->gfn);
if ( ret != 0 )
{
- PERROR("Error evicting page");
+ PERROR("Error evicting page %lx", victim->gfn);
goto out;
}
@@ -520,7 +520,7 @@ static int xenpaging_populate_page(xenpa
sleep(1);
continue;
}
- PERROR("Error preparing for page in");
+ PERROR("Error preparing %"PRI_xen_pfn" for page-in", gfn);
goto out_map;
}
}
@@ -532,7 +532,7 @@ static int xenpaging_populate_page(xenpa
PROT_READ | PROT_WRITE, &gfn, 1);
if ( page == NULL )
{
- PERROR("Error mapping page: page is null");
+ PERROR("Error mapping page %"PRI_xen_pfn": page is null", gfn);
goto out_map;
}
@@ -540,7 +540,7 @@ static int xenpaging_populate_page(xenpa
ret = read_page(fd, page, i);
if ( ret != 0 )
{
- PERROR("Error reading page");
+ PERROR("Error reading page %"PRI_xen_pfn"", gfn);
goto out;
}
@@ -710,7 +710,7 @@ int main(int argc, char *argv[])
rc = xenpaging_populate_page(paging, req.gfn, fd, i);
if ( rc != 0 )
{
- PERROR("Error populating page");
+ PERROR("Error populating page %"PRIx64"", req.gfn);
goto out;
}
}
@@ -723,7 +723,7 @@ int main(int argc, char *argv[])
rc = xenpaging_resume_page(paging, &rsp, 1);
if ( rc != 0 )
{
- PERROR("Error resuming page");
+ PERROR("Error resuming page %"PRIx64"", req.gfn);
goto out;
}
@@ -754,7 +754,7 @@ int main(int argc, char *argv[])
rc = xenpaging_resume_page(paging, &rsp, 0);
if ( rc != 0 )
{
- PERROR("Error resuming");
+ PERROR("Error resuming page %"PRIx64"", req.gfn);
goto out;
}
}
++++++ 24213-xenpaging_update_xenpaging_init.patch ++++++
changeset: 24213:e3cbbad72382
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:02:22 2011 +0100
files: tools/xenpaging/xenpaging.c
description:
xenpaging: update xenpaging_init
Move comment about xc_handle to the right place.
Allocate paging early and use calloc.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -169,18 +169,21 @@ static xenpaging_t *xenpaging_init(domid
char *p;
int rc;
+ /* Allocate memory */
+ paging = calloc(1, sizeof(xenpaging_t));
+ if ( !paging )
+ goto err;
+
if ( getenv("XENPAGING_DEBUG") )
dbg = (xentoollog_logger *)xtl_createlogger_stdiostream(stderr,
XTL_DEBUG, 0);
- xch = xc_interface_open(dbg, NULL, 0);
+
+ /* Open connection to xen */
+ paging->xc_handle = xch = xc_interface_open(dbg, NULL, 0);
if ( !xch )
- goto err_iface;
+ goto err;
DPRINTF("xenpaging init\n");
- /* Allocate memory */
- paging = malloc(sizeof(xenpaging_t));
- memset(paging, 0, sizeof(xenpaging_t));
-
/* Open connection to xenstore */
paging->xs_handle = xs_open(0);
if ( paging->xs_handle == NULL )
@@ -204,9 +207,6 @@ static xenpaging_t *xenpaging_init(domid
DPRINTF("Setting policy mru_size to %d\n", paging->policy_mru_size);
}
- /* Open connection to xen */
- paging->xc_handle = xch;
-
/* Set domain id */
paging->mem_event.domain_id = domain_id;
@@ -322,7 +322,8 @@ static xenpaging_t *xenpaging_init(domid
{
if ( paging->xs_handle )
xs_close(paging->xs_handle);
- xc_interface_close(xch);
+ if ( xch )
+ xc_interface_close(xch);
if ( paging->mem_event.shared_page )
{
munlock(paging->mem_event.shared_page, PAGE_SIZE);
@@ -340,7 +341,6 @@ static xenpaging_t *xenpaging_init(domid
free(paging);
}
- err_iface:
return NULL;
}
++++++ 24214-xenpaging_remove_xc_dominfo_t_from_paging_t.patch ++++++
changeset: 24214:f06595abfa88
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:02:36 2011 +0100
files: tools/xenpaging/policy_default.c tools/xenpaging/xenpaging.c
tools/xenpaging/xenpaging.h
description:
xenpaging: remove xc_dominfo_t from paging_t
Remove xc_dominfo_t from paging_t, record only max_pages.
This value is used to setup internal data structures.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/policy_default.c | 8 ++++----
tools/xenpaging/xenpaging.c | 27 +++++++++++----------------
tools/xenpaging/xenpaging.h | 4 ++--
3 files changed, 17 insertions(+), 22 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/policy_default.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/policy_default.c
+++ xen-4.1.2-testing/tools/xenpaging/policy_default.c
@@ -41,17 +41,17 @@ int policy_init(xenpaging_t *paging)
int i;
int rc = -ENOMEM;
+ max_pages = paging->max_pages;
+
/* Allocate bitmap for pages not to page out */
- bitmap = bitmap_alloc(paging->domain_info->max_pages);
+ bitmap = bitmap_alloc(max_pages);
if ( !bitmap )
goto out;
/* Allocate bitmap to track unusable pages */
- unconsumed = bitmap_alloc(paging->domain_info->max_pages);
+ unconsumed = bitmap_alloc(max_pages);
if ( !unconsumed )
goto out;
- max_pages = paging->domain_info->max_pages;
-
/* Initialise MRU list of paged in pages */
if ( paging->policy_mru_size > 0 )
mru_size = paging->policy_mru_size;
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -164,6 +164,7 @@ static void *init_page(void)
static xenpaging_t *xenpaging_init(domid_t domain_id, int num_pages)
{
xenpaging_t *paging;
+ xc_domaininfo_t domain_info;
xc_interface *xch;
xentoollog_logger *dbg = NULL;
char *p;
@@ -275,34 +276,29 @@ static xenpaging_t *xenpaging_init(domid
paging->mem_event.port = rc;
- /* Get domaininfo */
- paging->domain_info = malloc(sizeof(xc_domaininfo_t));
- if ( paging->domain_info == NULL )
- {
- PERROR("Error allocating memory for domain info");
- goto err;
- }
-
rc = xc_domain_getinfolist(xch, paging->mem_event.domain_id, 1,
- paging->domain_info);
+ &domain_info);
if ( rc != 1 )
{
PERROR("Error getting domain info");
goto err;
}
+ /* Record number of max_pages */
+ paging->max_pages = domain_info.max_pages;
+
/* Allocate bitmap for tracking pages that have been paged out */
- paging->bitmap = bitmap_alloc(paging->domain_info->max_pages);
+ paging->bitmap = bitmap_alloc(paging->max_pages);
if ( !paging->bitmap )
{
PERROR("Error allocating bitmap");
goto err;
}
- DPRINTF("max_pages = %"PRIx64"\n", paging->domain_info->max_pages);
+ DPRINTF("max_pages = %d\n", paging->max_pages);
- if ( num_pages < 0 || num_pages > paging->domain_info->max_pages )
+ if ( num_pages < 0 || num_pages > paging->max_pages )
{
- num_pages = paging->domain_info->max_pages;
+ num_pages = paging->max_pages;
DPRINTF("setting num_pages to %d\n", num_pages);
}
paging->num_pages = num_pages;
@@ -337,7 +333,6 @@ static xenpaging_t *xenpaging_init(domid
}
free(paging->bitmap);
- free(paging->domain_info);
free(paging);
}
@@ -765,7 +760,7 @@ int main(int argc, char *argv[])
if ( interrupted == SIGTERM || interrupted == SIGINT )
{
int num = 0;
- for ( i = 0; i < paging->domain_info->max_pages; i++ )
+ for ( i = 0; i < paging->max_pages; i++ )
{
if ( test_bit(i, paging->bitmap) )
{
@@ -781,7 +776,7 @@ int main(int argc, char *argv[])
*/
if ( num )
page_in_trigger();
- else if ( i == paging->domain_info->max_pages )
+ else if ( i == paging->max_pages )
break;
}
else
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.h
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.h
@@ -44,11 +44,11 @@ typedef struct xenpaging {
xc_interface *xc_handle;
struct xs_handle *xs_handle;
- xc_domaininfo_t *domain_info;
-
unsigned long *bitmap;
mem_event_t mem_event;
+ /* number of pages for which data structures were allocated */
+ int max_pages;
int num_pages;
int policy_mru_size;
unsigned long pagein_queue[XENPAGING_PAGEIN_QUEUE_SIZE];
++++++ 24215-xenpaging_track_the_number_of_paged-out_pages.patch ++++++
changeset: 24215:dc7dabe2fe99
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:02:38 2011 +0100
files: tools/xenpaging/xenpaging.c tools/xenpaging/xenpaging.h
description:
xenpaging: track the number of paged-out pages
This change is required by subsequent changes.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 8 ++++++++
tools/xenpaging/xenpaging.h | 1 +
2 files changed, 9 insertions(+)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -467,6 +467,9 @@ static int xenpaging_evict_page(xenpagin
/* Notify policy of page being paged out */
policy_notify_paged_out(victim->gfn);
+ /* Record number of evicted pages */
+ paging->num_paged_out++;
+
out:
return ret;
}
@@ -480,8 +483,13 @@ static int xenpaging_resume_page(xenpagi
/* Notify policy of page being paged in */
if ( notify_policy )
+ {
policy_notify_paged_in(rsp->gfn);
+ /* Record number of resumed pages */
+ paging->num_paged_out--;
+ }
+
/* Tell Xen page is ready */
ret = xc_mem_paging_resume(paging->xc_handle, paging->mem_event.domain_id,
rsp->gfn);
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.h
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.h
@@ -49,6 +49,7 @@ typedef struct xenpaging {
mem_event_t mem_event;
/* number of pages for which data structures were allocated */
int max_pages;
+ int num_paged_out;
int num_pages;
int policy_mru_size;
unsigned long pagein_queue[XENPAGING_PAGEIN_QUEUE_SIZE];
++++++ 24216-xenpaging_move_page_add-resume_loops_into_its_own_function..patch
++++++
changeset: 24216:4fe585c2a3e5
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:02:39 2011 +0100
files: tools/xenpaging/xenpaging.c
description:
xenpaging: move page add/resume loops into its own function.
Move page resume loop into its own function.
Move page eviction loop into its own function.
Allocate all possible slots in a paging file to allow growing and
shrinking of the number of paged-out pages. Adjust other places to
iterate over all slots.
This change is required by subsequent patches.
v2:
- check if victims allocation succeeded
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 94 ++++++++++++++++++++++++++++----------------
1 file changed, 61 insertions(+), 33 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -553,6 +553,27 @@ static int xenpaging_populate_page(xenpa
return ret;
}
+/* Trigger a page-in for a batch of pages */
+static void resume_pages(xenpaging_t *paging, int num_pages)
+{
+ xc_interface *xch = paging->xc_handle;
+ int i, num = 0;
+
+ for ( i = 0; i < paging->max_pages && num < num_pages; i++ )
+ {
+ if ( test_bit(i, paging->bitmap) )
+ {
+ paging->pagein_queue[num] = i;
+ num++;
+ if ( num == XENPAGING_PAGEIN_QUEUE_SIZE )
+ break;
+ }
+ }
+ /* num may be less than num_pages, caller has to try again */
+ if ( num )
+ page_in_trigger();
+}
+
static int evict_victim(xenpaging_t *paging,
xenpaging_victim_t *victim, int fd, int i)
{
@@ -596,6 +617,30 @@ static int evict_victim(xenpaging_t *pag
return ret;
}
+/* Evict a batch of pages and write them to a free slot in the paging file */
+static int evict_pages(xenpaging_t *paging, int fd, xenpaging_victim_t
*victims, int num_pages)
+{
+ xc_interface *xch = paging->xc_handle;
+ int rc, slot, num = 0;
+
+ for ( slot = 0; slot < paging->max_pages && num < num_pages; slot++ )
+ {
+ /* Slot is allocated */
+ if ( victims[slot].gfn != INVALID_MFN )
+ continue;
+
+ rc = evict_victim(paging, &victims[slot], fd, slot);
+ if ( rc == -ENOSPC )
+ break;
+ if ( rc == -EINTR )
+ break;
+ if ( num && num % 100 == 0 )
+ DPRINTF("%d pages evicted\n", num);
+ num++;
+ }
+ return num;
+}
+
int main(int argc, char *argv[])
{
struct sigaction act;
@@ -638,7 +683,14 @@ int main(int argc, char *argv[])
return 2;
}
- victims = calloc(paging->num_pages, sizeof(xenpaging_victim_t));
+ /* Allocate upper limit of pages to allow growing and shrinking */
+ victims = calloc(paging->max_pages, sizeof(xenpaging_victim_t));
+ if ( !victims )
+ goto out;
+
+ /* Mark all slots as unallocated */
+ for ( i = 0; i < paging->max_pages; i++ )
+ victims[i].gfn = INVALID_MFN;
/* ensure that if we get a signal, we'll do cleanup, then exit */
act.sa_handler = close_handler;
@@ -652,18 +704,7 @@ int main(int argc, char *argv[])
/* listen for page-in events to stop pager */
create_page_in_thread(paging);
- /* Evict pages */
- for ( i = 0; i < paging->num_pages; i++ )
- {
- rc = evict_victim(paging, &victims[i], fd, i);
- if ( rc == -ENOSPC )
- break;
- if ( rc == -EINTR )
- break;
- if ( i % 100 == 0 )
- DPRINTF("%d pages evicted\n", i);
- }
-
+ i = evict_pages(paging, fd, victims, paging->num_pages);
DPRINTF("%d pages evicted. Done.\n", i);
/* Swap pages in and out */
@@ -689,13 +730,13 @@ int main(int argc, char *argv[])
if ( test_and_clear_bit(req.gfn, paging->bitmap) )
{
/* Find where in the paging file to read from */
- for ( i = 0; i < paging->num_pages; i++ )
+ for ( i = 0; i < paging->max_pages; i++ )
{
if ( victims[i].gfn == req.gfn )
break;
}
- if ( i >= paging->num_pages )
+ if ( i >= paging->max_pages )
{
DPRINTF("Couldn't find page %"PRIx64"\n", req.gfn);
goto out;
@@ -767,25 +808,12 @@ int main(int argc, char *argv[])
/* Write all pages back into the guest */
if ( interrupted == SIGTERM || interrupted == SIGINT )
{
- int num = 0;
- for ( i = 0; i < paging->max_pages; i++ )
- {
- if ( test_bit(i, paging->bitmap) )
- {
- paging->pagein_queue[num] = i;
- num++;
- if ( num == XENPAGING_PAGEIN_QUEUE_SIZE )
- break;
- }
- }
- /*
- * One more round if there are still pages to process.
- * If no more pages to process, exit loop.
- */
- if ( num )
- page_in_trigger();
- else if ( i == paging->max_pages )
+ /* If no more pages to process, exit loop. */
+ if ( !paging->num_paged_out )
break;
+
+ /* One more round if there are still pages to process. */
+ resume_pages(paging, paging->num_paged_out);
}
else
{
++++++ 24217-xenpaging_improve_mainloop_exit_handling.patch ++++++
changeset: 24217:b531f5ceddf0
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:02:39 2011 +0100
files: tools/xenpaging/xenpaging.c
description:
xenpaging: improve mainloop exit handling
Remove the if/else logic to exit from the in case a signal arrives.
Update comments.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -805,7 +805,7 @@ int main(int argc, char *argv[])
}
}
- /* Write all pages back into the guest */
+ /* If interrupted, write all pages back into the guest */
if ( interrupted == SIGTERM || interrupted == SIGINT )
{
/* If no more pages to process, exit loop. */
@@ -814,13 +814,15 @@ int main(int argc, char *argv[])
/* One more round if there are still pages to process. */
resume_pages(paging, paging->num_paged_out);
+
+ /* Resume main loop */
+ continue;
}
- else
- {
- /* Exit on any other signal */
- if ( interrupted )
- break;
- }
+
+ /* Exit main loop on any other signal */
+ if ( interrupted )
+ break;
+
}
DPRINTF("xenpaging got signal %d\n", interrupted);
++++++ 24218-libxc_add_bitmap_clear_function.patch ++++++
changeset: 24218:a2b4ae2becdf
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:02:40 2011 +0100
files: tools/libxc/xc_bitops.h
description:
libxc: add bitmap_clear function
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/libxc/xc_bitops.h | 6 ++++++
1 file changed, 6 insertions(+)
Index: xen-4.1.2-testing/tools/libxc/xc_bitops.h
===================================================================
--- xen-4.1.2-testing.orig/tools/libxc/xc_bitops.h
+++ xen-4.1.2-testing/tools/libxc/xc_bitops.h
@@ -4,6 +4,7 @@
/* bitmap operations for single threaded access */
#include <stdlib.h>
+#include <string.h>
#define BITS_PER_LONG (sizeof(unsigned long) * 8)
#define ORDER_LONG (sizeof(unsigned long) == 4 ? 5 : 6)
@@ -25,6 +26,11 @@ static inline unsigned long *bitmap_allo
return calloc(1, bitmap_size(nr_bits));
}
+static inline void bitmap_clear(unsigned long *addr, int nr_bits)
+{
+ memset(addr, 0, bitmap_size(nr_bits));
+}
+
static inline int test_bit(int nr, volatile unsigned long *addr)
{
return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1;
++++++ 24219-xenpaging_retry_unpageable_gfns.patch ++++++
changeset: 24219:8eba32b9598c
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:02:41 2011 +0100
files: tools/xenpaging/policy_default.c
description:
xenpaging: retry unpageable gfns
Nomination of gfns can fail, but may succeed later.
Thats the case for a guest that starts ballooned.
v2:
- print debug when clearing uncosumed happens
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/policy_default.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
Index: xen-4.1.2-testing/tools/xenpaging/policy_default.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/policy_default.c
+++ xen-4.1.2-testing/tools/xenpaging/policy_default.c
@@ -32,6 +32,7 @@ static unsigned int i_mru;
static unsigned int mru_size;
static unsigned long *bitmap;
static unsigned long *unconsumed;
+static unsigned int unconsumed_cleared;
static unsigned long current_gfn;
static unsigned long max_pages;
@@ -87,8 +88,21 @@ int policy_choose_victim(xenpaging_t *pa
current_gfn++;
if ( current_gfn >= max_pages )
current_gfn = 0;
+ /* Could not nominate any gfn */
if ( wrap == current_gfn )
{
+ /* Count wrap arounds */
+ unconsumed_cleared++;
+ /* Force retry every few seconds (depends on poll() timeout) */
+ if ( unconsumed_cleared > 123)
+ {
+ /* Force retry of unconsumed gfns */
+ bitmap_clear(unconsumed, max_pages);
+ unconsumed_cleared = 0;
+ DPRINTF("clearing unconsumed, wrap %lx", wrap);
+ /* One more round before returning ENOSPC */
+ continue;
+ }
victim->gfn = INVALID_MFN;
return -ENOSPC;
}
++++++ 24220-xenpaging_install_into_LIBEXEC_dir.patch ++++++
changeset: 24220:2087f21befc2
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:02:42 2011 +0100
files: tools/xenpaging/Makefile
description:
xenpaging: install into LIBEXEC dir
In preparation of upcoming libxl integration,
move xenpaging binary from /usr/sbin/ to /usr/lib/xen/bin/
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/Makefile | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/Makefile
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/Makefile
+++ xen-4.1.2-testing/tools/xenpaging/Makefile
@@ -29,8 +29,8 @@ xenpaging: $(OBJS)
install: all
$(INSTALL_DIR) $(DESTDIR)/var/lib/xen/xenpaging
- $(INSTALL_DIR) $(DESTDIR)$(SBINDIR)
- $(INSTALL_PROG) $(IBINS) $(DESTDIR)$(SBINDIR)
+ $(INSTALL_DIR) $(DESTDIR)$(LIBEXEC)
+ $(INSTALL_PROG) $(IBINS) $(DESTDIR)$(LIBEXEC)
clean:
rm -f *.o *~ $(DEPS) xen TAGS $(IBINS) $(LIB)
++++++ 24221-xenpaging_add_XEN_PAGING_DIR_-_libxl_xenpaging_dir_path.patch
++++++
changeset: 24221:cd5948592b10
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:02:43 2011 +0100
files: Config.mk config/StdGNU.mk tools/libxl/libxl.h
tools/libxl/libxl_paths.c tools/xenpaging/Makefile
description:
xenpaging: add XEN_PAGING_DIR / libxl_xenpaging_dir_path()
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
Config.mk | 1 +
config/StdGNU.mk | 2 ++
tools/xenpaging/Makefile | 2 +-
3 files changed, 4 insertions(+), 1 deletion(-)
Index: xen-4.1.2-testing/Config.mk
===================================================================
--- xen-4.1.2-testing.orig/Config.mk
+++ xen-4.1.2-testing/Config.mk
@@ -124,6 +124,7 @@ define buildmakevars2file-closure
echo "XEN_CONFIG_DIR=\"$(XEN_CONFIG_DIR)\"" >> $(1).tmp; \
echo "XEN_SCRIPT_DIR=\"$(XEN_SCRIPT_DIR)\"" >> $(1).tmp; \
echo "XEN_LOCK_DIR=\"$(XEN_LOCK_DIR)\"" >> $(1).tmp; \
+ echo "XEN_PAGING_DIR=\"$(XEN_PAGING_DIR)\"" >> $(1).tmp; \
if ! cmp $(1).tmp $(1); then mv -f $(1).tmp $(1); fi
endef
Index: xen-4.1.2-testing/config/StdGNU.mk
===================================================================
--- xen-4.1.2-testing.orig/config/StdGNU.mk
+++ xen-4.1.2-testing/config/StdGNU.mk
@@ -46,9 +46,11 @@ PRIVATE_BINDIR = $(PRIVATE_PREFIX)/bin
ifeq ($(PREFIX),/usr)
CONFIG_DIR = /etc
XEN_LOCK_DIR = /var/lock
+XEN_PAGING_DIR = /var/lib/xen/xenpaging
else
CONFIG_DIR = $(PREFIX)/etc
XEN_LOCK_DIR = $(PREFIX)/var/lock
+XEN_PAGING_DIR = $(PREFIX)/var/lib/xen/xenpaging
endif
SYSCONFIG_DIR = $(CONFIG_DIR)/$(CONFIG_LEAF_DIR)
Index: xen-4.1.2-testing/tools/xenpaging/Makefile
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/Makefile
+++ xen-4.1.2-testing/tools/xenpaging/Makefile
@@ -28,7 +28,7 @@ xenpaging: $(OBJS)
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ $(LDLIBS)
install: all
- $(INSTALL_DIR) $(DESTDIR)/var/lib/xen/xenpaging
+ $(INSTALL_DIR) $(DESTDIR)$(XEN_PAGING_DIR)
$(INSTALL_DIR) $(DESTDIR)$(LIBEXEC)
$(INSTALL_PROG) $(IBINS) $(DESTDIR)$(LIBEXEC)
++++++ 24222-xenpaging_use_guests_tot_pages_as_working_target.patch ++++++
changeset: 24222:286a741b4d86
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:02:45 2011 +0100
files: tools/xenpaging/policy_default.c tools/xenpaging/xenpaging.c
tools/xenpaging/xenpaging.h
description:
xenpaging: use guests tot_pages as working target
This change reverses the task of xenpaging. Before this change a fixed number
of pages was paged out. With this change the guest will not have access to
more than the given number of pages at the same time.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/policy_default.c | 1
tools/xenpaging/xenpaging.c | 78 ++++++++++++++++++++++++++++++---------
tools/xenpaging/xenpaging.h | 2 -
3 files changed, 61 insertions(+), 20 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/policy_default.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/policy_default.c
+++ xen-4.1.2-testing/tools/xenpaging/policy_default.c
@@ -71,7 +71,6 @@ int policy_init(xenpaging_t *paging)
/* Start in the middle to avoid paging during BIOS startup */
current_gfn = max_pages / 2;
- current_gfn -= paging->num_pages / 2;
rc = 0;
out:
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -136,6 +136,21 @@ err:
return rc;
}
+static int xenpaging_get_tot_pages(xenpaging_t *paging)
+{
+ xc_interface *xch = paging->xc_handle;
+ xc_domaininfo_t domain_info;
+ int rc;
+
+ rc = xc_domain_getinfolist(xch, paging->mem_event.domain_id, 1,
&domain_info);
+ if ( rc != 1 )
+ {
+ PERROR("Error getting domain info");
+ return -1;
+ }
+ return domain_info.tot_pages;
+}
+
static void *init_page(void)
{
void *buffer;
@@ -161,7 +176,7 @@ static void *init_page(void)
return NULL;
}
-static xenpaging_t *xenpaging_init(domid_t domain_id, int num_pages)
+static xenpaging_t *xenpaging_init(domid_t domain_id, int target_tot_pages)
{
xenpaging_t *paging;
xc_domaininfo_t domain_info;
@@ -296,12 +311,7 @@ static xenpaging_t *xenpaging_init(domid
}
DPRINTF("max_pages = %d\n", paging->max_pages);
- if ( num_pages < 0 || num_pages > paging->max_pages )
- {
- num_pages = paging->max_pages;
- DPRINTF("setting num_pages to %d\n", num_pages);
- }
- paging->num_pages = num_pages;
+ paging->target_tot_pages = target_tot_pages;
/* Initialise policy */
rc = policy_init(paging);
@@ -648,7 +658,9 @@ int main(int argc, char *argv[])
xenpaging_victim_t *victims;
mem_event_request_t req;
mem_event_response_t rsp;
+ int num, prev_num = 0;
int i;
+ int tot_pages;
int rc = -1;
int rc1;
xc_interface *xch;
@@ -659,7 +671,7 @@ int main(int argc, char *argv[])
if ( argc != 3 )
{
- fprintf(stderr, "Usage: %s <domain_id> <num_pages>\n", argv[0]);
+ fprintf(stderr, "Usage: %s <domain_id> <tot_pages>\n", argv[0]);
return -1;
}
@@ -672,7 +684,7 @@ int main(int argc, char *argv[])
}
xch = paging->xc_handle;
- DPRINTF("starting %s %u %d\n", argv[0], paging->mem_event.domain_id,
paging->num_pages);
+ DPRINTF("starting %s %u %d\n", argv[0], paging->mem_event.domain_id,
paging->target_tot_pages);
/* Open file */
sprintf(filename, "page_cache_%u", paging->mem_event.domain_id);
@@ -704,9 +716,6 @@ int main(int argc, char *argv[])
/* listen for page-in events to stop pager */
create_page_in_thread(paging);
- i = evict_pages(paging, fd, victims, paging->num_pages);
- DPRINTF("%d pages evicted. Done.\n", i);
-
/* Swap pages in and out */
while ( 1 )
{
@@ -771,12 +780,8 @@ int main(int argc, char *argv[])
goto out;
}
- /* Evict a new page to replace the one we just paged in,
- * or clear this pagefile slot on exit */
- if ( interrupted )
- victims[i].gfn = INVALID_MFN;
- else
- evict_victim(paging, &victims[i], fd, i);
+ /* Clear this pagefile slot */
+ victims[i].gfn = INVALID_MFN;
}
else
{
@@ -823,6 +828,43 @@ int main(int argc, char *argv[])
if ( interrupted )
break;
+ /* Check if the target has been reached already */
+ tot_pages = xenpaging_get_tot_pages(paging);
+ if ( tot_pages < 0 )
+ goto out;
+
+ /* Resume all pages if paging is disabled or no target was set */
+ if ( paging->target_tot_pages == 0 )
+ {
+ if ( paging->num_paged_out )
+ resume_pages(paging, paging->num_paged_out);
+ }
+ /* Evict more pages if target not reached */
+ else if ( tot_pages > paging->target_tot_pages )
+ {
+ num = tot_pages - paging->target_tot_pages;
+ if ( num != prev_num )
+ {
+ DPRINTF("Need to evict %d pages to reach %d
target_tot_pages\n", num, paging->target_tot_pages);
+ prev_num = num;
+ }
+ /* Limit the number of evicts to be able to process page-in
requests */
+ if ( num > 42 )
+ num = 42;
+ evict_pages(paging, fd, victims, num);
+ }
+ /* Resume some pages if target not reached */
+ else if ( tot_pages < paging->target_tot_pages &&
paging->num_paged_out )
+ {
+ num = paging->target_tot_pages - tot_pages;
+ if ( num != prev_num )
+ {
+ DPRINTF("Need to resume %d pages to reach %d
target_tot_pages\n", num, paging->target_tot_pages);
+ prev_num = num;
+ }
+ resume_pages(paging, num);
+ }
+
}
DPRINTF("xenpaging got signal %d\n", interrupted);
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.h
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.h
@@ -50,7 +50,7 @@ typedef struct xenpaging {
/* number of pages for which data structures were allocated */
int max_pages;
int num_paged_out;
- int num_pages;
+ int target_tot_pages;
int policy_mru_size;
unsigned long pagein_queue[XENPAGING_PAGEIN_QUEUE_SIZE];
} xenpaging_t;
++++++
24223-xenpaging_watch_the_guests_memory-target-tot_pages_xenstore_value.patch
++++++
changeset: 24223:9e3c2ef70c8a
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:02:47 2011 +0100
files: tools/xenpaging/xenpaging.c
description:
xenpaging: watch the guests memory/target-tot_pages xenstore value
Subsequent patches will use xenstored to store the numbers of pages
xenpaging is suppose to page-out.
Remove num_pages and use target_pages instead.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 51 +++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 50 insertions(+), 1 deletion(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -19,8 +19,10 @@
*/
#define _XOPEN_SOURCE 600
+#define _GNU_SOURCE
#include <inttypes.h>
+#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <time.h>
@@ -35,6 +37,10 @@
#include "policy.h"
#include "xenpaging.h"
+/* Defines number of mfns a guest should use at a time, in KiB */
+#define WATCH_TARGETPAGES "memory/target-tot_pages"
+static char *watch_target_tot_pages;
+static char *dom_path;
static char watch_token[16];
static char filename[80];
static int interrupted;
@@ -72,7 +78,7 @@ static int xenpaging_wait_for_event_or_t
{
xc_interface *xch = paging->xc_handle;
xc_evtchn *xce = paging->mem_event.xce_handle;
- char **vec;
+ char **vec, *val;
unsigned int num;
struct pollfd fd[2];
int port;
@@ -111,6 +117,25 @@ static int xenpaging_wait_for_event_or_t
rc = 0;
}
}
+ else if ( strcmp(vec[XS_WATCH_PATH], watch_target_tot_pages) == 0 )
+ {
+ int ret, target_tot_pages;
+ val = xs_read(paging->xs_handle, XBT_NULL, vec[XS_WATCH_PATH],
NULL);
+ if ( val )
+ {
+ ret = sscanf(val, "%d", &target_tot_pages);
+ if ( ret > 0 )
+ {
+ /* KiB to pages */
+ target_tot_pages >>= 2;
+ if ( target_tot_pages < 0 || target_tot_pages >
paging->max_pages )
+ target_tot_pages = paging->max_pages;
+ paging->target_tot_pages = target_tot_pages;
+ DPRINTF("new target_tot_pages %d\n", target_tot_pages);
+ }
+ free(val);
+ }
+ }
free(vec);
}
}
@@ -216,6 +241,25 @@ static xenpaging_t *xenpaging_init(domid
goto err;
}
+ /* Watch xenpagings working target */
+ dom_path = xs_get_domain_path(paging->xs_handle, domain_id);
+ if ( !dom_path )
+ {
+ PERROR("Could not find domain path\n");
+ goto err;
+ }
+ if ( asprintf(&watch_target_tot_pages, "%s/%s", dom_path,
WATCH_TARGETPAGES) < 0 )
+ {
+ PERROR("Could not alloc watch path\n");
+ goto err;
+ }
+ DPRINTF("watching '%s'\n", watch_target_tot_pages);
+ if ( xs_watch(paging->xs_handle, watch_target_tot_pages, "") == false )
+ {
+ PERROR("Could not bind to xenpaging watch\n");
+ goto err;
+ }
+
p = getenv("XENPAGING_POLICY_MRU_SIZE");
if ( p && *p )
{
@@ -342,6 +386,8 @@ static xenpaging_t *xenpaging_init(domid
free(paging->mem_event.ring_page);
}
+ free(dom_path);
+ free(watch_target_tot_pages);
free(paging->bitmap);
free(paging);
}
@@ -357,6 +403,9 @@ static int xenpaging_teardown(xenpaging_
if ( paging == NULL )
return 0;
+ xs_unwatch(paging->xs_handle, watch_target_tot_pages, "");
+ xs_unwatch(paging->xs_handle, "@releaseDomain", watch_token);
+
xch = paging->xc_handle;
paging->xc_handle = NULL;
/* Tear down domain paging in Xen */
++++++ 24224-xenpaging_add_cmdline_interface_for_pager.patch ++++++
changeset: 24224:7243fd87410e
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:02:48 2011 +0100
files: tools/xenpaging/xenpaging.c tools/xenpaging/xenpaging.h
description:
xenpaging: add cmdline interface for pager
Introduce a cmdline handling for the pager. This simplifies libxl support,
debug and mru_size are not passed via the environment anymore.
The new interface looks like this:
xenpaging [options] -f <pagefile> -d <domain_id>
options:
-d <domid> --domain=<domid> numerical domain_id of guest. This
option is required.
-f <file> --pagefile=<file> pagefile to use. This option is
required.
-m <max_memkb> --max_memkb=<max_memkb> maximum amount of memory to handle.
-r <num> --mru_size=<num> number of paged-in pages to keep in
memory.
-d --debug enable debug output.
-h --help this output.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 139 ++++++++++++++++++++++++++++++++------------
tools/xenpaging/xenpaging.h | 1
2 files changed, 103 insertions(+), 37 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -31,6 +31,7 @@
#include <poll.h>
#include <xc_private.h>
#include <xs.h>
+#include <getopt.h>
#include "xc_bitops.h"
#include "file_ops.h"
@@ -42,12 +43,12 @@
static char *watch_target_tot_pages;
static char *dom_path;
static char watch_token[16];
-static char filename[80];
+static char *filename;
static int interrupted;
static void unlink_pagefile(void)
{
- if ( filename[0] )
+ if ( filename && filename[0] )
{
unlink(filename);
filename[0] = '\0';
@@ -201,11 +202,85 @@ static void *init_page(void)
return NULL;
}
-static xenpaging_t *xenpaging_init(domid_t domain_id, int target_tot_pages)
+static void usage(void)
+{
+ printf("usage:\n\n");
+
+ printf(" xenpaging [options] -f <pagefile> -d <domain_id>\n\n");
+
+ printf("options:\n");
+ printf(" -d <domid> --domain=<domid> numerical domain_id of
guest. This option is required.\n");
+ printf(" -f <file> --pagefile=<file> pagefile to use. This
option is required.\n");
+ printf(" -m <max_memkb> --max_memkb=<max_memkb> maximum amount of memory
to handle.\n");
+ printf(" -r <num> --mru_size=<num> number of paged-in pages
to keep in memory.\n");
+ printf(" -v --verbose enable debug output.\n");
+ printf(" -h --help this output.\n");
+}
+
+static int xenpaging_getopts(xenpaging_t *paging, int argc, char *argv[])
+{
+ int ch;
+ static const char sopts[] = "hvd:f:m:r:";
+ static const struct option lopts[] = {
+ {"help", 0, NULL, 'h'},
+ {"verbose", 0, NULL, 'v'},
+ {"domain", 1, NULL, 'd'},
+ {"pagefile", 1, NULL, 'f'},
+ {"mru_size", 1, NULL, 'm'},
+ { }
+ };
+
+ while ((ch = getopt_long(argc, argv, sopts, lopts, NULL)) != -1)
+ {
+ switch(ch) {
+ case 'd':
+ paging->mem_event.domain_id = atoi(optarg);
+ break;
+ case 'f':
+ filename = strdup(optarg);
+ break;
+ case 'm':
+ /* KiB to pages */
+ paging->max_pages = atoi(optarg) >> 2;
+ break;
+ case 'r':
+ paging->policy_mru_size = atoi(optarg);
+ break;
+ case 'v':
+ paging->debug = 1;
+ break;
+ case 'h':
+ case '?':
+ usage();
+ return 1;
+ }
+ }
+
+ argv += optind; argc -= optind;
+
+ /* Path to pagefile is required */
+ if ( !filename )
+ {
+ printf("Filename for pagefile missing!\n");
+ usage();
+ return 1;
+ }
+
+ /* Set domain id */
+ if ( !paging->mem_event.domain_id )
+ {
+ printf("Numerical <domain_id> missing!\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+static xenpaging_t *xenpaging_init(int argc, char *argv[])
{
xenpaging_t *paging;
xc_domaininfo_t domain_info;
- xc_interface *xch;
+ xc_interface *xch = NULL;
xentoollog_logger *dbg = NULL;
char *p;
int rc;
@@ -215,7 +290,12 @@ static xenpaging_t *xenpaging_init(domid
if ( !paging )
goto err;
- if ( getenv("XENPAGING_DEBUG") )
+ /* Get cmdline options and domain_id */
+ if ( xenpaging_getopts(paging, argc, argv) )
+ goto err;
+
+ /* Enable debug output */
+ if ( paging->debug )
dbg = (xentoollog_logger *)xtl_createlogger_stdiostream(stderr,
XTL_DEBUG, 0);
/* Open connection to xen */
@@ -234,7 +314,7 @@ static xenpaging_t *xenpaging_init(domid
}
/* write domain ID to watch so we can ignore other domain shutdowns */
- snprintf(watch_token, sizeof(watch_token), "%u", domain_id);
+ snprintf(watch_token, sizeof(watch_token), "%u",
paging->mem_event.domain_id);
if ( xs_watch(paging->xs_handle, "@releaseDomain", watch_token) == false )
{
PERROR("Could not bind to shutdown watch\n");
@@ -242,7 +322,7 @@ static xenpaging_t *xenpaging_init(domid
}
/* Watch xenpagings working target */
- dom_path = xs_get_domain_path(paging->xs_handle, domain_id);
+ dom_path = xs_get_domain_path(paging->xs_handle,
paging->mem_event.domain_id);
if ( !dom_path )
{
PERROR("Could not find domain path\n");
@@ -260,16 +340,6 @@ static xenpaging_t *xenpaging_init(domid
goto err;
}
- p = getenv("XENPAGING_POLICY_MRU_SIZE");
- if ( p && *p )
- {
- paging->policy_mru_size = atoi(p);
- DPRINTF("Setting policy mru_size to %d\n", paging->policy_mru_size);
- }
-
- /* Set domain id */
- paging->mem_event.domain_id = domain_id;
-
/* Initialise shared page */
paging->mem_event.shared_page = init_page();
if ( paging->mem_event.shared_page == NULL )
@@ -335,16 +405,20 @@ static xenpaging_t *xenpaging_init(domid
paging->mem_event.port = rc;
- rc = xc_domain_getinfolist(xch, paging->mem_event.domain_id, 1,
- &domain_info);
- if ( rc != 1 )
+ /* Get max_pages from guest if not provided via cmdline */
+ if ( !paging->max_pages )
{
- PERROR("Error getting domain info");
- goto err;
- }
+ rc = xc_domain_getinfolist(xch, paging->mem_event.domain_id, 1,
+ &domain_info);
+ if ( rc != 1 )
+ {
+ PERROR("Error getting domain info");
+ goto err;
+ }
- /* Record number of max_pages */
- paging->max_pages = domain_info.max_pages;
+ /* Record number of max_pages */
+ paging->max_pages = domain_info.max_pages;
+ }
/* Allocate bitmap for tracking pages that have been paged out */
paging->bitmap = bitmap_alloc(paging->max_pages);
@@ -355,8 +429,6 @@ static xenpaging_t *xenpaging_init(domid
}
DPRINTF("max_pages = %d\n", paging->max_pages);
- paging->target_tot_pages = target_tot_pages;
-
/* Initialise policy */
rc = policy_init(paging);
if ( rc != 0 )
@@ -718,25 +790,18 @@ int main(int argc, char *argv[])
mode_t open_mode = S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR | S_IWGRP |
S_IWOTH;
int fd;
- if ( argc != 3 )
- {
- fprintf(stderr, "Usage: %s <domain_id> <tot_pages>\n", argv[0]);
- return -1;
- }
-
/* Initialise domain paging */
- paging = xenpaging_init(atoi(argv[1]), atoi(argv[2]));
+ paging = xenpaging_init(argc, argv);
if ( paging == NULL )
{
- fprintf(stderr, "Error initialising paging");
+ fprintf(stderr, "Error initialising paging\n");
return 1;
}
xch = paging->xc_handle;
- DPRINTF("starting %s %u %d\n", argv[0], paging->mem_event.domain_id,
paging->target_tot_pages);
+ DPRINTF("starting %s for domain_id %u with pagefile %s\n", argv[0],
paging->mem_event.domain_id, filename);
/* Open file */
- sprintf(filename, "page_cache_%u", paging->mem_event.domain_id);
fd = open(filename, open_flags, open_mode);
if ( fd < 0 )
{
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.h
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.h
@@ -52,6 +52,7 @@ typedef struct xenpaging {
int num_paged_out;
int target_tot_pages;
int policy_mru_size;
+ int debug;
unsigned long pagein_queue[XENPAGING_PAGEIN_QUEUE_SIZE];
} xenpaging_t;
++++++ 24225-xenpaging_improve_policy_mru_list_handling.patch ++++++
changeset: 24225:d47d1ad56366
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 17:02:50 2011 +0100
files: tools/xenpaging/policy.h tools/xenpaging/policy_default.c
tools/xenpaging/xenpaging.c
description:
xenpaging: improve policy mru list handling
Without this change it is not possible to page-out all guest pages, then
trigger a page-in for all pages, and then page-out everything once
again. All pages in the mru list can not be paged out because they
remain active in the internal bitmap of paged pages.
Use the mru list only if the number of paged-out pages is larger than
the mru list. If the number is smaller, start to clear the mru list. In
case the number of paged-out pages drops to zero the mru list and the
internal bitmap will be empty as well.
Also add a new interface for dropped pages. If a gfn was dropped there
is no need to adjust the mru list because dropping a page is not usage
of a page.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/policy.h | 2 ++
tools/xenpaging/policy_default.c | 27 ++++++++++++++++++++++++---
tools/xenpaging/xenpaging.c | 11 +++++++++--
3 files changed, 35 insertions(+), 5 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/policy.h
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/policy.h
+++ xen-4.1.2-testing/tools/xenpaging/policy.h
@@ -32,6 +32,8 @@ int policy_init(xenpaging_t *paging);
int policy_choose_victim(xenpaging_t *paging, xenpaging_victim_t *victim);
void policy_notify_paged_out(unsigned long gfn);
void policy_notify_paged_in(unsigned long gfn);
+void policy_notify_paged_in_nomru(unsigned long gfn);
+void policy_notify_dropped(unsigned long gfn);
#endif // __XEN_PAGING_POLICY_H__
Index: xen-4.1.2-testing/tools/xenpaging/policy_default.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/policy_default.c
+++ xen-4.1.2-testing/tools/xenpaging/policy_default.c
@@ -57,7 +57,7 @@ int policy_init(xenpaging_t *paging)
if ( paging->policy_mru_size > 0 )
mru_size = paging->policy_mru_size;
else
- mru_size = DEFAULT_MRU_SIZE;
+ mru_size = paging->policy_mru_size = DEFAULT_MRU_SIZE;
mru = malloc(sizeof(*mru) * mru_size);
if ( mru == NULL )
@@ -120,17 +120,38 @@ void policy_notify_paged_out(unsigned lo
clear_bit(gfn, unconsumed);
}
-void policy_notify_paged_in(unsigned long gfn)
+static void policy_handle_paged_in(unsigned long gfn, int do_mru)
{
unsigned long old_gfn = mru[i_mru & (mru_size - 1)];
if ( old_gfn != INVALID_MFN )
clear_bit(old_gfn, bitmap);
- mru[i_mru & (mru_size - 1)] = gfn;
+ if (do_mru) {
+ mru[i_mru & (mru_size - 1)] = gfn;
+ } else {
+ clear_bit(gfn, bitmap);
+ mru[i_mru & (mru_size - 1)] = INVALID_MFN;
+ }
+
i_mru++;
}
+void policy_notify_paged_in(unsigned long gfn)
+{
+ policy_handle_paged_in(gfn, 1);
+}
+
+void policy_notify_paged_in_nomru(unsigned long gfn)
+{
+ policy_handle_paged_in(gfn, 0);
+}
+
+void policy_notify_dropped(unsigned long gfn)
+{
+ clear_bit(gfn, bitmap);
+}
+
/*
* Local variables:
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -615,7 +615,14 @@ static int xenpaging_resume_page(xenpagi
/* Notify policy of page being paged in */
if ( notify_policy )
{
- policy_notify_paged_in(rsp->gfn);
+ /*
+ * Do not add gfn to mru list if the target is lower than mru size.
+ * This allows page-out of these gfns if the target grows again.
+ */
+ if (paging->num_paged_out > paging->policy_mru_size)
+ policy_notify_paged_in(rsp->gfn);
+ else
+ policy_notify_paged_in_nomru(rsp->gfn);
/* Record number of resumed pages */
paging->num_paged_out--;
@@ -869,7 +876,7 @@ int main(int argc, char *argv[])
{
DPRINTF("drop_page ^ gfn %"PRIx64" pageslot %d\n",
req.gfn, i);
/* Notify policy of page being dropped */
- policy_notify_paged_in(req.gfn);
+ policy_notify_dropped(req.gfn);
}
else
{
++++++ 24226-xenpaging_add_debug_to_show_received_watch_event..patch ++++++
changeset: 24226:c9b75ccd3ebf
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 18:26:15 2011 +0100
files: tools/xenpaging/xenpaging.c
description:
xenpaging: add debug to show received watch event.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 1 +
1 file changed, 1 insertion(+)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -108,6 +108,7 @@ static int xenpaging_wait_for_event_or_t
vec = xs_read_watch(paging->xs_handle, &num);
if ( vec )
{
+ DPRINTF("path '%s' token '%s'\n", vec[XS_WATCH_PATH],
vec[XS_WATCH_TOKEN]);
if ( strcmp(vec[XS_WATCH_TOKEN], watch_token) == 0 )
{
/* If our guest disappeared, set interrupt flag and fall
through */
++++++ 24227-xenpaging_restrict_pagefile_permissions.patch ++++++
changeset: 24227:1027e7d13d02
user: Olaf Hering <olaf@xxxxxxxxx>
date: Sun Nov 20 18:26:16 2011 +0100
files: tools/xenpaging/xenpaging.c
description:
xenpaging: restrict pagefile permissions
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
---
tools/xenpaging/xenpaging.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -795,7 +795,7 @@ int main(int argc, char *argv[])
xc_interface *xch;
int open_flags = O_CREAT | O_TRUNC | O_RDWR;
- mode_t open_mode = S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR | S_IWGRP |
S_IWOTH;
+ mode_t open_mode = S_IRUSR | S_IWUSR;
int fd;
/* Initialise domain paging */
++++++ 24231-waitqueue_Implement_wake_up_nroneall..patch ++++++
changeset: 24231:2a81ce2b2b93
user: Keir Fraser <keir@xxxxxxx>
date: Fri Nov 25 20:27:11 2011 +0000
files: xen/common/wait.c xen/include/xen/wait.h
description:
waitqueue: Implement wake_up_{nr,one,all}.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
---
xen/common/wait.c | 14 ++++++++++++--
xen/include/xen/wait.h | 6 ++++--
2 files changed, 16 insertions(+), 4 deletions(-)
Index: xen-4.1.2-testing/xen/common/wait.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/wait.c
+++ xen-4.1.2-testing/xen/common/wait.c
@@ -87,13 +87,13 @@ void init_waitqueue_head(struct waitqueu
INIT_LIST_HEAD(&wq->list);
}
-void wake_up(struct waitqueue_head *wq)
+void wake_up_nr(struct waitqueue_head *wq, unsigned int nr)
{
struct waitqueue_vcpu *wqv;
spin_lock(&wq->lock);
- while ( !list_empty(&wq->list) )
+ while ( !list_empty(&wq->list) && nr-- )
{
wqv = list_entry(wq->list.next, struct waitqueue_vcpu, list);
list_del_init(&wqv->list);
@@ -103,6 +103,16 @@ void wake_up(struct waitqueue_head *wq)
spin_unlock(&wq->lock);
}
+void wake_up_one(struct waitqueue_head *wq)
+{
+ wake_up_nr(wq, 1);
+}
+
+void wake_up_all(struct waitqueue_head *wq)
+{
+ wake_up_nr(wq, UINT_MAX);
+}
+
#ifdef CONFIG_X86
static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
Index: xen-4.1.2-testing/xen/include/xen/wait.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/xen/wait.h
+++ xen-4.1.2-testing/xen/include/xen/wait.h
@@ -28,8 +28,10 @@ struct waitqueue_head {
/* Dynamically initialise a waitqueue. */
void init_waitqueue_head(struct waitqueue_head *wq);
-/* Wake all VCPUs waiting on specified waitqueue. */
-void wake_up(struct waitqueue_head *wq);
+/* Wake VCPU(s) waiting on specified waitqueue. */
+void wake_up_nr(struct waitqueue_head *wq, unsigned int nr);
+void wake_up_one(struct waitqueue_head *wq);
+void wake_up_all(struct waitqueue_head *wq);
/* Wait on specified waitqueue until @condition is true. */
#define wait_event(wq, condition) \
++++++ 24232-waitqueue_Hold_a_reference_to_a_domain_on_a_waitqueue..patch ++++++
changeset: 24232:95d4e2e0bed3
user: Keir Fraser <keir@xxxxxxx>
date: Fri Nov 25 20:32:05 2011 +0000
files: xen/common/wait.c xen/include/xen/wait.h
description:
waitqueue: Hold a reference to a domain on a waitqueue.
Also allow waitqueues to be dynamically destroyed.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
---
xen/common/wait.c | 8 ++++++++
xen/include/xen/wait.h | 3 ++-
2 files changed, 10 insertions(+), 1 deletion(-)
Index: xen-4.1.2-testing/xen/common/wait.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/wait.c
+++ xen-4.1.2-testing/xen/common/wait.c
@@ -87,6 +87,11 @@ void init_waitqueue_head(struct waitqueu
INIT_LIST_HEAD(&wq->list);
}
+void destroy_waitqueue_head(struct waitqueue_head *wq)
+{
+ wake_up_all(wq);
+}
+
void wake_up_nr(struct waitqueue_head *wq, unsigned int nr)
{
struct waitqueue_vcpu *wqv;
@@ -98,6 +103,7 @@ void wake_up_nr(struct waitqueue_head *w
wqv = list_entry(wq->list.next, struct waitqueue_vcpu, list);
list_del_init(&wqv->list);
vcpu_unpause(wqv->vcpu);
+ put_domain(wqv->vcpu->domain);
}
spin_unlock(&wq->lock);
@@ -218,6 +224,7 @@ void prepare_to_wait(struct waitqueue_he
spin_lock(&wq->lock);
list_add_tail(&wqv->list, &wq->list);
vcpu_pause_nosync(curr);
+ get_knownalive_domain(curr->domain);
spin_unlock(&wq->lock);
}
@@ -236,6 +243,7 @@ void finish_wait(struct waitqueue_head *
{
list_del_init(&wqv->list);
vcpu_unpause(curr);
+ put_domain(curr->domain);
}
spin_unlock(&wq->lock);
}
Index: xen-4.1.2-testing/xen/include/xen/wait.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/xen/wait.h
+++ xen-4.1.2-testing/xen/include/xen/wait.h
@@ -25,8 +25,9 @@ struct waitqueue_head {
.list = LIST_HEAD_INIT((name).list) \
}
-/* Dynamically initialise a waitqueue. */
+/* Dynamically initialise/destroy a waitqueue. */
void init_waitqueue_head(struct waitqueue_head *wq);
+void destroy_waitqueue_head(struct waitqueue_head *wq);
/* Wake VCPU(s) waiting on specified waitqueue. */
void wake_up_nr(struct waitqueue_head *wq, unsigned int nr);
++++++ 24261-x86-cpuidle-Westmere-EX.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1322645021 -3600
# Node ID 64088ba60263d3a623851b43a872c93c71cc3cbf
# Parent df7cec2c6c03f07932555954948ce7c8d09e88f4
x86/cpuidle: add Westmere-EX support to hw residencies reading logic
This is in accordance with
http://software.intel.com/en-us/articles/intel-processor-identification-with-cpuid-model-and-family-numbers/
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Haitao Shan <maillists.shan@xxxxxxxxx>
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -113,6 +113,7 @@ static void do_get_hw_residencies(void *
/* Westmere */
case 0x25:
case 0x2C:
+ case 0x2F:
GET_PC3_RES(hw_res->pc3);
GET_PC6_RES(hw_res->pc6);
GET_PC7_RES(hw_res->pc7);
++++++ 24269-mem_event_move_mem_event_domain_out_of_struct_domain.patch ++++++
changeset: 24269:2cbc53a24683
user: Olaf Hering <olaf@xxxxxxxxx>
date: Wed Nov 30 07:08:53 2011 -0800
files: xen/arch/x86/hvm/hvm.c xen/arch/x86/mm/mem_event.c
xen/arch/x86/mm/mem_sharing.c xen/arch/x86/mm/p2m.c xen/common/domain.c
xen/include/xen/sched.h
description:
mem_event: move mem_event_domain out of struct domain
An upcoming change may increase the size of mem_event_domain. The result
is a build failure because struct domain gets larger than a page.
Allocate the room for the three mem_event_domain members at runtime.
v2:
- remove mem_ prefix from members of new struct
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>
---
xen/arch/x86/hvm/hvm.c | 4 ++--
xen/arch/x86/mm/mem_event.c | 6 +++---
xen/arch/x86/mm/mem_sharing.c | 6 +++---
xen/arch/x86/mm/p2m.c | 18 +++++++++---------
xen/common/domain.c | 5 +++++
xen/include/xen/sched.h | 18 ++++++++++++------
6 files changed, 34 insertions(+), 23 deletions(-)
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3909,7 +3909,7 @@ static int hvm_memory_event_traps(long p
if ( (p & HVMPME_onchangeonly) && (value == old) )
return 1;
- rc = mem_event_check_ring(d, &d->mem_access);
+ rc = mem_event_check_ring(d, &d->mem_event->access);
if ( rc )
return rc;
@@ -3932,7 +3932,7 @@ static int hvm_memory_event_traps(long p
req.gla_valid = 1;
}
- mem_event_put_request(d, &d->mem_access, &req);
+ mem_event_put_request(d, &d->mem_event->access, &req);
return 1;
}
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -252,7 +252,7 @@ int mem_event_domctl(struct domain *d, x
{
case XEN_DOMCTL_MEM_EVENT_OP_PAGING:
{
- struct mem_event_domain *med = &d->mem_paging;
+ struct mem_event_domain *med = &d->mem_event->paging;
rc = -EINVAL;
switch( mec->op )
@@ -297,7 +297,7 @@ int mem_event_domctl(struct domain *d, x
case XEN_DOMCTL_MEM_EVENT_OP_ACCESS:
{
- struct mem_event_domain *med = &d->mem_access;
+ struct mem_event_domain *med = &d->mem_event->access;
rc = -EINVAL;
switch( mec->op )
@@ -320,7 +320,7 @@ int mem_event_domctl(struct domain *d, x
case XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE:
{
if ( med->ring_page )
- rc = mem_event_disable(&d->mem_access);
+ rc = mem_event_disable(med);
}
break;
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -322,12 +322,12 @@ static struct page_info* mem_sharing_all
req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
}
- if(mem_event_check_ring(d, &d->mem_share)) return page;
+ if(mem_event_check_ring(d, &d->mem_event->share)) return page;
req.gfn = gfn;
req.p2mt = p2m_ram_shared;
req.vcpu_id = v->vcpu_id;
- mem_event_put_request(d, &d->mem_share, &req);
+ mem_event_put_request(d, &d->mem_event->share, &req);
return page;
}
@@ -342,7 +342,7 @@ int mem_sharing_sharing_resume(struct do
mem_event_response_t rsp;
/* Get request off the ring */
- mem_event_get_response(&d->mem_share, &rsp);
+ mem_event_get_response(&d->mem_event->share, &rsp);
/* Unpause domain/vcpu */
if( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -2996,7 +2996,7 @@ void p2m_mem_paging_drop_page(struct p2m
struct domain *d = p2m->domain;
/* Check that there's space on the ring for this request */
- if ( mem_event_check_ring(d, &d->mem_paging) == 0)
+ if ( mem_event_check_ring(d, &d->mem_event->paging) == 0)
{
/* Send release notification to pager */
memset(&req, 0, sizeof(req));
@@ -3004,7 +3004,7 @@ void p2m_mem_paging_drop_page(struct p2m
req.gfn = gfn;
req.vcpu_id = v->vcpu_id;
- mem_event_put_request(d, &d->mem_paging, &req);
+ mem_event_put_request(d, &d->mem_event->paging, &req);
}
}
@@ -3039,7 +3039,7 @@ void p2m_mem_paging_populate(struct p2m_
struct domain *d = p2m->domain;
/* Check that there's space on the ring for this request */
- if ( mem_event_check_ring(d, &d->mem_paging) )
+ if ( mem_event_check_ring(d, &d->mem_event->paging) )
return;
memset(&req, 0, sizeof(req));
@@ -3070,7 +3070,7 @@ void p2m_mem_paging_populate(struct p2m_
else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
{
/* gfn is already on its way back and vcpu is not paused */
- mem_event_put_req_producers(&d->mem_paging);
+ mem_event_put_req_producers(&d->mem_event->paging);
return;
}
@@ -3079,7 +3079,7 @@ void p2m_mem_paging_populate(struct p2m_
req.p2mt = p2mt;
req.vcpu_id = v->vcpu_id;
- mem_event_put_request(d, &d->mem_paging, &req);
+ mem_event_put_request(d, &d->mem_event->paging, &req);
}
/**
@@ -3157,7 +3157,7 @@ void p2m_mem_paging_resume(struct p2m_do
mfn_t mfn;
/* Pull the response off the ring */
- mem_event_get_response(&d->mem_paging, &rsp);
+ mem_event_get_response(&d->mem_event->paging, &rsp);
/* Fix p2m entry if the page was not dropped */
if ( !(rsp.flags & MEM_EVENT_FLAG_DROP_PAGE) )
@@ -3210,7 +3210,7 @@ void p2m_mem_access_check(unsigned long
p2m_unlock(p2m);
/* Otherwise, check if there is a memory event listener, and send the
message along */
- res = mem_event_check_ring(d, &d->mem_access);
+ res = mem_event_check_ring(d, &d->mem_event->access);
if ( res < 0 )
{
/* No listener */
@@ -3254,7 +3254,7 @@ void p2m_mem_access_check(unsigned long
req.vcpu_id = v->vcpu_id;
- mem_event_put_request(d, &d->mem_access, &req);
+ mem_event_put_request(d, &d->mem_event->access, &req);
/* VCPU paused, mem event request sent */
}
@@ -3264,7 +3264,7 @@ void p2m_mem_access_resume(struct p2m_do
struct domain *d = p2m->domain;
mem_event_response_t rsp;
- mem_event_get_response(&d->mem_access, &rsp);
+ mem_event_get_response(&d->mem_event->access, &rsp);
/* Unpause domain */
if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -298,6 +298,10 @@ struct domain *domain_create(
init_status |= INIT_gnttab;
poolid = 0;
+
+ d->mem_event = xzalloc(struct mem_event_per_domain);
+ if ( !d->mem_event )
+ goto fail;
}
if ( arch_domain_create(d, domcr_flags) != 0 )
@@ -329,6 +333,7 @@ struct domain *domain_create(
fail:
d->is_dying = DOMDYING_dead;
atomic_set(&d->refcnt, DOMAIN_DESTROYED);
+ xfree(d->mem_event);
if ( init_status & INIT_arch )
arch_domain_destroy(d);
if ( init_status & INIT_gnttab )
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -201,6 +201,16 @@ struct mem_event_domain
int xen_port;
};
+struct mem_event_per_domain
+{
+ /* Memory sharing support */
+ struct mem_event_domain share;
+ /* Memory paging support */
+ struct mem_event_domain paging;
+ /* Memory access support */
+ struct mem_event_domain access;
+};
+
struct domain
{
domid_t domain_id;
@@ -327,12 +337,8 @@ struct domain
/* Non-migratable and non-restoreable? */
bool_t disable_migrate;
- /* Memory sharing support */
- struct mem_event_domain mem_share;
- /* Memory paging support */
- struct mem_event_domain mem_paging;
- /* Memory access support */
- struct mem_event_domain mem_access;
+ /* Various mem_events */
+ struct mem_event_per_domain *mem_event;
/* Currently computed from union of all vcpu cpu-affinity masks. */
nodemask_t node_affinity;
++++++ 24270-Free_d-mem_event_on_domain_destruction..patch ++++++
changeset: 24270:08716a7f1b74
user: Keir Fraser <keir@xxxxxxx>
date: Wed Nov 30 07:12:41 2011 -0800
files: xen/common/domain.c
description:
Free d->mem_event on domain destruction.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
---
xen/common/domain.c | 2 ++
1 file changed, 2 insertions(+)
Index: xen-4.1.2-testing/xen/common/domain.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/domain.c
+++ xen-4.1.2-testing/xen/common/domain.c
@@ -661,6 +661,8 @@ static void complete_domain_destroy(stru
/* Free page used by xen oprofile buffer. */
free_xenoprof_pages(d);
+ xfree(d->mem_event);
+
for ( i = d->max_vcpus - 1; i >= 0; i-- )
if ( (v = d->vcpu[i]) != NULL )
free_vcpu_struct(v);
++++++
24272-xenpaging_Fix_c-s_235070a29c8c3ddf7_update_machine_to_phys_mapping_during_page_deallocation.patch
++++++
changeset: 24272:62ff6a318c5d
user: Keir Fraser <keir@xxxxxxx>
date: Wed Nov 30 16:59:58 2011 -0800
files: xen/common/page_alloc.c
description:
xenpaging: Fix c/s 23507:0a29c8c3ddf7 ("update machine_to_phys_mapping[] during
page deallocation")
This patch clobbers page owner in free_heap_pages() before we are
finished using it. This means that a subsequent test to determine
whether it is safe to avoid safety TLB flushes incorrectly always
determines that it is safe to do so.
The fix is simple: we can defer the original patch's work until after
we are done with the page-owner field.
Thanks to Christian Limpach for spotting this one.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
---
xen/common/page_alloc.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
Index: xen-4.1.2-testing/xen/common/page_alloc.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/page_alloc.c
+++ xen-4.1.2-testing/xen/common/page_alloc.c
@@ -540,10 +540,6 @@ static void free_heap_pages(
for ( i = 0; i < (1 << order); i++ )
{
- /* This page is not a guest frame any more. */
- page_set_owner(&pg[i], NULL); /* set_gpfn_from_mfn snoops pg owner */
- set_gpfn_from_mfn(mfn + i, INVALID_M2P_ENTRY);
-
/*
* Cannot assume that count_info == 0, as there are some corner cases
* where it isn't the case and yet it isn't a bug:
@@ -567,6 +563,10 @@ static void free_heap_pages(
pg[i].u.free.need_tlbflush = (page_get_owner(&pg[i]) != NULL);
if ( pg[i].u.free.need_tlbflush )
pg[i].tlbflush_timestamp = tlbflush_current_time();
+
+ /* This page is not a guest frame any more. */
+ page_set_owner(&pg[i], NULL); /* set_gpfn_from_mfn snoops pg owner */
+ set_gpfn_from_mfn(mfn + i, INVALID_M2P_ENTRY);
}
avail[node][zone] += 1 << order;
++++++ 24275-x86-emul-lzcnt.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1322725849 -3600
# Node ID 76ea126f21724b72c120aff59460f7bbe9e6960d
# Parent 07cf778d517fdf661a34027af653a489489bf222
x86/emulator: properly handle lzcnt and tzcnt
These instructions are prefix selected flavors of bsf and bsr
respectively, and hence the presences of the F3 prefix must be handled
in the emulation code in order to avoid running into problems on newer
CPUs.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -990,6 +990,9 @@ static bool_t vcpu_has(
return rc == X86EMUL_OKAY;
}
+#define vcpu_has_lzcnt() vcpu_has(0x80000001, ECX, 5, ctxt, ops)
+#define vcpu_has_bmi1() vcpu_has(0x00000007, EBX, 3, ctxt, ops)
+
#define vcpu_must_have(leaf, reg, bit) \
generate_exception_if(!vcpu_has(leaf, reg, bit, ctxt, ops), EXC_UD, -1)
#define vcpu_must_have_sse2() vcpu_must_have(0x00000001, EDX, 26)
@@ -4114,13 +4117,24 @@ x86_emulate(
dst.val = (uint8_t)src.val;
break;
- case 0xbc: /* bsf */ {
- int zf;
+ case 0xbc: /* bsf or tzcnt */ {
+ bool_t zf;
asm ( "bsf %2,%0; setz %b1"
: "=r" (dst.val), "=q" (zf)
- : "r" (src.val), "1" (0) );
+ : "r" (src.val) );
_regs.eflags &= ~EFLG_ZF;
- if ( zf )
+ if ( (rep_prefix == REPE_PREFIX) && vcpu_has_bmi1() )
+ {
+ _regs.eflags &= ~EFLG_CF;
+ if ( zf )
+ {
+ _regs.eflags |= EFLG_CF;
+ dst.val = op_bytes * 8;
+ }
+ else if ( !dst.val )
+ _regs.eflags |= EFLG_ZF;
+ }
+ else if ( zf )
{
_regs.eflags |= EFLG_ZF;
dst.type = OP_NONE;
@@ -4128,13 +4142,28 @@ x86_emulate(
break;
}
- case 0xbd: /* bsr */ {
- int zf;
+ case 0xbd: /* bsr or lzcnt */ {
+ bool_t zf;
asm ( "bsr %2,%0; setz %b1"
: "=r" (dst.val), "=q" (zf)
- : "r" (src.val), "1" (0) );
+ : "r" (src.val) );
_regs.eflags &= ~EFLG_ZF;
- if ( zf )
+ if ( (rep_prefix == REPE_PREFIX) && vcpu_has_lzcnt() )
+ {
+ _regs.eflags &= ~EFLG_CF;
+ if ( zf )
+ {
+ _regs.eflags |= EFLG_CF;
+ dst.val = op_bytes * 8;
+ }
+ else
+ {
+ dst.val = op_bytes * 8 - 1 - dst.val;
+ if ( !dst.val )
+ _regs.eflags |= EFLG_ZF;
+ }
+ }
+ else if ( zf )
{
_regs.eflags |= EFLG_ZF;
dst.type = OP_NONE;
++++++ 24277-x86-dom0-features.patch ++++++
# HG changeset patch
# User Liu, Jinsong <jinsong.liu@xxxxxxxxx>
# Date 1322738484 -3600
# Node ID 1f6b58c8e1ba8d27dfb97f0da96d18d3ad163317
# Parent 89f7273681696022cc44db4f2ec5b22560482869
X86: expose Intel new features to dom0
This patch expose Intel new features to dom0, including
FMA/AVX2/BMI1/BMI2/LZCNT/MOVBE.
Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
Committed-by: Jan Beulich <jbeulich@xxxxxxxx>
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -848,8 +848,11 @@ static void pv_cpuid(struct cpu_user_reg
break;
case 7:
if ( regs->ecx == 0 )
- b &= (cpufeat_mask(X86_FEATURE_FSGSBASE) |
- cpufeat_mask(X86_FEATURE_ERMS));
+ b &= (cpufeat_mask(X86_FEATURE_BMI1) |
+ cpufeat_mask(X86_FEATURE_AVX2) |
+ cpufeat_mask(X86_FEATURE_BMI2) |
+ cpufeat_mask(X86_FEATURE_ERMS) |
+ cpufeat_mask(X86_FEATURE_FSGSBASE));
else
b = 0;
a = c = d = 0;
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -93,6 +93,7 @@
#define X86_FEATURE_TM2 (4*32+ 8) /* Thermal Monitor 2 */
#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental Streaming SIMD
Extensions-3 */
#define X86_FEATURE_CID (4*32+10) /* Context ID */
+#define X86_FEATURE_FMA (4*32+12) /* Fused Multiply Add */
#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */
#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */
#define X86_FEATURE_PDCM (4*32+15) /* Perf/Debug Capability MSR */
@@ -100,6 +101,7 @@
#define X86_FEATURE_SSE4_1 (4*32+19) /* Streaming SIMD Extensions 4.1 */
#define X86_FEATURE_SSE4_2 (4*32+20) /* Streaming SIMD Extensions 4.2 */
#define X86_FEATURE_X2APIC (4*32+21) /* Extended xAPIC */
+#define X86_FEATURE_MOVBE (4*32+22) /* movbe instruction */
#define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */
#define X86_FEATURE_TSC_DEADLINE (4*32+24) /* "tdt" TSC Deadline Timer */
#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
@@ -144,7 +146,10 @@
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 7 */
#define X86_FEATURE_FSGSBASE (7*32+ 0) /* {RD,WR}{FS,GS}BASE instructions */
+#define X86_FEATURE_BMI1 (7*32+ 3) /* 1st bit manipulation extensions */
+#define X86_FEATURE_AVX2 (7*32+ 5) /* AVX2 instructions */
#define X86_FEATURE_SMEP (7*32+ 7) /* Supervisor Mode Execution
Protection */
+#define X86_FEATURE_BMI2 (7*32+ 8) /* 2nd bit manipulation extensions */
#define X86_FEATURE_ERMS (7*32+ 9) /* Enhanced REP MOVSB/STOSB */
#define cpu_has(c, bit) test_bit(bit, (c)->x86_capability)
++++++ 24278-x86-dom0-no-PCID.patch ++++++
# HG changeset patch
# User Liu, Jinsong <jinsong.liu@xxxxxxxxx>
# Date 1322738563 -3600
# Node ID d9cb04ed55398ea4043c85573460afaf023aa1e9
# Parent 1f6b58c8e1ba8d27dfb97f0da96d18d3ad163317
X86: Disable PCID/INVPCID for dom0
PCID (Process-context identifier) is a facility by which a logical
processor may cache information for multiple linear-address spaces.
INVPCID is an new instruction to invalidate TLB. Refer latest Intel SDM
http://www.intel.com/content/www/us/en/processors/architectures-software-developer-manuals.html
We disable PCID/INVPCID for dom0 and pv. Exposing them into dom0 and pv
may result in performance regression, and it would trigger GP or UD
depending on whether platform suppport INVPCID or not.
This patch disables PCID/INVPCID for dom0.
Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
Committed-by: Jan Beulich <jbeulich@xxxxxxxx>
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -836,6 +836,7 @@ static void pv_cpuid(struct cpu_user_reg
__clear_bit(X86_FEATURE_CX16 % 32, &c);
__clear_bit(X86_FEATURE_XTPR % 32, &c);
__clear_bit(X86_FEATURE_PDCM % 32, &c);
+ __clear_bit(X86_FEATURE_PCID % 32, &c);
__clear_bit(X86_FEATURE_DCA % 32, &c);
if ( !xsave_enabled(current) )
{
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -97,6 +97,7 @@
#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */
#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */
#define X86_FEATURE_PDCM (4*32+15) /* Perf/Debug Capability MSR */
+#define X86_FEATURE_PCID (4*32+17) /* Process Context ID */
#define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */
#define X86_FEATURE_SSE4_1 (4*32+19) /* Streaming SIMD Extensions 4.1 */
#define X86_FEATURE_SSE4_2 (4*32+20) /* Streaming SIMD Extensions 4.2 */
@@ -151,6 +152,7 @@
#define X86_FEATURE_SMEP (7*32+ 7) /* Supervisor Mode Execution
Protection */
#define X86_FEATURE_BMI2 (7*32+ 8) /* 2nd bit manipulation extensions */
#define X86_FEATURE_ERMS (7*32+ 9) /* Enhanced REP MOVSB/STOSB */
+#define X86_FEATURE_INVPCID (7*32+10) /* Invalidate Process Context ID */
#define cpu_has(c, bit) test_bit(bit, (c)->x86_capability)
#define boot_cpu_has(bit) test_bit(bit, boot_cpu_data.x86_capability)
++++++ 24282-x86-log-dirty-bitmap-leak.patch ++++++
# HG changeset patch
# User Tim Deegan <tim@xxxxxxx>
# Date 1322749036 0
# Node ID a06cda9fb25f2d7b7b5c7da170813e4a8bb0cd67
# Parent 75f4e4d9f039ea656051e6dfd73e40d4cb32896b
x86/mm: Don't lose track of the log dirty bitmap
hap_log_dirty_init unconditionally sets the top of the log dirty
bitmap to INVALID_MFN. If there had been a bitmap allocated, it is
then leaked, and the host crashes on an ASSERT when the domain is
cleaned up.
Signed-off-by: Tim Deegan <tim@xxxxxxx>
Acked-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -665,7 +665,6 @@ void paging_log_dirty_init(struct domain
d->arch.paging.log_dirty.enable_log_dirty = enable_log_dirty;
d->arch.paging.log_dirty.disable_log_dirty = disable_log_dirty;
d->arch.paging.log_dirty.clean_dirty_bitmap = clean_dirty_bitmap;
- d->arch.paging.log_dirty.top = _mfn(INVALID_MFN);
}
/* This function fress log dirty bitmap resources. */
@@ -686,6 +685,11 @@ int paging_domain_init(struct domain *d,
if ( (rc = p2m_init(d)) != 0 )
return rc;
+ /* This must be initialized separately from the rest of the
+ * log-dirty init code as that can be called more than once and we
+ * don't want to leak any active log-dirty bitmaps */
+ d->arch.paging.log_dirty.top = _mfn(INVALID_MFN);
+
/* The order of the *_init calls below is important, as the later
* ones may rewrite some common fields. Shadow pagetables are the
* default... */
++++++ 24318-x86-mm_Fix_checks_during_foreign_mapping_of_paged_pages.patch
++++++
changeset: 24318:f25a004a6de8
user: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
date: Thu Dec 01 17:21:24 2011 +0000
files: xen/arch/x86/mm.c
description:
x86/mm: Fix checks during foreign mapping of paged pages
Check that the valid mfn is the one we are mapping, not the
mfn of the page table of the foreign domain.
Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
---
xen/arch/x86/mm.c | 24 ++++++++++++++++--------
1 file changed, 16 insertions(+), 8 deletions(-)
Index: xen-4.1.2-testing/xen/arch/x86/mm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm.c
+++ xen-4.1.2-testing/xen/arch/x86/mm.c
@@ -3492,8 +3492,9 @@ int do_mmu_update(
{
l1_pgentry_t l1e = l1e_from_intpte(req.val);
p2m_type_t l1e_p2mt;
+ unsigned long l1emfn = mfn_x(
gfn_to_mfn(p2m_get_hostp2m(pg_owner),
- l1e_get_pfn(l1e), &l1e_p2mt);
+ l1e_get_pfn(l1e), &l1e_p2mt));
if ( p2m_is_paged(l1e_p2mt) )
{
@@ -3502,7 +3503,8 @@ int do_mmu_update(
rc = -ENOENT;
break;
}
- else if ( p2m_ram_paging_in_start == l1e_p2mt &&
!mfn_valid(mfn) )
+ else if ( p2m_ram_paging_in_start == l1e_p2mt &&
+ !mfn_valid(l1emfn) )
{
rc = -ENOENT;
break;
@@ -3534,7 +3536,8 @@ int do_mmu_update(
{
l2_pgentry_t l2e = l2e_from_intpte(req.val);
p2m_type_t l2e_p2mt;
- gfn_to_mfn(p2m_get_hostp2m(pg_owner), l2e_get_pfn(l2e),
&l2e_p2mt);
+ unsigned long l2emfn = mfn_x(
+ gfn_to_mfn(p2m_get_hostp2m(pg_owner), l2e_get_pfn(l2e),
&l2e_p2mt));
if ( p2m_is_paged(l2e_p2mt) )
{
@@ -3543,7 +3546,8 @@ int do_mmu_update(
rc = -ENOENT;
break;
}
- else if ( p2m_ram_paging_in_start == l2e_p2mt &&
!mfn_valid(mfn) )
+ else if ( p2m_ram_paging_in_start == l2e_p2mt &&
+ !mfn_valid(l2emfn) )
{
rc = -ENOENT;
break;
@@ -3563,7 +3567,8 @@ int do_mmu_update(
{
l3_pgentry_t l3e = l3e_from_intpte(req.val);
p2m_type_t l3e_p2mt;
- gfn_to_mfn(p2m_get_hostp2m(pg_owner), l3e_get_pfn(l3e),
&l3e_p2mt);
+ unsigned long l3emfn = mfn_x(
+ gfn_to_mfn(p2m_get_hostp2m(pg_owner), l3e_get_pfn(l3e),
&l3e_p2mt));
if ( p2m_is_paged(l3e_p2mt) )
{
@@ -3572,7 +3577,8 @@ int do_mmu_update(
rc = -ENOENT;
break;
}
- else if ( p2m_ram_paging_in_start == l3e_p2mt &&
!mfn_valid(mfn) )
+ else if ( p2m_ram_paging_in_start == l3e_p2mt &&
+ !mfn_valid(l3emfn) )
{
rc = -ENOENT;
break;
@@ -3592,8 +3598,9 @@ int do_mmu_update(
{
l4_pgentry_t l4e = l4e_from_intpte(req.val);
p2m_type_t l4e_p2mt;
+ unsigned long l4emfn = mfn_x(
gfn_to_mfn(p2m_get_hostp2m(pg_owner),
- l4e_get_pfn(l4e), &l4e_p2mt);
+ l4e_get_pfn(l4e), &l4e_p2mt));
if ( p2m_is_paged(l4e_p2mt) )
{
@@ -3602,7 +3609,8 @@ int do_mmu_update(
rc = -ENOENT;
break;
}
- else if ( p2m_ram_paging_in_start == l4e_p2mt &&
!mfn_valid(mfn) )
+ else if ( p2m_ram_paging_in_start == l4e_p2mt &&
+ !mfn_valid(l4emfn) )
{
rc = -ENOENT;
break;
++++++
24327-After_preparing_a_page_for_page-in_allow_immediate_fill-in_of_the_page_contents.patch
++++++
changeset: 24327:8529bca7a3f0
parent: 24322:6bac46816504
user: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
date: Thu Dec 01 18:14:24 2011 +0000
files: xen/arch/x86/mm/mem_event.c xen/arch/x86/mm/mem_paging.c
xen/arch/x86/mm/p2m.c xen/include/asm-x86/p2m.h xen/include/public/domctl.h
description:
After preparing a page for page-in, allow immediate fill-in of the page contents
p2m_mem_paging_prep ensures that an mfn is backing the paged-out gfn, and
transitions to the next state in the paging state machine for that page.
Foreign mappings of the gfn will now succeed. This is the key idea, as
it allows the pager to now map the gfn and fill in its contents.
Unfortunately, it also allows any other foreign mapper to map the gfn and read
its contents. This is particularly dangerous when the populate is launched
by a foreign mapper in the first place, which will be actively retrying the
map operation and might race with the pager. Qemu-dm being a prime example.
Fix the race by allowing a buffer to be optionally passed in the prep
operation, and having the hypervisor memcpy from that buffer into the newly
prepped page before promoting the gfn type.
Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
---
xen/arch/x86/mm/mem_event.c | 2 +-
xen/arch/x86/mm/mem_paging.c | 2 +-
xen/arch/x86/mm/p2m.c | 32 ++++++++++++++++++++++++++++++--
xen/include/asm-x86/p2m.h | 2 +-
xen/include/public/domctl.h | 8 ++++++--
5 files changed, 39 insertions(+), 7 deletions(-)
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -45,7 +45,7 @@ static int mem_event_enable(struct domai
struct domain *dom_mem_event = current->domain;
struct vcpu *v = current;
unsigned long ring_addr = mec->ring_addr;
- unsigned long shared_addr = mec->shared_addr;
+ unsigned long shared_addr = mec->u.shared_addr;
l1_pgentry_t l1e;
unsigned long gfn;
p2m_type_t p2mt;
--- a/xen/arch/x86/mm/mem_paging.c
+++ b/xen/arch/x86/mm/mem_paging.c
@@ -50,7 +50,7 @@ int mem_paging_domctl(struct domain *d,
case XEN_DOMCTL_MEM_EVENT_OP_PAGING_PREP:
{
unsigned long gfn = mec->gfn;
- rc = p2m_mem_paging_prep(p2m, gfn);
+ rc = p2m_mem_paging_prep(p2m, gfn, mec->u.buffer);
}
break;
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -3093,13 +3093,20 @@ void p2m_mem_paging_populate(struct p2m_
* mfn if populate was called for gfn which was nominated but not evicted. In
* this case only the p2mt needs to be forwarded.
*/
-int p2m_mem_paging_prep(struct p2m_domain *p2m, unsigned long gfn)
+int p2m_mem_paging_prep(struct p2m_domain *p2m, unsigned long gfn, uint64_t
buffer)
{
struct page_info *page;
p2m_type_t p2mt;
p2m_access_t a;
mfn_t mfn;
- int ret;
+ int ret, page_extant = 1;
+ const void *user_ptr = (const void *) buffer;
+
+ if ( user_ptr )
+ /* Sanity check the buffer and bail out early if trouble */
+ if ( (buffer & (PAGE_SIZE - 1)) ||
+ (!access_ok(user_ptr, PAGE_SIZE)) )
+ return -EINVAL;
p2m_lock(p2m);
@@ -3119,6 +3126,28 @@ int p2m_mem_paging_prep(struct p2m_domai
if ( unlikely(page == NULL) )
goto out;
mfn = page_to_mfn(page);
+ page_extant = 0;
+ }
+
+ /* If we were given a buffer, now is the time to use it */
+ if ( !page_extant && user_ptr )
+ {
+ void *guest_map;
+ int rc;
+
+ ASSERT( mfn_valid(mfn) );
+ guest_map = map_domain_page(mfn_x(mfn));
+ rc = copy_from_user(guest_map, user_ptr, PAGE_SIZE);
+ unmap_domain_page(guest_map);
+ if ( rc )
+ {
+ gdprintk(XENLOG_ERR, "Failed to load paging-in gfn %lx domain %u "
+ "bytes left %d\n",
+ gfn, p2m->domain->domain_id, rc);
+ ret = -EFAULT;
+ put_page(page); /* Don't leak pages */
+ goto out;
+ }
}
/* Fix p2m mapping */
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -524,7 +524,7 @@ void p2m_mem_paging_drop_page(struct p2m
/* Start populating a paged out frame */
void p2m_mem_paging_populate(struct p2m_domain *p2m, unsigned long gfn);
/* Prepare the p2m for paging a frame in */
-int p2m_mem_paging_prep(struct p2m_domain *p2m, unsigned long gfn);
+int p2m_mem_paging_prep(struct p2m_domain *p2m, unsigned long gfn, uint64_t
buffer);
/* Resume normal operation (in case a domain was paused) */
void p2m_mem_paging_resume(struct p2m_domain *p2m);
#else
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -741,8 +741,12 @@ struct xen_domctl_mem_event_op {
uint32_t op; /* XEN_DOMCTL_MEM_EVENT_OP_*_* */
uint32_t mode; /* XEN_DOMCTL_MEM_EVENT_OP_* */
- /* OP_ENABLE */
- uint64_aligned_t shared_addr; /* IN: Virtual address of shared page */
+ union {
+ /* OP_ENABLE IN: Virtual address of shared page */
+ uint64_aligned_t shared_addr;
+ /* PAGING_PREP IN: buffer to immediately fill page in */
+ uint64_aligned_t buffer;
+ } u;
uint64_aligned_t ring_addr; /* IN: Virtual address of ring page */
/* Other OPs */
++++++
24328-Tools_Libxc_wrappers_to_automatically_fill_in_page_oud_page_contents_on_prepare.patch
++++++
changeset: 24328:8ad47b48047d
user: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
date: Thu Dec 01 18:14:24 2011 +0000
files: tools/libxc/xc_mem_event.c tools/libxc/xc_mem_paging.c
tools/libxc/xenctrl.h
description:
Tools: Libxc wrappers to automatically fill in page oud page contents on prepare
Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
Acked-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
---
tools/libxc/xc_mem_event.c | 4 ++--
tools/libxc/xc_mem_paging.c | 23 +++++++++++++++++++++++
tools/libxc/xenctrl.h | 2 ++
3 files changed, 27 insertions(+), 2 deletions(-)
Index: xen-4.1.2-testing/tools/libxc/xc_mem_event.c
===================================================================
--- xen-4.1.2-testing.orig/tools/libxc/xc_mem_event.c
+++ xen-4.1.2-testing/tools/libxc/xc_mem_event.c
@@ -24,7 +24,7 @@
#include "xc_private.h"
int xc_mem_event_control(xc_interface *xch, domid_t domain_id, unsigned int op,
- unsigned int mode, void *shared_page,
+ unsigned int mode, void *page,
void *ring_page, unsigned long gfn)
{
DECLARE_DOMCTL;
@@ -34,7 +34,7 @@ int xc_mem_event_control(xc_interface *x
domctl.u.mem_event_op.op = op;
domctl.u.mem_event_op.mode = mode;
- domctl.u.mem_event_op.shared_addr = (unsigned long)shared_page;
+ domctl.u.mem_event_op.u.shared_addr = (unsigned long)page;
domctl.u.mem_event_op.ring_addr = (unsigned long)ring_page;
domctl.u.mem_event_op.gfn = gfn;
Index: xen-4.1.2-testing/tools/libxc/xc_mem_paging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/libxc/xc_mem_paging.c
+++ xen-4.1.2-testing/tools/libxc/xc_mem_paging.c
@@ -65,6 +65,29 @@ int xc_mem_paging_prep(xc_interface *xch
NULL, NULL, gfn);
}
+int xc_mem_paging_load(xc_interface *xch, domid_t domain_id,
+ unsigned long gfn, void *buffer)
+{
+ int rc;
+
+ if ( !buffer )
+ return -EINVAL;
+
+ if ( ((unsigned long) buffer) & (XC_PAGE_SIZE - 1) )
+ return -EINVAL;
+
+ if ( mlock(buffer, XC_PAGE_SIZE) )
+ return -errno;
+
+ rc = xc_mem_event_control(xch, domain_id,
+ XEN_DOMCTL_MEM_EVENT_OP_PAGING_PREP,
+ XEN_DOMCTL_MEM_EVENT_OP_PAGING,
+ buffer, NULL, gfn);
+
+ (void)munlock(buffer, XC_PAGE_SIZE);
+ return rc;
+}
+
int xc_mem_paging_resume(xc_interface *xch, domid_t domain_id, unsigned long
gfn)
{
return xc_mem_event_control(xch, domain_id,
Index: xen-4.1.2-testing/tools/libxc/xenctrl.h
===================================================================
--- xen-4.1.2-testing.orig/tools/libxc/xenctrl.h
+++ xen-4.1.2-testing/tools/libxc/xenctrl.h
@@ -1742,6 +1742,8 @@ int xc_mem_paging_nominate(xc_interface
unsigned long gfn);
int xc_mem_paging_evict(xc_interface *xch, domid_t domain_id, unsigned long
gfn);
int xc_mem_paging_prep(xc_interface *xch, domid_t domain_id, unsigned long
gfn);
+int xc_mem_paging_load(xc_interface *xch, domid_t domain_id,
+ unsigned long gfn, void *buffer);
int xc_mem_paging_resume(xc_interface *xch, domid_t domain_id,
unsigned long gfn);
++++++
24329-Teach_xenpaging_to_use_the_new_and_non-racy_xc_mem_paging_load_interface.patch
++++++
changeset: 24329:a8f5faa127c4
user: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
date: Thu Dec 01 18:14:24 2011 +0000
files: tools/xenpaging/xenpaging.c
description:
Teach xenpaging to use the new and non-racy xc_mem_paging_load interface
Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
Acked-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
---
tools/xenpaging/xenpaging.c | 43 +++++++++++++++++++++----------------------
1 file changed, 21 insertions(+), 22 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -45,6 +45,7 @@ static char *dom_path;
static char watch_token[16];
static char *filename;
static int interrupted;
+static void *paging_buffer = NULL;
static void unlink_pagefile(void)
{
@@ -438,6 +439,13 @@ static xenpaging_t *xenpaging_init(int a
goto err;
}
+ paging_buffer = init_page();
+ if ( !paging_buffer )
+ {
+ ERROR("Creating page aligned load buffer");
+ goto err;
+ }
+
return paging;
err:
@@ -649,10 +657,20 @@ static int xenpaging_populate_page(xenpa
unsigned char oom = 0;
DPRINTF("populate_page < gfn %"PRI_xen_pfn" pageslot %d\n", gfn, i);
+
+ /* Read page */
+ ret = read_page(fd, paging_buffer, i);
+ if ( ret != 0 )
+ {
+ ERROR("Error reading page");
+ goto out;
+ }
+
do
{
/* Tell Xen to allocate a page for the domain */
- ret = xc_mem_paging_prep(xch, paging->mem_event.domain_id, gfn);
+ ret = xc_mem_paging_load(xch, paging->mem_event.domain_id, gfn,
+ paging_buffer);
if ( ret != 0 )
{
if ( errno == ENOMEM )
@@ -662,33 +680,14 @@ static int xenpaging_populate_page(xenpa
sleep(1);
continue;
}
- PERROR("Error preparing %"PRI_xen_pfn" for page-in", gfn);
- goto out_map;
+ PERROR("Error loading %"PRI_xen_pfn" during page-in", gfn);
+ goto out;
}
}
while ( ret && !interrupted );
- /* Map page */
- ret = -EFAULT;
- page = xc_map_foreign_pages(xch, paging->mem_event.domain_id,
- PROT_READ | PROT_WRITE, &gfn, 1);
- if ( page == NULL )
- {
- PERROR("Error mapping page %"PRI_xen_pfn": page is null", gfn);
- goto out_map;
- }
-
- /* Read page */
- ret = read_page(fd, page, i);
- if ( ret != 0 )
- {
- PERROR("Error reading page %"PRI_xen_pfn"", gfn);
- goto out;
- }
out:
- munmap(page, PAGE_SIZE);
- out_map:
return ret;
}
++++++
24344-tools-x86_64_Fix_cpuid_inline_asm_to_not_clobber_stacks_red_zone.patch
++++++
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1322836274 28800
# Node ID 72f4e4cb7440c6ab64d4c08dfdc3158112cc95ac
# Parent 109b99239b21275ee2249873dcdb9a413741142d
tools/x86_64: Fix cpuid() inline asm to not clobber stack's red zone
Pushing stuff onto the stack on x86-64 when we do not specify
-mno-red-zone is unsafe. Since the complicated asm is due to register
pressure on i386, we simply implement an all-new simpler alternative
for x86-64.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
Acked-by: Jan Beulich <jbeulich@xxxxxxxxxx>
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1322844002 28800
# Node ID 491c3ebf1d371d03fdd0aabe82b0f422037c67ba
# Parent 72f4e4cb7440c6ab64d4c08dfdc3158112cc95ac
tools/libxc: Fix x86_32 build breakage in previous changeset.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
--- a/tools/libxc/xc_cpuid_x86.c
+++ b/tools/libxc/xc_cpuid_x86.c
@@ -42,23 +42,23 @@ static int hypervisor_is_64bit(xc_interf
static void cpuid(const unsigned int *input, unsigned int *regs)
{
unsigned int count = (input[1] == XEN_CPUID_INPUT_UNUSED) ? 0 : input[1];
- asm (
#ifdef __i386__
+ /* Use the stack to avoid reg constraint failures with some gcc flags */
+ asm (
"push %%ebx; push %%edx\n\t"
-#else
- "push %%rbx; push %%rdx\n\t"
-#endif
"cpuid\n\t"
"mov %%ebx,4(%4)\n\t"
"mov %%edx,12(%4)\n\t"
-#ifdef __i386__
"pop %%edx; pop %%ebx\n\t"
-#else
- "pop %%rdx; pop %%rbx\n\t"
-#endif
: "=a" (regs[0]), "=c" (regs[2])
: "0" (input[0]), "1" (count), "S" (regs)
: "memory" );
+#else
+ asm (
+ "cpuid"
+ : "=a" (regs[0]), "=b" (regs[1]), "=c" (regs[2]), "=d" (regs[3])
+ : "0" (input[0]), "2" (count) );
+#endif
}
/* Get the manufacturer brand name of the host processor. */
--- a/tools/misc/xen-detect.c
+++ b/tools/misc/xen-detect.c
@@ -35,18 +35,21 @@
static void cpuid(uint32_t idx, uint32_t *regs, int pv_context)
{
- asm volatile (
#ifdef __i386__
-#define R(x) "%%e"#x"x"
-#else
-#define R(x) "%%r"#x"x"
-#endif
- "push "R(a)"; push "R(b)"; push "R(c)"; push "R(d)"\n\t"
+ /* Use the stack to avoid reg constraint failures with some gcc flags */
+ asm volatile (
+ "push %%eax; push %%ebx; push %%ecx; push %%edx\n\t"
"test %1,%1 ; jz 1f ; ud2a ; .ascii \"xen\" ; 1: cpuid\n\t"
"mov %%eax,(%2); mov %%ebx,4(%2)\n\t"
"mov %%ecx,8(%2); mov %%edx,12(%2)\n\t"
- "pop "R(d)"; pop "R(c)"; pop "R(b)"; pop "R(a)"\n\t"
+ "pop %%edx; pop %%ecx; pop %%ebx; pop %%eax\n\t"
: : "a" (idx), "c" (pv_context), "S" (regs) : "memory" );
+#else
+ asm volatile (
+ "test %5,%5 ; jz 1f ; ud2a ; .ascii \"xen\" ; 1: cpuid\n\t"
+ : "=a" (regs[0]), "=b" (regs[1]), "=c" (regs[2]), "=d" (regs[3])
+ : "0" (idx), "1" (pv_context), "2" (0) );
+#endif
}
static int check_for_xen(int pv_context)
++++++ 24357-firmware-no-_PS0-_PS3.patch ++++++
References: bnc#711219
# HG changeset patch
# User Xudong Hao <xudong.hao@xxxxxxxxx>
# Date 1323113706 0
# Node ID 832fa3f3543298a7125cd5f996d1e28dd7ba47b1
# Parent 60ea36c0512b779f291bb6c007e1f05c16054ec2
tools/firmware: remove "_PS0/3" Method
Do not expose the ACPI power management "_PS0/3" Method to guest
firmware. According to section 3.4 of the APCI specification 4.0, PCI
device control the device power through its own specification but not
through APCI.
Qemu pushes "_PS0/3" to guest will cause a mess between ACPI PM and
PCI PM as a result of incorrect ACPI table shipped with the guest
BIOS, it may cause a failure of PCI device PM state transition(from
PCI_UNKNOWN to PCI_D0).
Signed-off-by: Xudong Hao <xudong.hao@xxxxxxxxx>
Signed-off-by: Haitao Shan <haitao.shan@xxxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>
--- a/tools/firmware/hvmloader/acpi/mk_dsdt.c
+++ b/tools/firmware/hvmloader/acpi/mk_dsdt.c
@@ -251,8 +251,6 @@ int main(int argc, char **argv)
* the ACPI event:
* _EJ0: eject a device
* _STA: return a device's status, e.g. enabled or removed
- * Other methods are optional:
- * _PS0/3: put them here for debug purpose
*
* Eject button would generate a general-purpose event, then the
* control method for this event uses Notify() to inform OSPM which
@@ -271,14 +269,6 @@ int main(int argc, char **argv)
stmt("Name", "_ADR, 0x%08x", ((slot & ~7) << 13) | (slot & 7));
/* _SUN == dev */
stmt("Name", "_SUN, 0x%08x", slot >> 3);
- push_block("Method", "_PS0, 0");
- stmt("Store", "0x%02x, \\_GPE.DPT1", slot);
- stmt("Store", "0x80, \\_GPE.DPT2");
- pop_block();
- push_block("Method", "_PS3, 0");
- stmt("Store", "0x%02x, \\_GPE.DPT1", slot);
- stmt("Store", "0x83, \\_GPE.DPT2");
- pop_block();
push_block("Method", "_EJ0, 1");
stmt("Store", "0x%02x, \\_GPE.DPT1", slot);
stmt("Store", "0x88, \\_GPE.DPT2");
++++++ 24358-kexec-compat-overflow.patch ++++++
# HG changeset patch
# User Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
# Date 1323114166 0
# Node ID 9961a6d5356a57685b06f65133c6ade5041e3356
# Parent 832fa3f3543298a7125cd5f996d1e28dd7ba47b1
KEXEC: fix kexec_get_range_compat to fail vocally.
Fail with -ERANGE rather than silently truncating 64bit values (a
physical address and size) into 32bit integers for dom0 to consume.
Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Simplify the bitwise arithmetic a bit.
Signed-off-by: Keir Fraser <keir@xxxxxxx>
--- a/xen/common/kexec.c
+++ b/xen/common/kexec.c
@@ -395,6 +395,10 @@ static int kexec_get_range_compat(XEN_GU
ret = kexec_get_range_internal(&range);
+ /* Dont silently truncate physical addresses or sizes. */
+ if ( (range.start | range.size) & ~(unsigned long)(~0u) )
+ return -ERANGE;
+
if ( ret == 0 ) {
XLAT_kexec_range(&compat_range, &range);
if ( unlikely(copy_to_guest(uarg, &compat_range, 1)) )
++++++ 24359-x86-domU-features.patch ++++++
# HG changeset patch
# User Liu, Jinsong <jinsong.liu@xxxxxxxxx>
# Date 1323170838 0
# Node ID a0befa32e927cc147aaee9bce42c51f53580a875
# Parent 9961a6d5356a57685b06f65133c6ade5041e3356
X86: expose Intel new features to pv/hvm
Intel recently release some new features, including
FMA/AVX2/BMI1/BMI2/LZCNT/MOVBE.
Refer to http://software.intel.com/file/36945
This patch expose these new features to pv and hvm.
Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>
--- a/tools/libxc/xc_cpufeature.h
+++ b/tools/libxc/xc_cpufeature.h
@@ -74,6 +74,7 @@
#define X86_FEATURE_TM2 8 /* Thermal Monitor 2 */
#define X86_FEATURE_SSSE3 9 /* Supplemental Streaming SIMD Exts-3 */
#define X86_FEATURE_CID 10 /* Context ID */
+#define X86_FEATURE_FMA 12 /* Fused Multiply Add */
#define X86_FEATURE_CX16 13 /* CMPXCHG16B */
#define X86_FEATURE_XTPR 14 /* Send Task Priority Messages */
#define X86_FEATURE_PDCM 15 /* Perf/Debug Capability MSR */
@@ -81,6 +82,7 @@
#define X86_FEATURE_SSE4_1 19 /* Streaming SIMD Extensions 4.1 */
#define X86_FEATURE_SSE4_2 20 /* Streaming SIMD Extensions 4.2 */
#define X86_FEATURE_X2APIC 21 /* x2APIC */
+#define X86_FEATURE_MOVBE 22 /* movbe instruction */
#define X86_FEATURE_POPCNT 23 /* POPCNT instruction */
#define X86_FEATURE_TSC_DEADLINE 24 /* "tdt" TSC Deadline Timer */
#define X86_FEATURE_AES 25 /* AES acceleration instructions */
@@ -125,7 +127,10 @@
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx) */
#define X86_FEATURE_FSGSBASE 0 /* {RD,WR}{FS,GS}BASE instructions */
+#define X86_FEATURE_BMI1 3 /* 1st group bit manipulation extensions */
+#define X86_FEATURE_AVX2 5 /* AVX2 instructions */
#define X86_FEATURE_SMEP 7 /* Supervisor Mode Execution Protection */
+#define X86_FEATURE_BMI2 8 /* 2nd group bit manipulation extensions */
#define X86_FEATURE_ERMS 9 /* Enhanced REP MOVSB/STOSB */
#endif /* __LIBXC_CPUFEATURE_H */
--- a/tools/libxc/xc_cpuid_x86.c
+++ b/tools/libxc/xc_cpuid_x86.c
@@ -148,7 +148,8 @@ static void intel_xc_cpuid_policy(
int is_64bit = hypervisor_is_64bit(xch) && is_pae;
/* Only a few features are advertised in Intel's 0x80000001. */
- regs[2] &= (is_64bit ? bitmaskof(X86_FEATURE_LAHF_LM) : 0);
+ regs[2] &= (is_64bit ? bitmaskof(X86_FEATURE_LAHF_LM) : 0) |
+ bitmaskof(X86_FEATURE_ABM);
regs[3] &= ((is_pae ? bitmaskof(X86_FEATURE_NX) : 0) |
(is_64bit ? bitmaskof(X86_FEATURE_LM) : 0) |
(is_64bit ? bitmaskof(X86_FEATURE_SYSCALL) : 0) |
@@ -256,9 +257,11 @@ static void xc_cpuid_hvm_policy(
regs[2] &= (bitmaskof(X86_FEATURE_XMM3) |
bitmaskof(X86_FEATURE_PCLMULQDQ) |
bitmaskof(X86_FEATURE_SSSE3) |
+ bitmaskof(X86_FEATURE_FMA) |
bitmaskof(X86_FEATURE_CX16) |
bitmaskof(X86_FEATURE_SSE4_1) |
bitmaskof(X86_FEATURE_SSE4_2) |
+ bitmaskof(X86_FEATURE_MOVBE) |
bitmaskof(X86_FEATURE_POPCNT) |
bitmaskof(X86_FEATURE_AES) |
bitmaskof(X86_FEATURE_F16C) |
@@ -303,7 +306,10 @@ static void xc_cpuid_hvm_policy(
case 0x00000007: /* Intel-defined CPU features */
if ( input[1] == 0 ) {
- regs[1] &= (bitmaskof(X86_FEATURE_SMEP) |
+ regs[1] &= (bitmaskof(X86_FEATURE_BMI1) |
+ bitmaskof(X86_FEATURE_AVX2) |
+ bitmaskof(X86_FEATURE_SMEP) |
+ bitmaskof(X86_FEATURE_BMI2) |
bitmaskof(X86_FEATURE_ERMS) |
bitmaskof(X86_FEATURE_FSGSBASE));
} else
@@ -427,8 +433,11 @@ static void xc_cpuid_pv_policy(
case 7:
if ( input[1] == 0 )
- regs[1] &= (bitmaskof(X86_FEATURE_FSGSBASE) |
- bitmaskof(X86_FEATURE_ERMS));
+ regs[1] &= (bitmaskof(X86_FEATURE_BMI1) |
+ bitmaskof(X86_FEATURE_AVX2) |
+ bitmaskof(X86_FEATURE_BMI2) |
+ bitmaskof(X86_FEATURE_ERMS) |
+ bitmaskof(X86_FEATURE_FSGSBASE));
else
regs[1] = 0;
regs[0] = regs[2] = regs[3] = 0;
++++++ 24360-x86-pv-domU-no-PCID.patch ++++++
# HG changeset patch
# User Liu, Jinsong <jinsong.liu@xxxxxxxxx>
# Date 1323170884 0
# Node ID d313582d4fa2157332f1d50e599aebca36c41b3b
# Parent a0befa32e927cc147aaee9bce42c51f53580a875
X86: Disable PCID/INVPCID for pv
This patch disable PCID/INVPCID for pv.
Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>
--- a/tools/libxc/xc_cpufeature.h
+++ b/tools/libxc/xc_cpufeature.h
@@ -78,6 +78,7 @@
#define X86_FEATURE_CX16 13 /* CMPXCHG16B */
#define X86_FEATURE_XTPR 14 /* Send Task Priority Messages */
#define X86_FEATURE_PDCM 15 /* Perf/Debug Capability MSR */
+#define X86_FEATURE_PCID 17 /* Process Context ID */
#define X86_FEATURE_DCA 18 /* Direct Cache Access */
#define X86_FEATURE_SSE4_1 19 /* Streaming SIMD Extensions 4.1 */
#define X86_FEATURE_SSE4_2 20 /* Streaming SIMD Extensions 4.2 */
@@ -132,5 +133,6 @@
#define X86_FEATURE_SMEP 7 /* Supervisor Mode Execution Protection */
#define X86_FEATURE_BMI2 8 /* 2nd group bit manipulation extensions */
#define X86_FEATURE_ERMS 9 /* Enhanced REP MOVSB/STOSB */
+#define X86_FEATURE_INVPCID 10 /* Invalidate Process Context ID */
#endif /* __LIBXC_CPUFEATURE_H */
--- a/tools/libxc/xc_cpuid_x86.c
+++ b/tools/libxc/xc_cpuid_x86.c
@@ -427,6 +427,7 @@ static void xc_cpuid_pv_policy(
}
clear_bit(X86_FEATURE_XTPR, regs[2]);
clear_bit(X86_FEATURE_PDCM, regs[2]);
+ clear_bit(X86_FEATURE_PCID, regs[2]);
clear_bit(X86_FEATURE_DCA, regs[2]);
set_bit(X86_FEATURE_HYPERVISOR, regs[2]);
break;
++++++ 24389-amd-fam10-gart-tlb-walk-err.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1323765911 -3600
# Node ID 868d82faf6511de3b3edce18cc6a9e1c938f0b8f
# Parent 7ca56cca09ade16645fb4806be2c5b2b0bc3332b
x86, amd: Disable GartTlbWlkErr when BIOS forgets it
This patch disables GartTlbWlk errors on AMD Fam10h CPUs if the BIOS
forgets to do is (or is just too old). Letting these errors enabled
can cause a sync-flood on the CPU causing a reboot.
The AMD BKDG recommends disabling GART TLB Wlk Error completely.
Based on a Linux patch from Joerg Roedel <joerg.roedel@xxxxxxx>; see e.g.
https://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=patch;h=5bbc097d890409d8eff4e3f1d26f11a9d6b7c07e
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
--- a/xen/arch/x86/cpu/mcheck/amd_f10.c
+++ b/xen/arch/x86/cpu/mcheck/amd_f10.c
@@ -46,6 +46,7 @@
#include <asm/msr.h>
#include "mce.h"
+#include "mce_quirks.h"
#include "x86_mca.h"
@@ -91,9 +92,14 @@ amd_f10_handler(struct mc_info *mi, uint
/* AMD Family10 machine check */
enum mcheck_type amd_f10_mcheck_init(struct cpuinfo_x86 *c)
{
+ enum mcequirk_amd_flags quirkflag = mcequirk_lookup_amd_quirkdata(c);
+
if (amd_k8_mcheck_init(c) == mcheck_none)
return mcheck_none;
+ if (quirkflag == MCEQUIRK_F10_GART)
+ mcequirk_amd_apply(quirkflag);
+
x86_mce_callback_register(amd_f10_handler);
return mcheck_amd_famXX;
--- a/xen/arch/x86/cpu/mcheck/mce_amd_quirks.c
+++ b/xen/arch/x86/cpu/mcheck/mce_amd_quirks.c
@@ -29,6 +29,8 @@ static const struct mce_quirkdata mce_am
MCEQUIRK_K7_BANK0 },
{ 0xf /* cpu family */, ANY /* all models */, ANY /* all steppings */,
MCEQUIRK_K8_GART },
+ { 0x10 /* cpu family */, ANY /* all models */, ANY /* all steppings */,
+ MCEQUIRK_F10_GART },
};
enum mcequirk_amd_flags
@@ -54,6 +56,8 @@ mcequirk_lookup_amd_quirkdata(struct cpu
int mcequirk_amd_apply(enum mcequirk_amd_flags flags)
{
+ u64 val;
+
switch (flags) {
case MCEQUIRK_K7_BANK0:
return 1; /* first bank */
@@ -67,6 +71,10 @@ int mcequirk_amd_apply(enum mcequirk_amd
wrmsrl(MSR_IA32_MC4_CTL, ~(1ULL << 10));
wrmsrl(MSR_IA32_MC4_STATUS, 0ULL);
break;
+ case MCEQUIRK_F10_GART:
+ if (rdmsr_safe(MSR_AMD64_MCx_MASK(4), val) == 0)
+ wrmsr_safe(MSR_AMD64_MCx_MASK(4), val | (1 << 10));
+ break;
}
return 0;
--- a/xen/arch/x86/cpu/mcheck/mce_quirks.h
+++ b/xen/arch/x86/cpu/mcheck/mce_quirks.h
@@ -33,8 +33,9 @@ struct mce_quirkdata {
*/
enum mcequirk_amd_flags {
- MCEQUIRK_K7_BANK0 = 0x1,
- MCEQUIRK_K8_GART = 0x2,
+ MCEQUIRK_K7_BANK0 = 1,
+ MCEQUIRK_K8_GART,
+ MCEQUIRK_F10_GART
};
enum mcequirk_intel_flags {
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -98,6 +98,8 @@
#define CMCI_EN (1UL<<30)
#define CMCI_THRESHOLD_MASK 0x7FFF
+#define MSR_AMD64_MC0_MASK 0xc0010044
+
#define MSR_IA32_MC1_CTL 0x00000404
#define MSR_IA32_MC1_CTL2 0x00000281
#define MSR_IA32_MC1_STATUS 0x00000405
@@ -151,6 +153,8 @@
#define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x))
#define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x))
+#define MSR_AMD64_MCx_MASK(x) (MSR_AMD64_MC0_MASK + (x))
+
#define MSR_P6_PERFCTR0 0x000000c1
#define MSR_P6_PERFCTR1 0x000000c2
#define MSR_P6_EVNTSEL0 0x00000186
++++++ 24391-x86-pcpu-version.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1323766131 -3600
# Node ID 3f4ffde189f228d88e534865023fd795f77f0d05
# Parent 77528dbced3ea74901be6b1aeddedda22bfdaf63
x86: add platform hypercall to retrieve pCPU-s' family, model, and stepping
With the recent hotplug changes to the Xen part of the microcode
loading, this allows the kernel driver to avoid unnecessary calls into
the hypervisor during pCPU hot-enabling: Knowing that the hypervisor
retains the data for already booted CPUs, only data for CPUs with a
different signature needs to be passed down. Since the microcode
loading code can be pretty verbose, avoiding to invoke it can make the
log much easier to look at in case of problems.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -469,6 +469,42 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
}
break;
+ case XENPF_get_cpu_version:
+ {
+ struct xenpf_pcpu_version *ver = &op->u.pcpu_version;
+
+ if ( !get_cpu_maps() )
+ {
+ ret = -EBUSY;
+ break;
+ }
+
+ if ( (ver->xen_cpuid >= NR_CPUS) || !cpu_online(ver->xen_cpuid) )
+ {
+ memset(ver->vendor_id, 0, sizeof(ver->vendor_id));
+ ver->family = 0;
+ ver->model = 0;
+ ver->stepping = 0;
+ }
+ else
+ {
+ const struct cpuinfo_x86 *c = &cpu_data[ver->xen_cpuid];
+
+ memcpy(ver->vendor_id, c->x86_vendor_id, sizeof(ver->vendor_id));
+ ver->family = c->x86;
+ ver->model = c->x86_model;
+ ver->stepping = c->x86_mask;
+ }
+
+ ver->max_present = cpumask_last(&cpu_present_map);
+
+ put_cpu_maps();
+
+ if ( copy_field_to_guest(u_xenpf_op, op, u.pcpu_version) )
+ ret = -EFAULT;
+ }
+ break;
+
case XENPF_cpu_online:
{
int cpu = op->u.cpu_ol.cpuid;
--- a/xen/arch/x86/x86_64/platform_hypercall.c
+++ b/xen/arch/x86/x86_64/platform_hypercall.c
@@ -3,7 +3,7 @@
*/
#include <xen/config.h>
-#include <xen/types.h>
+#include <xen/lib.h>
#include <compat/platform.h>
DEFINE_XEN_GUEST_HANDLE(compat_platform_op_t);
@@ -26,8 +26,13 @@ DEFINE_XEN_GUEST_HANDLE(compat_platform_
#define xen_processor_power_t compat_processor_power_t
#define set_cx_pminfo compat_set_cx_pminfo
-#define xenpf_pcpuinfo compat_pf_pcpuinfo
-#define xenpf_pcpuinfo_t compat_pf_pcpuinfo_t
+#define xen_pf_pcpuinfo xenpf_pcpuinfo
+CHECK_pf_pcpuinfo;
+#undef xen_pf_pcpuinfo
+
+#define xen_pf_pcpu_version xenpf_pcpu_version
+CHECK_pf_pcpu_version;
+#undef xen_pf_pcpu_version
#define xenpf_enter_acpi_sleep compat_pf_enter_acpi_sleep
--- a/xen/include/public/platform.h
+++ b/xen/include/public/platform.h
@@ -425,6 +425,21 @@ struct xenpf_pcpuinfo {
typedef struct xenpf_pcpuinfo xenpf_pcpuinfo_t;
DEFINE_XEN_GUEST_HANDLE(xenpf_pcpuinfo_t);
+#define XENPF_get_cpu_version 48
+struct xenpf_pcpu_version {
+ /* IN */
+ uint32_t xen_cpuid;
+ /* OUT */
+ /* The maxium cpu_id that is present */
+ uint32_t max_present;
+ char vendor_id[12];
+ uint32_t family;
+ uint32_t model;
+ uint32_t stepping;
+};
+typedef struct xenpf_pcpu_version xenpf_pcpu_version_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_pcpu_version_t);
+
#define XENPF_cpu_online 56
#define XENPF_cpu_offline 57
struct xenpf_cpu_ol
@@ -468,6 +483,7 @@ struct xen_platform_op {
struct xenpf_getidletime getidletime;
struct xenpf_set_processor_pminfo set_pminfo;
struct xenpf_pcpuinfo pcpu_info;
+ struct xenpf_pcpu_version pcpu_version;
struct xenpf_cpu_ol cpu_ol;
struct xenpf_cpu_hotadd cpu_add;
struct xenpf_mem_hotadd mem_add;
--- a/xen/include/xlat.lst
+++ b/xen/include/xlat.lst
@@ -61,6 +61,17 @@
! memory_reservation memory.h
! pod_target memory.h
? physdev_pci_mmcfg_reserved physdev.h
+! pct_register platform.h
+! power_register platform.h
+? processor_csd platform.h
+! processor_cx platform.h
+! processor_flags platform.h
+! processor_performance platform.h
+! processor_power platform.h
+? processor_px platform.h
+! psd_package platform.h
+? xenpf_pcpuinfo platform.h
+? xenpf_pcpu_version platform.h
! sched_poll sched.h
? sched_remote_shutdown sched.h
? sched_shutdown sched.h
@@ -73,12 +84,3 @@
! vcpu_set_singleshot_timer vcpu.h
? xenoprof_init xenoprof.h
? xenoprof_passive xenoprof.h
-! power_register platform.h
-? processor_csd platform.h
-! processor_cx platform.h
-! processor_flags platform.h
-! processor_power platform.h
-! pct_register platform.h
-? processor_px platform.h
-! psd_package platform.h
-! processor_performance platform.h
++++++ 24411-x86-ucode-AMD-Fam15.patch ++++++
References: bnc#736824
# HG changeset patch
# User Christoph Egger <Christoph.Egger@xxxxxxx>
# Date 1323943209 -3600
# Node ID ca5f588bd203c9207e0988fcc80f43d83eed5420
# Parent 25f8952313ae683f41b634163f62651185d7be38
x86/ucode: fix for AMD Fam15 CPUs
Remove hardcoded maximum size a microcode patch can have. This is
dynamic now.
The microcode patch for family15h can be larger than 2048 bytes and
gets silently truncated.
Signed-off-by: Christoph Egger <Christoph.Egger@xxxxxxx>
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
--- 2011-12-14.orig/xen/arch/x86/microcode_amd.c 2011-12-15
14:55:15.000000000 +0100
+++ 2011-12-14/xen/arch/x86/microcode_amd.c 2011-12-15 14:59:47.000000000
+0100
@@ -27,18 +27,10 @@
#include <asm/processor.h>
#include <asm/microcode.h>
-#define pr_debug(x...) ((void)0)
-
#define UCODE_MAGIC 0x00414d44
#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000
#define UCODE_UCODE_TYPE 0x00000001
-#define UCODE_MAX_SIZE (2048)
-#define DEFAULT_UCODE_DATASIZE (896)
-#define MC_HEADER_SIZE (sizeof(struct microcode_header_amd))
-#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
-#define DWSIZE (sizeof(uint32_t))
-
/* serialize access to the physical write */
static DEFINE_SPINLOCK(microcode_update_lock);
@@ -99,7 +91,7 @@ static int microcode_fits(void *mc, int
}
if ( mc_header->patch_id <= uci->cpu_sig.rev )
- return -EINVAL;
+ return 0;
printk(KERN_DEBUG "microcode: CPU%d found a matching microcode "
"update with version 0x%x (current=0x%x)\n",
@@ -147,8 +139,12 @@ static int apply_microcode(int cpu)
return 0;
}
-static int get_next_ucode_from_buffer_amd(void *mc, const void *buf,
- size_t size, unsigned long *offset)
+static int get_next_ucode_from_buffer_amd(
+ void **mc,
+ size_t *mc_size,
+ const void *buf,
+ size_t size,
+ unsigned long *offset)
{
struct microcode_header_amd *mc_header;
size_t total_size;
@@ -181,8 +177,17 @@ static int get_next_ucode_from_buffer_am
return -EINVAL;
}
- memset(mc, 0, UCODE_MAX_SIZE);
- memcpy(mc, (const void *)(&bufp[off + 8]), total_size);
+ if ( *mc_size < total_size )
+ {
+ xfree(*mc);
+ *mc = xmalloc_bytes(total_size);
+ if ( !*mc )
+ return -ENOMEM;
+ *mc_size = total_size;
+ }
+ else if ( *mc_size > total_size )
+ memset(*mc + total_size, 0, *mc_size - total_size);
+ memcpy(*mc, mc_header, total_size);
*offset = off + total_size + 8;
@@ -236,10 +241,10 @@ static int cpu_request_microcode(int cpu
{
const uint32_t *buf_pos;
unsigned long offset = 0;
- int error = 0;
- int ret;
+ int error;
struct ucode_cpu_info *uci = &per_cpu(ucode_cpu_info, cpu);
void *mc;
+ size_t mc_size;
/* We should bind the task to the CPU */
BUG_ON(cpu != raw_smp_processor_id());
@@ -260,7 +265,9 @@ static int cpu_request_microcode(int cpu
return -EINVAL;
}
- mc = xmalloc_bytes(UCODE_MAX_SIZE);
+ /* Size of 1st microcode patch in bytes */
+ mc_size = buf_pos[offset / sizeof(*buf_pos) + 1];
+ mc = xmalloc_bytes(mc_size);
if ( mc == NULL )
{
printk(KERN_ERR "microcode: error! "
@@ -276,24 +284,33 @@ static int cpu_request_microcode(int cpu
* It's possible the data file has multiple matching ucode,
* lets keep searching till the latest version
*/
- while ( (ret = get_next_ucode_from_buffer_amd(mc, buf, size, &offset)) ==
0)
+ while ( (error = get_next_ucode_from_buffer_amd(&mc, &mc_size, buf, size,
+ &offset)) == 0 )
{
+ uci->mc.mc_amd = mc;
+
error = microcode_fits(mc, cpu);
if (error <= 0)
continue;
error = apply_microcode(cpu);
if (error == 0)
+ {
+ error = 1;
break;
+ }
}
/* On success keep the microcode patch for
* re-apply on resume.
*/
- if (error) {
+ if ( error <= 0 )
+ {
xfree(mc);
mc = NULL;
}
+ else
+ error = 0;
uci->mc.mc_amd = mc;
out:
++++++ 24412-x86-AMD-errata-model-shift.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1323955725 -3600
# Node ID 99caac2e35df41cbece606f663cb5570a62613c3
# Parent ca5f588bd203c9207e0988fcc80f43d83eed5420
x86/AMD: use correct shift count when merging model and stepping
... for legacy errata matching.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -216,7 +216,7 @@ int cpu_has_amd_erratum(const struct cpu
}
/* OSVW unavailable or ID unknown, match family-model-stepping range */
- ms = (cpu->x86_model << 8) | cpu->x86_mask;
+ ms = (cpu->x86_model << 4) | cpu->x86_mask;
while ((range = va_arg(ap, int))) {
if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
(ms >= AMD_MODEL_RANGE_START(range)) &&
++++++ 24417-amd-erratum-573.patch ++++++
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1324046740 -3600
# Node ID 1452fb248cd513832cfbbd1100b9b72a0dde7ea6
# Parent 01c8b27e3d7d4ad2b469be9922bb04b5eb0195e8
x86/emulator: workaround for AMD erratum 573
The only cases where we might end up emulating fsincos (as any other
x87 operations without memory operands) are
- when a HVM guest is in real mode (not applicable on AMD)
- between two half page table updates in PAE mode (unlikely, and not
doing the emulation here does affect only performance, not
correctness)
- when a guest maliciously (or erroneously) modifies an (MMIO or page
table update) instruction under emulation (unspecified behavior)
Hence, in order to avoid the erratum to cause harm to the entire host,
don't emulate fsincos on the affected AMD CPU families.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
--- a/tools/tests/x86_emulator/x86_emulate.c
+++ b/tools/tests/x86_emulator/x86_emulate.c
@@ -3,5 +3,7 @@
#include <string.h>
#include <public/xen.h>
+#define cpu_has_amd_erratum(nr) 0
+
#include "x86_emulate/x86_emulate.h"
#include "x86_emulate/x86_emulate.c"
--- a/xen/arch/x86/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate.c
@@ -10,8 +10,15 @@
*/
#include <asm/x86_emulate.h>
+#include <asm/processor.h> /* current_cpu_info */
+#include <asm/amd.h> /* cpu_has_amd_erratum() */
/* Avoid namespace pollution. */
#undef cmpxchg
+#undef cpuid
+#undef wbinvd
+
+#define cpu_has_amd_erratum(nr) \
+ cpu_has_amd_erratum(¤t_cpu_data, AMD_ERRATUM_##nr)
#include "x86_emulate/x86_emulate.c"
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -2621,6 +2621,9 @@ x86_emulate(
case 0xd9: /* FPU 0xd9 */
switch ( modrm )
{
+ case 0xfb: /* fsincos */
+ fail_if(cpu_has_amd_erratum(573));
+ /* fall through */
case 0xc0 ... 0xc7: /* fld %stN */
case 0xc8 ... 0xcf: /* fxch %stN */
case 0xd0: /* fnop */
@@ -2646,7 +2649,6 @@ x86_emulate(
case 0xf8: /* fprem */
case 0xf9: /* fyl2xp1 */
case 0xfa: /* fsqrt */
- case 0xfb: /* fsincos */
case 0xfc: /* frndint */
case 0xfd: /* fscale */
case 0xfe: /* fsin */
--- a/xen/include/asm-x86/amd.h
+++ b/xen/include/asm-x86/amd.h
@@ -138,6 +138,12 @@
AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf), \
AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf))
+#define AMD_ERRATUM_573
\
+ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x0f, 0x0, 0x0, 0xff, 0xf), \
+ AMD_MODEL_RANGE(0x10, 0x0, 0x0, 0xff, 0xf), \
+ AMD_MODEL_RANGE(0x11, 0x0, 0x0, 0xff, 0xf), \
+ AMD_MODEL_RANGE(0x12, 0x0, 0x0, 0xff, 0xf))
+
struct cpuinfo_x86;
int cpu_has_amd_erratum(const struct cpuinfo_x86 *, int, ...);
++++++ 24429-mceinj-tool.patch ++++++
# HG changeset patch
# User Liu, Jinsong <jinsong.liu@xxxxxxxxx>
# Date 1324219200 0
# Node ID 9587ccc2ae3192fd5625a87fa58e840377471867
# Parent 5b4b7e565ab82b06940889f2be7e30042b2881fc
X86-MCE: fix a bug of xen-mceinj tool
Fix a bug of xen-mceinj tool which used to test mce by software way.
Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>
--- a/tools/tests/mce-test/tools/xen-mceinj.c
+++ b/tools/tests/mce-test/tools/xen-mceinj.c
@@ -134,8 +134,12 @@ static int mca_cpuinfo(xc_interface *xc_
{
struct xen_mc mc;
+ memset(&mc, 0, sizeof(struct xen_mc));
+
mc.cmd = XEN_MC_physcpuinfo;
- if (xc_mca_op(xc_handle, &mc))
+ mc.interface_version = XEN_MCA_INTERFACE_VERSION;
+
+ if (!xc_mca_op(xc_handle, &mc))
return mc.u.mc_physcpuinfo.ncpus;
else
return 0;
++++++ 24447-x86-TXT-INIT-SIPI-delay.patch ++++++
# HG changeset patch
# User Gang Wei <gang.wei@xxxxxxxxx>
# Date 1325153274 0
# Node ID a7b2610b8e5c9a15b1f5de9a3eabf7f19d0b4199
# Parent 2863b2f43a3bc9268885379d6fd55ed325b8c0a2
X86: Add a delay between INIT & SIPIs for tboot AP bring-up in X2APIC case
Without this delay, Xen could not bring APs up while working with
TXT/tboot, because tboot needs some time in APs to handle INIT before
becoming ready for receiving SIPIs (this delay was removed as part of
c/s 23724 by Tim Deegan).
Signed-off-by: Gang Wei <gang.wei@xxxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
Committed-by: Tim Deegan <tim@xxxxxxx>
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -42,6 +42,7 @@
#include <asm/msr.h>
#include <asm/mtrr.h>
#include <asm/time.h>
+#include <asm/tboot.h>
#include <mach_apic.h>
#include <mach_wakecpu.h>
#include <smpboot_hooks.h>
@@ -473,6 +474,18 @@ static int wakeup_secondary_cpu(int phys
send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
} while ( send_status && (timeout++ < 1000) );
}
+ else if ( tboot_in_measured_env() )
+ {
+ /*
+ * With tboot AP is actually spinning in a mini-guest before
+ * receiving INIT. Upon receiving INIT ipi, AP need time to VMExit,
+ * update VMCS to tracking SIPIs and VMResume.
+ *
+ * While AP is in root mode handling the INIT the CPU will drop
+ * any SIPIs
+ */
+ udelay(10);
+ }
/*
* Should we send STARTUP IPIs ?
++++++ 24448-x86-pt-irq-leak.patch ++++++
References: bnc#735806
# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1325492779 -3600
# Node ID 3a22ed3ec534799b3cab55b0dc0a7380e701ecbe
# Parent a7b2610b8e5c9a15b1f5de9a3eabf7f19d0b4199
x86/passthrough: don't leak guest IRQs
As unmap_domain_pirq_emuirq() fails on a never mapped pIRQ, it must not
be called for the non-emu-IRQ case (to prevent the entire unmap
operation failing).
Based on a suggestion from Stefano.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Tested-by: Yongjie Ren <yongjie.ren@xxxxxxxxx>
Acked-by: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -228,7 +228,8 @@ static int physdev_unmap_pirq(struct phy
if ( is_hvm_domain(d) )
{
spin_lock(&d->event_lock);
- ret = unmap_domain_pirq_emuirq(d, unmap->pirq);
+ if ( domain_pirq_to_emuirq(d, unmap->pirq) != IRQ_UNBOUND )
+ ret = unmap_domain_pirq_emuirq(d, unmap->pirq);
spin_unlock(&d->event_lock);
if ( unmap->domid == DOMID_SELF || ret )
goto free_domain;
++++++ 24478-libxl_add_feature_flag_to_xenstore_for_XS_RESET_WATCHES.patch
++++++
changeset: 24478:ef99b8571a6f
user: Olaf Hering <olaf@xxxxxxxxx>
date: Thu Jan 05 19:40:40 2012 +0100
files: tools/libxl/libxl_create.c tools/python/xen/xend/XendDomainInfo.py
description:
libxl: add feature flag to xenstore for XS_RESET_WATCHES
Tell guest about availibilty of xenstoreds XS_RESET_WATCHES function.
Guests can not issue this command unconditionally because some buggy
toolstacks (such as EC2) do not ignore unknown commands properly.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Committed-by: Ian Jackson <ian.jackson.citrix.com>
Acked-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
---
tools/libxl/libxl_create.c | 1 +
tools/python/xen/xend/XendDomainInfo.py | 1 +
2 files changed, 2 insertions(+)
Index: xen-4.1.2-testing/tools/libxl/libxl_create.c
===================================================================
--- xen-4.1.2-testing.orig/tools/libxl/libxl_create.c
+++ xen-4.1.2-testing/tools/libxl/libxl_create.c
@@ -382,6 +382,7 @@ retry_transaction:
libxl__xs_writev(&gc, t, libxl__sprintf(&gc, "%s/platform", dom_path),
info->platformdata);
xs_write(ctx->xsh, t, libxl__sprintf(&gc,
"%s/control/platform-feature-multiprocessor-suspend", dom_path), "1", 1);
+ xs_write(ctx->xsh, t, libxl__sprintf(&gc,
"%s/control/platform-feature-xs_reset_watches", dom_path), "1", 1);
if (!xs_transaction_end(ctx->xsh, t, 0)) {
if (errno == EAGAIN) {
t = 0;
Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -1770,6 +1770,7 @@ class XendDomainInfo:
f('store/port', self.store_port)
f('store/ring-ref', self.store_mfn)
+ f('control/platform-feature-xs_reset_watches', True)
if arch.type == "x86":
f('control/platform-feature-multiprocessor-suspend', True)
++++++ 2XXXX-vif-bridge.patch ++++++
# HG changeset patch
# User Jim Fehlig <jfehlig@xxxxxxxx>
# Date 1319581952 21600
# Node ID 74da2a3a1db1476d627f42e4a99e9e720cc6774d
# Parent 6c583d35d76dda2236c81d9437ff9d57ab02c006
Prevent vif-bridge from adding user-created tap interfaces to a bridge
Exit vif-bridge script if there is no device info in xenstore, preventing
it from adding user-created taps to bridges.
Signed-off-by: Jim Fehlig <jfehlig@xxxxxxxx>
diff -r 6c583d35d76d -r 74da2a3a1db1 tools/hotplug/Linux/vif-bridge
--- a/tools/hotplug/Linux/vif-bridge Thu Oct 20 15:36:01 2011 +0100
+++ b/tools/hotplug/Linux/vif-bridge Tue Oct 25 16:32:32 2011 -0600
@@ -31,6 +31,13 @@
dir=$(dirname "$0")
. "$dir/vif-common.sh"
+
+domu=$(xenstore_read_default "$XENBUS_PATH/domain" "")
+if [ -z "$domu" ]
+then
+ log debug "No device details in $XENBUS_PATH, exiting."
+ exit 0
+fi
bridge=${bridge:-}
bridge=$(xenstore_read_default "$XENBUS_PATH/bridge" "$bridge")
++++++ 32on64-extra-mem.patch ++++++
Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -2932,7 +2932,7 @@ class XendDomainInfo:
self.guest_bitsize = self.image.getBitSize()
# Make sure there's enough RAM available for the domain
- balloon.free(memory + shadow + vtd_mem, self)
+ balloon.free(memory + shadow + vtd_mem + 512, self)
# Set up the shadow memory
shadow_cur = xc.shadow_mem_control(self.domid, shadow / 1024)
++++++ README.SuSE ++++++
README for the Xen packages
===========================
This file contains SUSE-specific instructions and suggestions for using Xen.
For more in-depth documentation of using Xen on SUSE, consult the
virtualization chapter in the SLES or SUSE Linux manual, or read up-to-date
virtualization information, including a list of known issues, at
http://www.novell.com/documentation/vmserver/.
For more complete documentation on Xen itself, please install one of the
xen-doc-* packages and read the documentation installed into
/usr/share/doc/packages/xen/.
About
-----
Xen allows you to run multiple virtual machines on a single physical machine.
See the Xen homepage for more information:
http://www.cl.cam.ac.uk/research/srg/netos/xen/
If you want to use Xen, you need to install the Xen hypervisor and a number of
supporting packages. During the initial SUSE installation (or when installing
from YaST) check-mark the "Xen Virtual Machine Host Server" pattern. If,
instead, you wish to install Xen manually later, click on the "Install
Hypervisor and Tools" icon in YaST.
If you want to install and manage VMs graphically, be sure to install a
graphical desktop environment like KDE or GNOME. The following optional
packages are needed to manage VMs graphically. Note that "Install Hypervisor
and Tools" installs all the packages below:
vm-install (Optional, to install VMs)
virt-manager (Optional, to manage VMs graphically)
virt-viewer (Optional, to view VMs outside virt-manager)
Additional packages:
nbd-client (Optional, to access virtual disks stored on NBD servers)
open-iscsi (Optional, to access virtual disks stored on iSCSI targets)
You then need to reboot your machine. Instead of booting a normal Linux
kernel, you will boot the Xen hypervisor and a slightly changed Linux kernel.
This Linux kernel runs in the first virtual machine and will drive most of
your hardware.
This approach is called paravirtualization, since it is a partial
virtualization (the Linux kernel needs to be changed slightly, to make the
virtualization easier). It results in very good performance (consult
http://www.cl.cam.ac.uk/research/srg/netos/xen/performance.html) but has the
downside of unchanged operating systems not being supported. However, new
hardware features (e.g., Intel VT and AMD-V) are overcoming this limitation.
Terminology
-----------
The Xen open-source community has a number of terms that you should be
familiar with.
A "domain" is Xen's term for a virtual machine.
"Domain 0" is the first virtual machine. It can control all other virtual
machines. It also (usually) controls the physical hardware. A kernel used in
domain 0 may sometimes be referred to as a dom0 kernel.
"Domain U" is any virtual machine other than domain 0. The "U" indicates it
is unprivileged (that is, it cannot control other domains). A kernel used in
an unprivileged domain may be referred to as a domU kernel.
Novell documentation will use the more industry-standard term "virtual
machine", or "VM", rather than "domain" where possible. And to that end,
domain 0 will be called the "virtual machine server", since it essentially the
server on which the other VMs run. All other domains are simply "virtual
machines".
The acronym "HVM" refers to a hardware-assisted virtual machine. These are
VMs that have not been modified (e.g., Windows) and therefore need hardware
support such as Intel VT or AMD-V to run on Xen.
Kernels
-------
Xen supports two kinds of kernels: A privileged kernel (which boots the
machine, controls other VMs, and usually controls all your physical hardware)
and unprivileged kernels (which can't control other VMs, and usually don't need
drivers for physical hardware). The privileged kernel boots first (as the VM
server); an unprivileged kernel is used in all subsequent VMs.
The VM server takes control of the boot process after Xen has initialized the
CPU and the memory. This VM contains a privileged kernel and all the hardware
drivers.
For the other virtual machines, you usually don't need the hardware drivers.
(It is possible to hide a PCI device from the VM server and re-assign it to
another VM for direct access, but that is a more advanced topic.) Instead you
use virtual network and block device drivers in the unprivileged VMs to access
the physical network and block drivers in the VM server.
For simplicity, SUSE ships a single Xen-enabled Linux kernel, rather than
separate privileged and unprivileged kernels. As most of the hardware drivers
are modules anyway, using this kernel as an unprivileged kernel has very
little extra overhead.
The kernel is contained in the kernel-xen package, which you need to install to
use Xen.
Booting
-------
If you installed Xen during the initial SUSE installation, or installed one
of the kernel-xen* packages later, a "XEN" option should exist in your Grub
bootloader. Select that to boot SUSE on top of Xen.
If you want to add additional entries, or modify the existing ones, you will
have to edit Grub yourself. All Xen entries in the Grub configuration file
(usually /boot/grub/menu.lst) look something like this:
title XEN
root (hd0,5)
kernel /xen.gz
module /vmlinuz-xen <parameters>
module /initrd-xen
Replace (hd0,5) with the partition that holds your /boot directory in
grub-speak, e.g., hda1 -> (hd0,0) and sda5 -> (hd2,4).
Normally, xen.gz requires no parameters. If you want to add parameters,
see below.
Replace "<parameters>" with the kernel parameters that you want to pass to
your kernel. These should be very similar, if not identical, to those passed
to a normal kernel that you boot on bare iron.
Once you have booted this configuration successfully, you are running Xen with
a privileged kernel on top of it.
Xen Boot Parameters
-------------------
Normally, xen.gz requires no parameters. However, in special cases (such as
debugging or a dedicated VM server) you may wish to pass it parameters.
In particular in case of problems you may want to attach a serial terminal and
direct Xen to send its output not only to the screen, but also to that
terminal. In order to do so, add "console=vga,com<n> com<n>=<baud>" (without
the quotes and with <n> replaced by the serial port number - generally 1 or 2 -
and with <baud> replaced by the baud rate the serial terminal is configured
for) to the xen.gz line.
For a more complete discussion of possible parameters, see the user
documentation in the xen-doc-* packages.
Init scripts
------------
Before you can create additional VMs (or use any other xm command) xend must
be running. This init script is part of the xen-tools package, and it is
activated at installation time. You can (de)activate it using insserv. You
can also start it manually with "rcxend start".
The deprecated xendomains script is also shipped, but disabled by default. In
SLES 10 GA (xen 3.0.2) and older, this script allowed VMs to be started and
stopped automatically when the machine starts and stops. In SLES 10 SP1 (xen
3.0.4) and newer, the proper way to start and stop VMs automatically is to set
the "on_xend_start" and "on_xend_stop" settings in the VMs configuration.
(Deprecating xendomains was necessary because xend, not the configuration file
in /etc/xen/vm, is now the authoritative source for the VM's settings.)
Consult the online documentation for more information.
Creating a VM with vm-install
-----------------------------
The vm-install program (part of the vm-install package, and accessible
through YaST's Control Center) is the recommended method to create VMs. This
program handles creating both the VM's configuration file and disk(s). It can
help install any operating system, not just SUSE.
From the command line, run "vm-install". If the DISPLAY environment variableis set and the supporting packages (python-gtk) are installed, a graphical
wizard will start. Otherwise, a text wizard will start.
Each VM needs to have its own root filesystem. The root filesystem can live
on a block device (e.g., a hard disk partition, or an LVM2 or EVMS volume) or
in a file that holds the filesystem image.
VMs can share filesystems, such as /usr or /opt, that are mounted read-only
from _all_ VMs. Never try to share a filesystem that is mounted read-write;
filesystem corruption will result. For sharing writable data between VMs, use
NFS or other networked or cluster filesystems.
When defining the virtual network adapter(s), we recommend using a static MAC
for the VM rather than allowing Xen to randomly select one each time the VM
boots. (See "Network Troubleshooting" below.) XenSource has been allocated a
range of MAC addresses with the OUI of 00-16-3E. By using MACs from this
range you can be sure they will not conflict with any physical adapters.
Once you have the VM configured, click "OK". The wizard will now create a
configuration file for the VM, and create a disk image. The disk image will
exist in /var/lib/xen/images, and a corresponding config file will exist in
/etc/xen/vm. The operating system's installation program will then run within
the VM.
When the VM shuts down (because the installation -- or at least the first
stage of it -- is done), the wizard finalizes the VM's configuration and
restarts the VM.
The creation of VMs can be automated; read the vm-install man page for more
details. The installation of an OS within the VM can be automated if the OS
supports it.
Creating a VM Manually
----------------------
If you create a VM manually (as opposed to using vm-install, which is the
recommended way), you will need to create a disk (or reuse an existing one)
and a configuration file.
If you are using a disk or disk image that is already installed with an
operating system, you'll probably need to replace its kernel with a
Xen-enabled kernel.
The kernel and ramdisk used to bootstrap the VM must match any kernel modules
that might be present in the VM's disk. It is possible to manually copy the
kernel and ramdisk from the VM's disk (for example, after updating the kernel
within that VM) to the VM server's filesystem. However, an easier (and less
error-prone) method is to use something called the "domUloader". Before a new
VM is started, this loader automatically copies the kernel and ramdisk into
the VM server's filesystem, so that it can be used to bootstrap the new VM.
See /etc/xen/examples/xmexample.domUloader for an example.
Next, make a copy of one of the /etc/xen/examples/* files, and modify it to
suit your needs. For paravirtualized VMs, start with
/etc/xen/examples/xmexample1; for fully virtualized VMs, start with
/etc/xen/examples/xmexample.hvm. You'll need to change (at very least) the
"name" and "disk" parameters.
Managing Virtual Machines
-------------------------
VMs can be managed from the command line or from virt-manager.
Before a VM can be started, xend must be informed of it. vm-install will
automatically import new VM configurations into xend. However, if you copy a
VM from another machine, or manually create a VM configuration file, you will
need to import it into xend with a command like:
xm new my-vm
If your VM's configuration file is not located in /etc/xen/vm, you must
specify the full path. This imports the configuration into xend (and
therefore virt-manager becomes aware of it, also).
Now to start the VM:
xm start my-vm
or start it graphically from virt-manager.
Have a look at running sessions with "xm list". Note the ID of the newly
created VM. Attach to the VM's text console with "xm console <ID>" (replacing
ID with the VM's ID). Attaching to multiple VM consoles is most conveniently
done with the terminal multiplexer "screen".
Have a look at the other xm commands by typing "xm help". Note that most xm
commands must be done as root.
Using the Mouse via VNC in Fully Virtual Mode
---------------------------------------------
In a fully virtualized VM, the mouse may be emulated as a PS/2 mouse, USB
mouse, or USB tablet. The vm-install tool selects the best emulation that is
known to be automatically detected and supported by the operating system.
However, when accessing some fully virtualized operating systems via VNC, the
mouse may be difficult to control if the VM is emulating a PS/2 mouse. PS/2
provides mouse deltas, but VNC only provides absolute coordinates. In such
cases, you may want to manually switch the operating system and VM to use a
USB tablet.
Emulation of a SummaSketch graphics tablet is provided for this reason. To
use the Summa emulation, you will need to configure your fully virtualized OS.
Note that the virtual tablet is connected to the second virtual serial port
(/dev/ttyS1 or COM2).
Most Linux distributions ship with appropriate drivers, and only need to be
configured. To configure gpm, edit /etc/sysconfig/mouse and add these lines:
MOUSETYPE="summa"
XMOUSETYPE="SUMMA"
DEVICE=/dev/ttyS1
The format and location of your configuration file could vary depending upon
your Linux distribution. The goal is to run the gpm daemon as follows:
gpm -t summa -m /dev/ttyS1
X also needs to be configured to use the Summa emulation. Add the following
stanza to /etc/X11/xorg.conf, or use your distribution's tools to add these
settings:
Section "InputDevice"
Identifier "Mouse0"
Driver "summa"
Option "Device" "/dev/ttyS1"
Option "InputFashion" "Tablet"
Option "Mode" "Absolute"
Option "Name" "EasyPen"
Option "Compatible" "True"
Option "Protocol" "Auto"
Option "SendCoreEvents" "on"
Option "Vendor" "GENIUS"
EndSection
After making these changes, restart gpm and X.
To ensure the VM is emulating a USB tablet, add these lines to the
configuration file in /etc/xen/vm:
usb=1
usbdevice='tablet'
Then re-import the configuration into xend:
xm new my-vm
HVM Console in Fully Virtual Mode
---------------------------------
When running a VM in fully virtual mode, a special console is available that
provides some additional ways to control the VM. Press Ctrl-Alt-2 to access
the console; press Ctrl-Alt-1 to return to the VM. While at the console,
type "help" for help.
The two most important commands are "send-key" and "change". The "send-key"
command allows you to send any key sequence to the VM, which might otherwise
be intercepted by your local window manager.
The "change" command allows the target of a block device to be changed; for
example, use it to change from one CD ISO to another. Some versions of Xen
have this command disabled for security reasons. Consult the online
documentation for workarounds.
Networking
----------
Your virtual machines become much more useful if you can reach them via the
network. Starting with openSUSE11.1 and SLE11, networking in domain 0 is
configured and managed via YaST. The yast2-networking module can be used
to create and manage bridged networks. During initial installation, a bridged
networking proposal will be presented if the "Xen Virtual Machine Host Server"
pattern is selected. The proposal will also be presented if you install Xen
after initial installation using the "Install Hypervisor and Tools" module in
YaST.
The default proposal creates a virtual bridge in domain 0 for each active
ethernet device, enslaving the device to the bridge. Consider a machine
containing two ethernet devices (eth0 and eth1), both with active carriers.
YaST will create br0 and br1, enslaving the eth0 and eth1 devices repectively.
VMs get a virtual network interface (e.g. eth0), which is visible in domain 0
as vifN.0 and connected to the bridge. This means that if you set up an IP
address in the VMs belonging to the same subnet as br0 from your domain 0,
you'll be able to communicate not only with the other slave VMs, but also with
domain 0 and with the external network. If you have a DHCP server running in
your network, your VMs should succeed in getting an IP address.
Be aware that this may have unwanted security implications. You may want to
opt for routing instead of bridging, so you can set up firewalling rules in
domain 0.
Please read about the network configuration in the Xen manual. You can set up
bridging or routing for other interfaces also.
For debugging, here's what happens on bootup of a domU:
- xenstored saves the device setup in xenstore
- domU is created
- vifN.0 shows up in domain 0 and a hotplug event is triggered
- hotplug is /sbin/udev; udev looks at /etc/udev/rules.d/40-xen.rules and
calls /etc/xen/scripts/vif-bridge online
- vif-bridge set the vifN.0 device up and enslaves it to the bridge
- eth0 shows up in domU (hotplug event triggered)
Similar things happen for block devices, except that /etc/xen/scripts/block is
called.
It's not recommended to use ifplugd nor NetworkManager for managing the
interfaces if you use bridging mode. Use routing with nat or proxy-arp
in that case. You also need to do that in case you want to send out packets
on wireless; you can't bridge Xen "ethernet" packets into 802.11 packets.
Thread-Local Storage
--------------------
For some time now, the glibc thread library (NPTL) has used a shortcut to
access thread-local variables at a negative segment offset from the segment
selector GS instead of reading the linear address from the TDB (offset 0).
Unfortunately, this optimization has been made the default by the glibc and
gcc maintainers, as it saves one indirection. For Xen this is bad: The access
to these variables will trap, and Xen will need to use some tricks to make the
access work. It does work, but it's very slow.
SUSE Linux 9.1 and SLES 9 were prior to this change, and thus are not
affected. SUSE Linux 9.2 and 9.3 are affected. For SUSE Linux 10.x and SLES
10, we have disabled negative segment references in gcc and glibc, and so
these are not affected. Other non-SUSE Linux distributions may be affected.
For affected distributions, one way to work around the problem is to rename
the /lib/tls directory, so the pre-i686 version gets used, where no such
tricks are done. An example LSB-compliant init script which automates these
steps is installed at /usr/share/doc/packages/xen/boot.xen. This script
renames /lib/tls when running on Xen, and restores it when not running on Xen.
Modify this script to work with your specific distribution.
Mono has a similar problem, but this has been fixed in SUSE Linux 10.1 and
SLES 10. Older or non-SUSE versions of Mono may have a performance impact.
Security
--------
Domain 0 has control over all domains. This means that care should be taken to
keep domain 0 safe; ideally you strip it down to only do as little there as
possible, preferably with no local users except for the system administrator.
Most commands in domain 0 can only be performed as root, but this protection
scheme only has moderate security and might be defeated. In case domain 0 is
compromised, all other domains are compromised as well.
To allow relocation of VMs (migration), the receiving machine listens on TCP
port 8002. You might want to put firewall rules in place in domain 0 to
restrict this to machines which you trust. You have some access control in
xend-config.sxp as well by tweaking the xend-relocation-hosts-allow
setting. Relocating VMs with sensitive data is not a good idea in untrusted
networks, since the data is not sent encrypted.
The memory protections for the domUs are effective; so far no way to break out
of a virtual machine is known. A VM is an effective jail.
Limitations
-----------
When booting, Linux reserves data structures matching the amount of RAM found.
This has the side-effect that you can't dynamically grow the memory beyond
what the kernel has been booted with. But you can trick domU Linux to prepare
for a larger amount of RAM by passing the mem= boot parameter.
The export of virtual hard disks from files in Xen can be handled via the
loopback driver (although in Xen >= 3.0.4, this is can be replaced by the
"blktap" user-space driver.) If you are still using loopback, it may be
possible to run out of loopback devices, as by default only 64 are supported.
You can change this by inserting:
options loop max_loop=128
into /etc/modprobe.conf.local in domain 0.
Network Troubleshooting
-----------------------
First ensure the VM server is configured correctly and can access the network.
Do not use ifplugd or NetworkManager, neither are bridge aware.
Specify a static virtual MAC in the VM's configuration file. Random MACs can
be problematic, since with each boot of the VM it appears that some hardware
has been removed (the previous random MAC) and new hardware is present (the
new random MAC). This can cause network configuration files (which were
intended for the old MAC) to not be matched up with the new virtual hardware.
In the VM's filesystem, ensure the ifcfg-eth* files are named appropriately.
For example, if you do decide to use a randomly-selected MAC for the VM, the
ifcfg-eth* file must not include the MAC in its name; name it generically
("ifcfg-eth0") instead. If you use a static virtual MAC for the VM, be sure
that is reflected in the file's name.
Troubleshooting
---------------
First try to get Linux running on bare iron before trying with Xen.
Be sure your Xen hypervisor (xen) and VM kernels (kernel-xen) are compatible.
The hypervisor and domain 0 kernel are a matched set, and usually must be
upgraded together. Consult the online documentation for a matrix of supported
32- and 64-bit combinations
On certain machines with 2GB or less of RAM, domain 0 Linux may fail to boot,
printing the following messages:
PCI-DMA: Using software bounce buffering for IO (SWIOTLB)
...
Kernel panic - not syncing: PCI-DMA: Memory would be corrupted
Fix this by adding "swiotlb=16" to the Linux kernel command line, which
reserves additional memory for the swiotlb (the actual number to be used here
of course depends on the system configuration).
If you have trouble early in the boot, try passing pnpacpi=off to the Linux
kernel. If you have trouble with interrupts or timers, passing lapic to Xen
may help. Xen and Linux understand similar ACPI boot parameters. Try the
options acpi=off,force,strict,ht,noirq or acpi_skip_timer_override.
Other useful debugging options to Xen may be nosmp, noreboot, mem=1024M,
sync_console, noirqbalance (Dell). For a complete list of Xen boot options,
consult chapter 11.3 of the Xen users' manual.
If domain 0 Linux crashes on X11 startup, please try to boot into runlevel 3.
To debug Xen or domain 0 Linux crashes or hangs, it may be useful to use the
debug-enabled hypervisor, and/or to prevent automatic rebooting. Change your
Grub configuration from something like this:
kernel (hd0,5)/xen.gz
To something like this:
kernel (hd0,5)/xen-dbg.gz noreboot
After rebooting, the Xen hypervisor will write any error messages to the log
file (viewable with the "xm dmesg" command).
If problems persist, check if a newer version is available. Well-tested
versions will be shipped with SUSE and via YaST Online Update. More frequent
(but less supported) updates are available on Novell's Forge site:
http://forge.novell.com/modules/xfmod/project/?xenpreview
Upgrading the Host Operating System
-----------------------------------
When upgrading the host operating system from one major release to another
(for example, SLES 10 to SLES 11 or openSUSE 11.4 to openSUSE 12.1) or when
applying a service pack like SLES 11 SP2 to SLES 11 SP1 all running VMs must
be shut down before the upgrade process is begun.
Known Issues
------------
For a list of known issues and work-arounds, see
http://www.novell.com/documentation/vmserver/.
Disclaimer
----------
Xen performed amazingly well in our tests and proved very stable. Still, you
should be careful when using it, just like you'd be careful if you boot an
experimental kernel. Expect that it may not boot and be prepared to have a
fall-back solution for that scenario. Be prepared that it may not support all
of your hardware. And for the worst of all cases, have your most valuable
data backed up. (This is always a good idea, of course.)
Feedback
--------
In case you have remarks about, problems with, ideas for, or praise for Xen,
please report it back to the xen-devel list:
xen-devel@xxxxxxxxxxxxxxxxxxx
If you find issues with the packaging or setup done by Novell/SUSE, please
report it to:
http://www.suse.de/feedback/
ENJOY!
Your Novell SUSE Team.
++++++ _link ++++++
<link project="openSUSE:12.1" package="xen"
baserev="aa6de9a40241df7973eeaba172315a03">
<patches>
<branch/>
</patches>
</link>
++++++ altgr_2.patch ++++++
When access domU from Windows VNC client, spanish keyboard altgr key
doesn't work. According to log info, we found that the keycodes passed
from vncclient to qemu vncserver have something wrong. When altgr and "2"
pressed, keycodes vncserver receives are:
ALT_R down,
CTRL_L down,
CTRL_L up,
ATL_R up,
"2" down,
"2" up,
...
Since when send "2" down, there is no altgr modifier, the char displayed
on screen will be "2" but not "@".
To solve this problem, there is another patch applied by upstream which
sends an additional altgr modifier before "2" down in the above case.
It works well when domU is windows, but on sles10 sp3 domU, sometimes it
display "@" and sometimes it still displays "2", especially when press
altgr+2 continuously.
For the sles10 sp3 domU problem, maybe because there are two many alt_r (same
keycode as altgr on "es") up and down events and the domU OS couldn't handle
it well.
To furtherly solve this problem, I write this patch, when vncserver
is "es" and receives a alt_r keysym (this is already abnormal since "es" has
no alt_r), then treat the alt_r as alt_l. This can avoid too many altgr
keycodes up and down events and make sure the intentionally added altgr keycode
can take effect.
Signed-off by Chunyan Liu (cyliu@xxxxxxxxxx)
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/vnc.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/vnc.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/vnc.c
@@ -1308,6 +1308,9 @@ static void do_key_event(VncState *vs, i
shift_keys = vs->modifiers_state[0x2a] | vs->modifiers_state[0x36];
altgr_keys = vs->modifiers_state[0xb8];
+ if ( !strcmp(keyboard_layout,"es") && sym == 0xffea )
+ sym = 0xffe9;
+
keycode = keysym2scancode(vs->kbd_layout, sym & 0xFFFF);
if (keycode == 0) {
fprintf(stderr, "Key lost : keysym=0x%x(%d)\n", sym, sym);
++++++ baselibs.conf ++++++
xen-libs
++++++ bdrv_default_rwflag.patch ++++++
Subject: modify default read/write flag in bdrv_init.
Signed-off by Chunyan Liu <cyliu@xxxxxxxxxx>
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/vl.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/vl.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/vl.c
@@ -2617,6 +2617,8 @@ int drive_init(struct drive_opt *arg, in
strncpy(drives_table[nb_drives].serial, serial, sizeof(serial));
nb_drives++;
+ bdrv_flags = BDRV_O_RDWR;
+
switch(type) {
case IF_IDE:
case IF_XEN:
@@ -2630,6 +2632,7 @@ int drive_init(struct drive_opt *arg, in
break;
case MEDIA_CDROM:
bdrv_set_type_hint(bdrv, BDRV_TYPE_CDROM);
+ bdrv_flags &= ~BDRV_O_RDWR;
break;
}
break;
@@ -2650,7 +2653,6 @@ int drive_init(struct drive_opt *arg, in
}
if (!file[0])
return -2;
- bdrv_flags = 0;
if (snapshot) {
bdrv_flags |= BDRV_O_SNAPSHOT;
cache = 2; /* always use write-back with snapshot */
++++++ bdrv_open2_fix_flags.patch ++++++
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/block.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/block.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/block.c
@@ -350,7 +350,7 @@ int bdrv_file_open(BlockDriverState **pb
int bdrv_open(BlockDriverState *bs, const char *filename, int flags)
{
- return bdrv_open2(bs, filename, flags, NULL);
+ return bdrv_open2(bs, filename, flags|BDRV_O_RDWR, NULL);
}
int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
@@ -419,12 +419,13 @@ int bdrv_open2(BlockDriverState *bs, con
}
bs->drv = drv;
bs->opaque = qemu_mallocz(drv->instance_size);
- /* Note: for compatibility, we open disk image files as RDWR, and
- RDONLY as fallback */
if (!(flags & BDRV_O_FILE))
- open_flags = (flags & BDRV_O_ACCESS) | (flags & BDRV_O_CACHE_MASK);
+ open_flags = flags;
else
open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT);
+ if (!(open_flags & BDRV_O_RDWR))
+ bs->read_only = 1;
+
ret = drv->bdrv_open(bs, filename, open_flags);
if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) {
ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR);
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/hw/usb-msd.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/hw/usb-msd.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/hw/usb-msd.c
@@ -551,7 +551,7 @@ USBDevice *usb_msd_init(const char *file
s = qemu_mallocz(sizeof(MSDState));
bdrv = bdrv_new("usb");
- if (bdrv_open2(bdrv, filename, 0, drv) < 0)
+ if (bdrv_open2(bdrv, filename, BDRV_O_RDWR, drv) < 0)
goto fail;
s->bs = bdrv;
*pbs = bdrv;
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/qemu-img.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/qemu-img.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/qemu-img.c
@@ -32,7 +32,7 @@
#endif
/* Default to cache=writeback as data integrity is not important for qemu-tcg.
*/
-#define BRDV_O_FLAGS BDRV_O_CACHE_WB
+#define BDRV_O_FLAGS BDRV_O_CACHE_WB
static void QEMU_NORETURN error(const char *fmt, ...)
{
@@ -185,7 +185,7 @@ static int read_password(char *buf, int
#endif
static BlockDriverState *bdrv_new_open(const char *filename,
- const char *fmt)
+ const char *fmt, int flags)
{
BlockDriverState *bs;
BlockDriver *drv;
@@ -201,7 +201,7 @@ static BlockDriverState *bdrv_new_open(c
} else {
drv = &bdrv_raw;
}
- if (bdrv_open2(bs, filename, BRDV_O_FLAGS, drv) < 0) {
+ if (bdrv_open2(bs, filename, flags, drv) < 0) {
error("Could not open '%s'", filename);
}
if (bdrv_is_encrypted(bs)) {
@@ -253,7 +253,7 @@ static int img_create(int argc, char **a
size = 0;
if (base_filename) {
BlockDriverState *bs;
- bs = bdrv_new_open(base_filename, NULL);
+ bs = bdrv_new_open(base_filename, NULL, BDRV_O_RDWR);
bdrv_get_geometry(bs, &size);
size *= 512;
bdrv_delete(bs);
@@ -332,7 +332,7 @@ static int img_commit(int argc, char **a
} else {
drv = NULL;
}
- if (bdrv_open2(bs, filename, BRDV_O_FLAGS, drv) < 0) {
+ if (bdrv_open2(bs, filename, BDRV_O_RDWR, drv) < 0) {
error("Could not open '%s'", filename);
}
ret = bdrv_commit(bs);
@@ -455,7 +455,8 @@ static int img_convert(int argc, char **
total_sectors = 0;
for (bs_i = 0; bs_i < bs_n; bs_i++) {
- bs[bs_i] = bdrv_new_open(argv[optind + bs_i], fmt);
+ bs[bs_i] = bdrv_new_open(argv[optind + bs_i], fmt,
+ BDRV_O_CACHE_WB|BDRV_O_RDONLY);
if (!bs[bs_i])
error("Could not open '%s'", argv[optind + bs_i]);
bdrv_get_geometry(bs[bs_i], &bs_sectors);
@@ -483,7 +484,7 @@ static int img_convert(int argc, char **
}
}
- out_bs = bdrv_new_open(out_filename, out_fmt);
+ out_bs = bdrv_new_open(out_filename, out_fmt, BDRV_O_CACHE_WB|BDRV_O_RDWR);
bs_i = 0;
bs_offset = 0;
@@ -706,7 +707,7 @@ static int img_info(int argc, char **arg
} else {
drv = NULL;
}
- if (bdrv_open2(bs, filename, BRDV_O_FLAGS, drv) < 0) {
+ if (bdrv_open2(bs, filename, BDRV_O_FLAGS|BDRV_O_RDWR, drv) < 0) {
error("Could not open '%s'", filename);
}
bdrv_get_format(bs, fmt_name, sizeof(fmt_name));
@@ -810,7 +811,7 @@ static void img_snapshot(int argc, char
if (!bs)
error("Not enough memory");
- if (bdrv_open2(bs, filename, 0, NULL) < 0) {
+ if (bdrv_open2(bs, filename, BDRV_O_RDWR, NULL) < 0) {
error("Could not open '%s'", filename);
}
++++++ bdrv_open2_flags_2.patch ++++++
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/hw/xen_blktap.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c
@@ -237,6 +237,7 @@ static int open_disk(struct td_state *s,
BlockDriver* drv;
char* devname;
static int devnumber = 0;
+ int flags = readonly ? BDRV_O_RDONLY : BDRV_O_RDWR;
int i;
DPRINTF("Opening %s as blktap%d\n", path, devnumber);
@@ -259,7 +260,7 @@ static int open_disk(struct td_state *s,
DPRINTF("%s driver specified\n", drv ? drv->format_name : "No");
/* Open the image */
- if (bdrv_open2(bs, path, 0, drv) != 0) {
+ if (bdrv_open2(bs, path, flags, drv) != 0) {
fprintf(stderr, "Could not open image file %s\n", path);
return -ENOMEM;
}
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/xenstore.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/xenstore.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/xenstore.c
@@ -136,7 +136,8 @@ static void insert_media(void *opaque)
else
format = &bdrv_raw;
- bdrv_open2(bs, media_filename[i], 0, format);
+ /* Temporary BDRV_O_RDWR */
+ bdrv_open2(bs, media_filename[i], BDRV_O_RDWR, format);
#ifdef CONFIG_STUBDOM
{
char *buf, *backend, *params_path, *params;
@@ -511,7 +512,8 @@ void xenstore_parse_domain_config(int hv
}
for (i = 0; i < num; i++) {
- format = NULL; /* don't know what the format is yet */
+ flags = 0;
+ format = NULL; /* don't know what the format is yet */
/* read the backend path */
xenstore_get_backend_path(&bpath, "vbd", danger_path, hvm_domid,
e_danger[i]);
if (bpath == NULL)
@@ -597,6 +599,17 @@ void xenstore_parse_domain_config(int hv
format = &bdrv_raw;
}
+ /* read the mode of the device */
+ if (pasprintf(&buf, "%s/mode", bpath) == -1)
+ continue;
+ free(mode);
+ mode = xs_read(xsh, XBT_NULL, buf, &len);
+
+ if (!strcmp(mode, "r") || !strcmp(mode, "ro"))
+ flags |= BDRV_O_RDONLY;
+ if (!strcmp(mode, "w") || !strcmp(mode, "rw"))
+ flags |= BDRV_O_RDWR;
+
#if 0
/* Phantom VBDs are disabled because the use of paths
* from guest-controlled areas in xenstore is unsafe.
@@ -664,7 +677,7 @@ void xenstore_parse_domain_config(int hv
#ifdef CONFIG_STUBDOM
if (pasprintf(&danger_buf, "%s/device/vbd/%s", danger_path,
e_danger[i]) == -1)
continue;
- if (bdrv_open2(bs, danger_buf, BDRV_O_CACHE_WB /* snapshot and
write-back */, &bdrv_raw) == 0) {
+ if (bdrv_open2(bs, danger_buf, flags|BDRV_O_CACHE_WB /* snapshot and
write-back */, &bdrv_raw) == 0) {
pstrcpy(bs->filename, sizeof(bs->filename), params);
}
#else
@@ -716,7 +729,7 @@ void xenstore_parse_domain_config(int hv
fprintf(stderr, "Using file %s in read-%s mode\n", bs->filename,
is_readonly ? "only" : "write");
- if (bdrv_open2(bs, params, BDRV_O_CACHE_WB /* snapshot and
write-back */, format) < 0) {
+ if (bdrv_open2(bs, params, flags|BDRV_O_CACHE_WB /* snapshot and
write-back */, format) < 0) {
fprintf(stderr, "qemu: could not open vbd '%s' or hard disk
image '%s' (drv '%s' format '%s')\n", buf, params, drv ? drv : "?", format ?
format->format_name : "0");
} else {
char* snapshot = get_snapshot_name(atoi(e_danger[i]));
++++++ blktap-close-fifos.patch ++++++
Index: xen-4.1.2-testing/tools/blktap/drivers/blktapctrl.c
===================================================================
--- xen-4.1.2-testing.orig/tools/blktap/drivers/blktapctrl.c
+++ xen-4.1.2-testing/tools/blktap/drivers/blktapctrl.c
@@ -280,7 +280,7 @@ static int del_disktype(blkif_t *blkif)
* qemu-dm instance. We may close the file handle only if there is
* no other disk left for this domain.
*/
- if (dtypes[type]->use_ioemu)
+ if (dtypes[type]->use_ioemu && dtypes[type]->idnum != DISK_TYPE_AIO)
return !qemu_instance_has_disks(blkif->tappid);
/* Caller should close() if no single controller, or list is empty. */
++++++ blktap-pv-cdrom.patch ++++++
++++ 803 lines (skipped)
++++++ blktap.patch ++++++
bug #239173
bug #242953
Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -3296,7 +3296,7 @@ class XendDomainInfo:
(fn, BOOTLOADER_LOOPBACK_DEVICE))
vbd = {
- 'mode': 'RO',
+ 'mode': 'RW',
'device': BOOTLOADER_LOOPBACK_DEVICE,
}
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/xenstore.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/xenstore.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/xenstore.c
@@ -447,9 +447,9 @@ void xenstore_parse_domain_config(int hv
{
char **e_danger = NULL;
char *buf = NULL;
- char *fpath = NULL, *bpath = NULL,
+ char *fpath = NULL, *bpath = NULL, *btype = NULL,
*dev = NULL, *params = NULL, *drv = NULL;
- int i, ret;
+ int i, ret, is_tap;
unsigned int len, num, hd_index, pci_devid = 0;
BlockDriverState *bs;
BlockDriver *format;
@@ -486,6 +486,14 @@ void xenstore_parse_domain_config(int hv
e_danger[i]);
if (bpath == NULL)
continue;
+ /* check to see if type is tap or not */
+ if (pasprintf(&buf, "%s/type", bpath) == -1)
+ continue;
+ free(btype);
+ btype = xs_read(xsh, XBT_NULL, buf, &len);
+ if (btype == NULL)
+ continue;
+ is_tap = !strncmp(btype, "tap", 3);
/* read the name of the device */
if (pasprintf(&buf, "%s/dev", bpath) == -1)
continue;
@@ -775,6 +783,7 @@ void xenstore_parse_domain_config(int hv
free(mode);
free(params);
free(dev);
+ free(btype);
free(bpath);
free(buf);
free(danger_buf);
++++++ blktapctrl-default-to-ioemu.patch ++++++
Index: xen-4.1.2-testing/tools/blktap/drivers/tapdisk.h
===================================================================
--- xen-4.1.2-testing.orig/tools/blktap/drivers/tapdisk.h
+++ xen-4.1.2-testing/tools/blktap/drivers/tapdisk.h
@@ -168,7 +168,7 @@ static disk_info_t aio_disk = {
"raw image (aio)",
"aio",
0,
- 0,
+ 1,
#ifdef TAPDISK
&tapdisk_aio,
#endif
@@ -179,7 +179,7 @@ static disk_info_t sync_disk = {
"raw image (sync)",
"sync",
0,
- 0,
+ 1,
#ifdef TAPDISK
&tapdisk_sync,
#endif
@@ -190,7 +190,7 @@ static disk_info_t vmdk_disk = {
"vmware image (vmdk)",
"vmdk",
1,
- 0,
+ 1,
#ifdef TAPDISK
&tapdisk_vmdk,
#endif
@@ -212,7 +212,7 @@ static disk_info_t qcow_disk = {
"qcow disk (qcow)",
"qcow",
0,
- 0,
+ 1,
#ifdef TAPDISK
&tapdisk_qcow,
#endif
@@ -223,7 +223,7 @@ static disk_info_t qcow2_disk = {
"qcow2 disk (qcow2)",
"qcow2",
0,
- 0,
+ 1,
#ifdef TAPDISK
&tapdisk_qcow2,
#endif
++++++ block-dmmd ++++++
#! /bin/bash
# Usage: block-dmmd [add args | remove args]
#
# the xm config file should have something like:
# dmmd:md;/dev/md0;md;/dev/md1;lvm;/dev/vg1/lv1
# or
# dmmd:lvm;/dev/vg1/lv1;lvm;/dev/vg1/lv2;md;/dev/md0
# note the last device will be used for VM
# History:
# 2009-06-09, mh@xxxxxxxxxx:
# Emit debugging messages into a temporary file; if no longer needed,
# just comment the exec I/O redirection below
# Make variables used in functions local to avoid global overridings
# Use vgscan and vgchange where required
# Use the C locale to avoid dealing with localized messages
# Assign output from assembling an MD device to a variable to aid
debugging
# We do not want to deal with localized messages:
LANG=C
LC_MESSAGES=C
export LANG LC_MESSAGES
dir=$(dirname "$0")
. "$dir/block-common.sh"
#exec >> /tmp/block-dmmd-`date +%F_%T.%N`.log 2>&1
echo ""
date
set -x
echo shell-flags: $-
command=$1
# We check for errors ourselves:
set +e
function run_mdadm()
{
local mdadm_cmd=$1
local msg
local rc
msg="`/sbin/mdadm $mdadm_cmd 2>&1`"
rc=$?
case "$msg" in
*"has been started"* | *"already active"* )
return 0
;;
*"is already in use"* )
: hmm, might be used by another device in this domU
: leave it to upper layers to detect a real error
return 2
;;
* )
return $rc
;;
esac
return 1
}
function activate_md()
{
local par=$1
local already_active=0 cfg dev rc t
if [ ${par} = ${par%%(*} ]; then
# No configuration file specified:
dev=$par
cfd=
else
dev=${par%%(*}
t=${par#*(}
cfg="-c ${t%%)*}"
fi
if /sbin/mdadm -Q -D $dev; then
already_active=1
fi
run_mdadm "-A $dev $cfg"
rc=$?
if [ $already_active -eq 1 ] && [ $rc -eq 2 ]; then
return 0
fi
return $rc
}
function deactivate_md ()
{
local par=$1 # Make it explicitly local
## We need the device name only while deactivating
/sbin/mdadm -S ${par%%(*}
return $?
}
function activate_lvm ()
{
# First scan for PVs and VGs; we may then have to activate the VG
# first, but can ignore errors:
# /sbin/pvscan || :
# /sbin/vgscan --mknodes || :
# /sbin/vgchange -ay ${1%/*} || :
/sbin/lvchange -ay $1
return $?
}
function deactivate_lvm ()
{
/sbin/lvchange -an $1
if [ $? -eq 0 ]; then
# We may have to deactivate the VG now, but can ignore errors:
# /sbin/vgchange -an ${1%/*} || :
# Maybe we need to cleanup the LVM cache:
# /sbin/vgscan --mknodes || :
return 0
fi
return 1
}
BP=100
SP=$BP
VBD=
declare -a stack
function push ()
{
if [ -z "$1" ]; then
return
fi
let "SP -= 1"
stack[$SP]="${1}"
return
}
function pop ()
{
VBD=
if [ "$SP" -eq "$BP" ]; then
return
fi
VBD=${stack[$SP]}
let "SP += 1"
return
}
function activate_dmmd ()
{
# echo $1 $2
case $1 in
md)
activate_md $2
return
;;
lvm)
activate_lvm $2
return
;;
esac
}
function deactivate_dmmd()
{
case "$1" in
md)
deactivate_md $2
return
;;
lvm)
deactivate_lvm $2
return
;;
esac
}
function cleanup_stack ()
{
while [ 1 ]; do
pop
if [ -z "$VBD" ]; then
break
fi
deactivate_dmmd $VBD
done
return
}
function parse_par ()
{
local ac par rc s t # Make these explicitly local vars
ac=$1
par="$2"
echo "parse_paring $1, $2"
par="$par;"
while [ 1 ]; do
t=${par%%;*}
if [ -z "$t" ]; then
return 0
fi
par=${par#*;}
s=${par%%;*}
if [ -z "$s" ]; then
return 1
fi
par=${par#*;}
echo "type is $t, dev is $s"
if [ "$ac" = "activate" ]; then
activate_dmmd $t $s
rc=$?
if [ $rc -ne 0 ]; then
return 1
fi
fi
echo "push $t $s"
push "$t $s"
done
}
echo $command
case "$command" in
add)
p=`xenstore-read $XENBUS_PATH/params` || true
claim_lock "dmmd"
dmmd=$p
echo "before parse_par $dmmd"
parse_par activate "$dmmd"
rc=$?
echo "reach here with rc: $rc"
if [ $rc -ne 0 ]; then
cleanup_stack
release_lock "dmmd"
exit 1
fi
lastparam=${dmmd##*;}
usedevice=${lastparam%(*}
claim_lock "block"
xenstore-write $XENBUS_PATH/node "$usedevice"
write_dev "$usedevice"
release_lock "block"
release_lock "dmmd"
exit 0
;;
remove)
p=`xenstore-read $XENBUS_PATH/params` || true
claim_lock "dmmd"
dmmd=$p
parse_par noactivate "$dmmd"
cleanup_stack
release_lock "dmmd"
exit 0
;;
esac
++++++ block-iscsi ++++++
#!/bin/bash
# Usage: block-iscsi [add tgtname | remove dev]
#
# This assumes you're running a correctly configured
# iscsi target (server) at the other end!
# Note that we assume that the passwords for discovery (if needed)
# are in /etc/iscsid.conf
# and the node session passwords (if required) in the
# open-iscsi database below /var/lib/open-iscsi/node.db
#
# (c) Kurt Garloff <kurt@xxxxxxxxxx>, 2006-09-04, GNU GPL
# Contributors: Jim Fehlig <jfehlig@xxxxxxxxxx>
# Stefan de Konink <skinkie@xxxxxxxxx>
dir=$(dirname "$0")
. "$dir/block-common.sh"
# echo "DBG:xen/scripts/block-iscsi $1 $2 XENBUS_PATH=$XENBUS_PATH $par $node"
find_sdev()
{
unset dev
for session in /sys/class/iscsi_session/session*; do
if [ "$1" = "`cat $session/targetname 2>/dev/null`" ]; then
dev=`basename $session/device/target*/*:0:*/block*/*`
return
fi
done
}
find_sdev_rev()
{
unset tgt
for session in /sys/class/iscsi_session/session*; do
dev=`basename $session/device/target*/*:0:*/block*/*`
if [ "$dev" = "$1" ]; then
tgt=`cat $session/targetname 2>/dev/null`
return
fi
done
}
case "$command" in
add)
# load modules and start iscsid
/etc/init.d/open-iscsi status >/dev/null 2>&1 ||
{ /etc/init.d/open-iscsi start >/dev/null 2>&1; sleep 1; }
par=`xenstore-read $XENBUS_PATH/params` || true
TGTID=`echo $par | sed "s/\/\///g"`
while read rec uuid; do
if [ "$uuid" = "$TGTID" ]; then
find_sdev $TGTID
if [ -z "$dev" ]; then
/sbin/iscsiadm -m node -T $uuid -p $rec --login || exit 2
sleep 4
find_sdev $TGTID
fi
xenstore-write $XENBUS_PATH/node /dev/$dev
write_dev /dev/$dev
exit 0
fi
done < <(/sbin/iscsiadm -m node)
exit 1
;;
remove)
node=`xenstore-read $XENBUS_PATH/node` || true
dev=$node; dev=${dev#/dev/}
find_sdev_rev $dev
if [ -x /sbin/blockdev -a -n "$node" ]; then blockdev --flushbufs "$node";
fi
test -z "$tgt" && exit 2
/sbin/iscsiadm -m node -T $tgt --logout
exit 1
;;
esac
++++++ block-nbd ++++++
#!/bin/sh
# Usage: block-nbd [bind server ctl_port |unbind node]
#
# The node argument to unbind is the name of the device node we are to
# unbind.
#
# This assumes you're running a correctly configured server at the other end!
dir=$(dirname "$0")
. "$dir/block-common.sh"
#set -x
par=`xenstore-read $XENBUS_PATH/params` || true
#echo $par
case "$command" in
add)
modprobe nbd
for dev in /dev/nbd*; do
if nbd-client $par $dev; then
xenstore-write $XENBUS_PATH/node $dev
write_dev $dev
exit 0
fi
done
exit 1
;;
remove)
node=`xenstore-read $XENBUS_PATH/node` || true
nbd-client -d $node
exit 0
;;
esac
++++++ block-npiv ++++++
#!/bin/bash
# Usage: block-npiv [add npiv | remove dev]
dir=$(dirname "$0")
. "$dir/block-npiv-common.sh"
. "$dir/block-common.sh"
#set -x
#command=$1
case "$command" in
add)
# Params is one big arg, with fields separated by hyphens:
# single path:
# FABRIC-VPWWPN-VPWWNN-TGTWWPN-LUN#
# multipath:
# {FABRIC1.FABRIC2}-{VPWWPN1.VPWWPN2.VPWWPN3}-VPWWNN-TGTWWPN-LUN#
# arg 2 - Fabric Name
# arg 3 - VPORT's WWPN
# arg 4 - VPORT's WWNN
# arg 5 - Target's WWPN
# arg 6 - LUN # on Target
# no wwn contains a leading 0x - it is a 16 character hex value
# You may want to optionally pick a specific adapter ?
par=`xenstore-read $XENBUS_PATH/params` || true
#par=$2
NPIVARGS=$par;
LUN=${NPIVARGS##*-*-*-*-}; NPIVARGS=${NPIVARGS%-*}
if test $LUN = $NPIVARGS ; then exit 1; fi
TGTWWPN=${NPIVARGS##*-*-*-}; NPIVARGS=${NPIVARGS%-*}
if test $TGTWWPN = $NPIVARGS ; then exit 1; fi
VPORTWWNN=${NPIVARGS##*-*-}; NPIVARGS=${NPIVARGS%-*}
if test $VPORTWWNN = $NPIVARGS ; then exit 1; fi
VPORTWWPNS=${NPIVARGS##*-}; NPIVARGS=${NPIVARGS%-*}
if test $VPORTWWPNS = $NPIVARGS ; then exit 1; fi
FABRICNMS=$NPIVARGS
# Ensure we compare everything using lower-case hex characters
TGTWWPN=`echo $TGTWWPN | tr A-Z a-z`
VPORTWWPNS=`echo $VPORTWWPNS | tr A-Z a-z |sed 's/[{.}]/ /g'`
VPORTWWNN=`echo $VPORTWWNN | tr A-Z a-z`
FABRICNMS=`echo $FABRICNMS | tr A-Z a-z |sed 's/[{.}]/ /g'`
claim_lock "npiv"
paths=0
for FABRICNM in $FABRICNMS; do
for VPORTWWPN in $VPORTWWPNS; do
find_vhost $VPORTWWPN $FABRICNM
if test -z "$vhost" ; then
create_vport $FABRICNM $VPORTWWPN $VPORTWWNN
if [ $? -ne 0 ] ; then exit 2; fi
sleep 8
find_vhost $VPORTWWPN $FABRICNM
if test -z "$vhost" ; then exit 3; fi
fi
find_sdev $vhost $TGTWWPN $LUN
if test -z "$dev"; then
echo "- - -" > /sys/class/scsi_host/$vhost/scan
sleep 2
find_sdev $vhost $TGTWWPN $LUN
fi
if test -z "$dev"; then
exit 4
fi
paths=$(($paths+1))
done
done
release_lock "npiv"
if test $paths -gt 1; then
xenstore-write $XENBUS_PATH/multipath 1
/etc/init.d/multipathd start
if test $? -ne 0 ; then exit 4; fi
dm=`multipath -l /dev/$dev | grep dm | cut -f2 -d' '`
else
xenstore-write $XENBUS_PATH/multipath 0
dm=$dev
fi
if test ! -z "$dm"; then
xenstore-write $XENBUS_PATH/node /dev/$dm
write_dev /dev/$dm
exit 0
fi
exit 4
;;
remove)
node=`xenstore-read $XENBUS_PATH/node` || true
multipath=`xenstore-read $XENBUS_PATH/multipath` || true
# this is really screwy. the first delete of a lun will
# terminate the entire vport (all luns)
if test $multipath = 1; then
par=`xenstore-read $XENBUS_PATH/params` || true
NPIVARGS=$par;
FABRICNMS=${NPIVARGS%%-*}; NPIVARGS=${NPIVARGS#*-}
VPORTWWPNS=${NPIVARGS%%-*}
VPORTWWPNS=`echo $VPORTWWPNS | tr A-Z a-z |sed 's/[{.}]/ /g'`
FABRICNMS=`echo $FABRICNMS | tr A-Z a-z |sed 's/[{.}]/ /g'`
for FABRICNM in $FABRICNMS; do
for VPORTWWPN in $VPORTWWPNS; do
find_vhost $VPORTWWPN $FABRICNM
if test -z "$vhost" ; then exit 5; fi
flush_nodes_on_vhost $vhost
delete_vhost $vhost
done
done
else
dev=$node; dev=${dev#/dev/}
find_vhost_from_dev $dev
if test -z "$vhost" ; then exit 5; fi
flush_nodes_on_vhost $vhost
delete_vhost $vhost
fi
exit 0
;;
esac
++++++ block-npiv-common.sh ++++++
# Look for the NPIV vport with the WWPN
# $1 contains the WWPN (assumes it does not contain a leading "0x")
# $2 contains the FABRICNM (assumes it does not contain "0x")
find_vhost()
{
unset vhost
# look in upstream locations
for fchost in /sys/class/fc_vports/* ; do
if test -e $fchost/port_name ; then
wwpn=`cat $fchost/port_name | sed -e s/^0x//`
if test $wwpn = $1 ; then
# Note: makes the assumption the vport will always have an scsi_host
child
vhost=`ls -d $fchost/device/host*`
vhost=`basename $vhost`
fname=`cat /sys/class/fc_host/$vhost/fabric_name | sed -e s/^0x//`
if test $fname = $2 ; then
return
fi
fi
fi
done
# look in vendor-specific locations
# Emulex - just looks like another scsi_host - so look at fc_hosts...
for fchost in /sys/class/fc_host/* ; do
if test -e $fchost/port_name ; then
wwpn=`cat $fchost/port_name | sed -e s/^0x//`
if test $wwpn = $1 ; then
# Note: makes the assumption the vport will always have an scsi_host
child
vhost=`basename $fchost`
fname=`cat $fchost/fabric_name | sed -e s/^0x//`
if test $fname = $2 ; then
return
fi
fi
fi
done
}
# Create a NPIV vport on the fabric w/ FABRICNM, with WWPN,WWNN
# $1 contains FABRICNM
# $2 contains the VPORT WWPN
# $3 contains the VPORT WWNN
# (assumes no name contains a leading "0x")
create_vport()
{
# find a base adapter with npiv support that is on the right fabric
# Look via upstream interfaces
for fchost in /sys/class/fc_host/* ; do
if test -e $fchost/vport_create ; then
# is the link up, w/ NPIV support ?
pstate=`cat $fchost/port_state`
ptype=`cat $fchost/port_type | cut -c 1-5`
fname=`cat $fchost/fabric_name | sed -e s/^0x//`
if [ $pstate = "Online" -a $ptype = "NPort" -a $fname = $1 ] ; then
vmax=`cat $fchost/max_npiv_vports`
vinuse=`cat $fchost/npiv_vports_inuse`
avail=`expr $vmax - $vinuse`
if [ $avail -gt 0 ] ; then
# create the vport
echo $2":"$3 > $fchost/vport_create
if [ $? -eq 0 ] ; then
return 0
fi
# failed - so we'll just look for the next adapter
fi
fi
fi
done
# Look in vendor-specific locations
# Emulex: interfaces mirror upstream, but are under adapter scsi_host
for shost in /sys/class/scsi_host/* ; do
if [ -e $shost/vport_create ] ; then
fchost=`ls -d $shost/device/fc_host*`
# is the link up, w/ NPIV support ?
if [ -e $fchost/port_state ] ; then
pstate=`cat $fchost/port_state`
ptype=`cat $fchost/port_type | cut -c 1-5`
fname=`cat $fchost/fabric_name | sed -e s/^0x//`
if [ $pstate = "Online" -a $ptype = "NPort" -a $fname = $1 ] ; then
vmax=`cat $shost/max_npiv_vports`
vinuse=`cat $shost/npiv_vports_inuse`
avail=`expr $vmax - $vinuse`
if [ $avail -gt 0 ] ; then
# create the vport
echo $2":"$3 > $shost/vport_create
if [ $? -eq 0 ] ; then
return 0
fi
# failed - so we'll just look for the next adapter
fi
fi
fi
fi
done
# BFA are under adapter scsi_host
for shost in /sys/class/scsi_host/* ; do
if [ -e $shost/vport_create ] ; then
fchost=`ls -d $shost/device/fc_host/*`
# is the link up, w/ NPIV support ?
if [ -e $fchost/port_state ] ; then
pstate=`cat $fchost/port_state`
ptype=`cat $fchost/port_type | cut -c 1-5`
fname=`cat $fchost/fabric_name | sed -e s/^0x//`
if [ $pstate = "Online" -a $ptype = "NPort" -a $fname = $1 ] ; then
# create the vport
echo $2":"$3 > $shost/vport_create
if [ $? -eq 0 ] ; then
return 0
fi
# failed - so we'll just look for the next adapter
fi
fi
fi
done
return 1
}
# Look for the LUN on the indicated scsi_host (which is an NPIV vport)
# $1 is the scsi_host name (normalized to simply the hostX name)
# $2 is the WWPN of the tgt port the lun is on
# Note: this implies we don't support a multipath'd lun, or we
# are explicitly identifying a "path"
# $3 is the LUN number of the scsi device
find_sdev()
{
unset dev
hostno=${1/*host/}
for sdev in /sys/class/scsi_device/${hostno}:*:$3 ; do
if test -e $sdev/device/../fc_trans*/target${hostno}*/port_name ; then
tgtwwpn=`cat $sdev/device/../fc_trans*/target${hostno}*/port_name | sed
-e s/^0x//`
if test $tgtwwpn = $2 ; then
if test -e $sdev/device/block* ; then
dev=`ls $sdev/device/block*`
dev=${dev##*/}
return
fi
fi
fi
done
}
# Look for the NPIV vhost based on a scsi "sdX" name
# $1 is the "sdX" name
find_vhost_from_dev()
{
unset vhost
hostno=`readlink /sys/block/$1/device`
hostno=${hostno##*/}
hostno=${hostno%%:*}
if test -z "$hostno" ; then return; fi
vhost="host"$hostno
}
# We're about to terminate a vhost based on a scsi device
# Flush all nodes on that vhost as they are about to go away
# $1 is the vhost
flush_nodes_on_vhost()
{
if test ! -x /sbin/blockdev ; then return; fi
hostno=${1/*host/}
for sdev in /sys/class/scsi_device/${hostno}:* ; do
if test -e $sdev/device/block* ; then
dev=`ls $sdev/device/block*`
dev="/dev/"$dev
if test -n "$dev"; then
blockdev --flushbufs $dev
fi
fi
done
}
# Terminate a NPIV vhost
# $1 is vhost
delete_vhost()
{
# use upstream interface
for vport in /sys/class/fc_vports/* ; do
if test -e $vport/device/$1 ; then
if test -e $vport/vport_delete ; then
echo "1" > $vport/vport_delete
if test $? -ne 0 ; then exit 6; fi
sleep 4
return
fi
fi
done
# use vendor specific interface
# Emulex
if test -e /sys/class/fc_host/$1/device/../scsi_host*/lpfc_drvr_version ; then
shost=`ls -1d /sys/class/fc_host/$1/device/../scsi_host* | sed
s/.*scsi_host://`
vportwwpn=`cat /sys/class/fc_host/$1/port_name | sed s/^0x//`
vportwwnn=`cat /sys/class/fc_host/$1/node_name | sed s/^0x//`
echo "$vportwwpn:$vportwwnn" > /sys/class/scsi_host/$shost/vport_delete
if test $? -ne 0 ; then exit 6; fi
sleep 4
return
fi
# Qlogic
if test -e /sys/class/fc_host/$1/device/../scsi_host*/driver_version ; then
shost=`ls -1d /sys/class/fc_host/$1/device/../scsi_host* | sed
s/.*scsi_host://`
vportwwpn=`cat /sys/class/fc_host/$1/port_name | sed s/^0x//`
vportwwnn=`cat /sys/class/fc_host/$1/node_name | sed s/^0x//`
echo "$vportwwpn:$vportwwnn" > /sys/class/scsi_host/$shost/vport_delete
if test $? -ne 0 ; then exit 6; fi
sleep 4
return
fi
# BFA
if test -e /sys/class/fc_host/$1/device/../scsi_host/*/driver_name ; then
shost=`ls -1d /sys/class/fc_host/$1/device/../scsi_host/* | sed
s#.*scsi_host/##`
vportwwpn=`cat /sys/class/fc_host/$1/port_name | sed s/^0x//`
vportwwnn=`cat /sys/class/fc_host/$1/node_name | sed s/^0x//`
echo "$vportwwpn:$vportwwnn" > /sys/class/scsi_host/$shost/vport_delete
if test $? -ne 0 ; then exit 6; fi
sleep 4
return
fi
exit 6
}
vport_status()
{
# Look via upstream interfaces
for fchost in /sys/class/fc_host/* ; do
if test -e $fchost/vport_create ; then
vport_status_display $fchost $fchost
fi
done
# Look in vendor-specific locations
# Emulex: interfaces mirror upstream, but are under adapter scsi_host
for shost in /sys/class/scsi_host/* ; do
if [ -e $shost/vport_create ] ; then
fchost=`ls -d $shost/device/fc_host*`
vport_status_display $fchost $shost
fi
done
return 0
}
vport_status_display()
{
echo
echo "fc_host: " $2
echo "port_state: " `cat $1/port_state`
echo "port_type: " `cat $1/port_type`
echo "fabric_name: " `cat $1/fabric_name`
echo "max_npiv_vports: " `cat $2/max_npiv_vports`
echo "npiv_vports_inuse: " `cat $2/npiv_vports_inuse`
echo "modeldesc: " `cat $2/modeldesc`
echo "speed: " `cat $1/speed`
return 0
}
++++++ block-npiv-vport ++++++
#!/bin/bash
# Usage: block-npiv-vport [create npivargs | delete vportwwpn | status]
dir=$(dirname "$0")
. "$dir/block-npiv-common.sh"
#set -x
command=$1
params=$2
case "$command" in
create)
# Params is one big arg, with fields separated by hyphens:
# FABRIC-VPWWPN-VPWWNN-TGTWWPN-LUN#
# arg 2 - Fabric Name
# arg 3 - VPORT's WWPN
# arg 4 - VPORT's WWNN
# arg 5 - Target's WWPN
# arg 6 - LUN # on Target
# no wwn contains a leading 0x - it is a 16 character hex value
# You may want to optionally pick a specific adapter ?
NPIVARGS=$params;
LUN=${NPIVARGS##*-*-*-*-}; NPIVARGS=${NPIVARGS%-*}
if test $LUN = $NPIVARGS ; then exit 1; fi
TGTWWPN=${NPIVARGS##*-*-*-}; NPIVARGS=${NPIVARGS%-*}
if test $TGTWWPN = $NPIVARGS ; then exit 1; fi
VPORTWWNN=${NPIVARGS##*-*-}; NPIVARGS=${NPIVARGS%-*}
if test $VPORTWWNN = $NPIVARGS ; then exit 1; fi
VPORTWWPN=${NPIVARGS##*-}; NPIVARGS=${NPIVARGS%-*}
if test $VPORTWWPN = $NPIVARGS ; then exit 1; fi
FABRICNM=$NPIVARGS
# Ensure we compare everything using lower-case hex characters
TGTWWPN=`echo $TGTWWPN | tr A-Z a-z`
VPORTWWPN=`echo $VPORTWWPN | tr A-Z a-z`
VPORTWWNN=`echo $VPORTWWNN | tr A-Z a-z`
FABRICNM=`echo $FABRICNM | tr A-Z a-z`
find_vhost $VPORTWWPN $FABRICNM
if test -z "$vhost" ; then
create_vport $FABRICNM $VPORTWWPN $VPORTWWNN
if [ $? -ne 0 ] ; then exit 2; fi
sleep 8
find_vhost $VPORTWWPN $FABRICNM
if test -z "$vhost" ; then exit 3; fi
fi
exit 0
;;
delete)
# Params is VPORT's WWPN
# no wwn contains a leading 0x - it is a 16 character hex value
VPORTWWPN=$params
# Ensure we compare everything using lower-case hex characters
VPORTWWPN=`echo $VPORTWWPN | tr A-Z a-z`
find_vhost $VPORTWWPN $FABRICNM
if test -z "$vhost" ; then exit 4; fi
delete_vhost $vhost
exit 0
;;
status)
vport_status
exit 0
;;
*)
echo "Usage: block-npiv-vport [create npivargs | delete vportwwpn | status]"
exit 1
;;
esac
++++++ boot.local.xenU ++++++
#! /bin/sh
#
# Copyright (c) 1996 SuSE GmbH Nuernberg, Germany. All rights reserved.
#
# Author: Werner Fink <werner@xxxxxxx>, 1996
# Burchard Steinbild <bs@xxxxxxx>, 1996
#
# /etc/init.d/boot.local
#
# script with local commands to be executed from init on system startup
#
#
# Here you should add things, that should happen directly after booting
# before we're going to the first run level.
#
date
# echo "$MACHINE: running $0 $*"
my_REDIRECT="$(echo $REDIRECT | sed 's#^/dev/##')"
my_DEVICE="$(echo $my_REDIRECT | sed 's#^tty##')"
my_SPEED="$(stty speed)"
# echo REDIRECT $REDIRECT $my_REDIRECT
# echo my_DEVICE $my_DEVICE
# echo my_SPEED $my_SPEED
# compose a line like that for inittab
# S0:12345:respawn:/sbin/agetty -L 9600 ttyS0 vt102
case $my_REDIRECT in
ttyS*)
echo adding this line to inittab
echo "$my_DEVICE:12345:respawn:/sbin/agetty -L $my_SPEED $my_REDIRECT
vt102"
echo "$my_DEVICE:12345:respawn:/sbin/agetty -L $my_SPEED $my_REDIRECT
vt102" >> /etc/inittab
echo $my_REDIRECT >> /etc/securetty
;;
hvc*)
echo adding this line to inittab
echo "$my_DEVICE:12345:respawn:/sbin/agetty -L $my_SPEED $my_REDIRECT
vt320"
echo "$my_DEVICE:12345:respawn:/sbin/agetty -L $my_SPEED $my_REDIRECT
vt320" >> /etc/inittab
echo $my_REDIRECT >> /etc/securetty
;;
*)
echo "no modification in inittab needed for: $my_REDIRECT"
;;
esac
telinit q
# Changes for Xen
test -f /lib/modules/`uname -r`/modules.dep || depmod -ae
CMDLINE=`cat /proc/cmdline | grep 'ip='`
if test ! -z "$CMDLINE"; then
OLDIFS=$IFS
IFS=":"
read ip oth mask gw hostname dev dhcp rest < /proc/cmdline
IFS=$OLDIFS
hostname $hostname
ip=`echo $ip | sed 's/ip= *//'`
if test ! -z "$ip"; then
if test -z "$mask"; then
if [ ${ip%/*} = $ip ]; then
ip="$ip/27"
fi
echo "ip addr add $ip dev $dev"
ip addr add $ip dev $dev
ip link set $dev up
else
ifconfig add $ip netmask $mask $dev
fi
fi
if test "${dhcp#dhcp}" != "$dhcp"; then
ifup-dhcp $dev
fi
fi
++++++ boot.xen ++++++
#! /bin/sh
# Copyright (c) 2005-2006 SUSE Linux AG, Nuernberg, Germany.
# All rights reserved.
#
# /etc/init.d/boot.xen
#
# LSB compatible service control script; see http://www.linuxbase.org/spec/
#
### BEGIN INIT INFO
# Provides: Xen
# Required-Start: boot.localfs
# Should-Start: boot.localnet
# Required-Stop: boot.localfs
# Should-Stop:
# Default-Start: B
# Default-Stop:
# Short-Description: Switch on and off TLS depending on whether Xen is running
# Description: Xen gets a major performance hit by the way
# recent glibc (& gcc) set up the TLS offset, as it needs to
# play segmentation tricks. This can be avoided by moving away
# the tls libs.
### END INIT INFO
. /etc/rc.status
# Reset status of this service
rc_reset
case "$1" in
start)
echo -n "Starting Xen setup "
if test -d /proc/xen; then
export LD_ASSUME_KERNEL=2.4.21
echo -n "Xen running "
fi
if test -d /proc/xen -a -d /lib/tls; then
echo -n "move /lib/tls away "
mv /lib/tls /lib/tls.save
elif test ! -d /proc/xen -a -d /lib/tls.save; then
echo -n "move back /lib/tls "
mv /lib/tls.save /lib/tls
fi
rc_status -v
;;
stop)
# rc_status -v
;;
try-restart|condrestart)
$0 restart
# Remember status and be quiet
rc_status
;;
restart)
## Stop the service and regardless of whether it was
## running or not, start it again.
$0 start
# Remember status and be quiet
rc_status
;;
force-reload)
$0 try-restart
rc_status
;;
reload)
rc_failed 3
rc_status -v
;;
status)
echo -n "Checking for Xen "
# Return value is slightly different for the status command:
# 0 - service up and running
# 1 - service dead, but /var/run/ pid file exists
# 2 - service dead, but /var/lock/ lock file exists
# 3 - service not running (unused)
# 4 - service status unknown :-(
# 5--199 reserved (5--99 LSB, 100--149 distro, 150--199 appl.)
if test -d /proc/xen; then
if test -d /lib/tls; then
echo -n "Xen running, /lib/tls existing "
rc_failed 1
else
echo -n "Xen running, /lib/tls not existing "
fi
else
if test -d /lib/tls.save; then
echo -n "Xen not running, /lib/tls existing "
rc_failed 2
else
echo -n "Xen not running, /lib/tls not existing "
rc_failed 3
fi
fi
rc_status -v
;;
*)
echo "Usage: $0
{start|stop|status|try-restart|restart|force-reload|reload}"
exit 1
;;
esac
rc_exit
++++++ bridge-bonding.diff ++++++
Index: xen-4.1.2-testing/tools/hotplug/Linux/network-bridge
===================================================================
--- xen-4.1.2-testing.orig/tools/hotplug/Linux/network-bridge
+++ xen-4.1.2-testing/tools/hotplug/Linux/network-bridge
@@ -249,6 +249,9 @@ op_start () {
claim_lock "network-bridge"
+ local bonded=""
+ [ -e /sys/class/net/${netdev}/bonding ] && bonded="yes"
+
vlans=$(find_active_vlans "${netdev}")
for vlan in $vlans ; do ifdown $vlan ; done
@@ -266,18 +269,32 @@ op_start () {
ip link set ${netdev} down
ip addr flush ${netdev}
fi
- ip link set ${netdev} name ${pdev}
- ip link set ${tdev} name ${bridge}
-
- setup_physical_bridge_port ${pdev}
- # Restore slaves
- if [ -n "${slaves}" ]; then
- ip link set ${pdev} up
- ifenslave ${pdev} ${slaves}
+ if [ "x${bonded}" = "xyes" ]
+ then
+ ip link set ${tdev} name ${bridge}
+ ln -sf /etc/sysconfig/network/ifcfg-${netdev}
/etc/sysconfig/network/ifcfg-${pdev}
+ ifup ${pdev}
+ local gw=`ip route show dev ${pdev} | fgrep default | sed 's/default
via //'`
+ ip addr flush ${pdev}
+ rm -f /etc/sysconfig/network/ifcfg-${pdev}
+ brctl addif ${bridge} ${pdev}
+ ip link set ${bridge} up
+ [ -n "$gw" ] && ip route add default via ${gw}
+ else
+ ip link set ${netdev} name ${pdev}
+ ip link set ${tdev} name ${bridge}
+
+ setup_bridge_port ${pdev}
+
+ # Restore slaves
+ if [ -n "${slaves}" ]; then
+ ip link set ${pdev} up
+ ifenslave ${pdev} ${slaves}
+ fi
+ add_to_bridge2 ${bridge} ${pdev}
+ do_ifup ${bridge}
fi
- add_to_bridge2 ${bridge} ${pdev}
- do_ifup ${bridge}
for vlan in $vlans ; do ifup $vlan ; done
++++++ bridge-opensuse.patch ++++++
Index: xen-4.1.2-testing/tools/hotplug/Linux/network-bridge
===================================================================
--- xen-4.1.2-testing.orig/tools/hotplug/Linux/network-bridge
+++ xen-4.1.2-testing/tools/hotplug/Linux/network-bridge
@@ -278,19 +278,19 @@ op_stop () {
transfer_addrs ${bridge} ${pdev}
if ! ifdown ${bridge}; then
get_ip_info ${bridge}
- fi
- ip link set ${pdev} down
- ip addr flush ${bridge}
+ ip link set ${pdev} down
+ ip addr flush ${bridge}
- brctl delif ${bridge} ${pdev}
- ip link set ${bridge} down
+ brctl delif ${bridge} ${pdev}
+ ip link set ${bridge} down
- ip link set ${bridge} name ${tdev}
+ ip link set ${bridge} name ${tdev}
+ brctl delbr ${tdev}
+ fi
+ ip link set ${pdev} down
ip link set ${pdev} name ${netdev}
do_ifup ${netdev}
- brctl delbr ${tdev}
-
release_lock "network-bridge"
}
++++++ bridge-record-creation.patch ++++++
Index: xen-4.1.2-testing/tools/hotplug/Linux/network-bridge
===================================================================
--- xen-4.1.2-testing.orig/tools/hotplug/Linux/network-bridge
+++ xen-4.1.2-testing/tools/hotplug/Linux/network-bridge
@@ -257,6 +257,11 @@ op_start () {
create_bridge ${tdev}
+ # Record creation of bridge in /dev/.sysconfig/network/xenbridges so other
+ # tools, e.g. yast2 lan, know that Xen bridging is active.
+ [ -d /dev/.sysconfig/network/xenbridges ] || mkdir
/dev/.sysconfig/network/xenbridges
+ touch /dev/.sysconfig/network/xenbridges/${bridge}
+
preiftransfer ${netdev}
transfer_addrs ${netdev} ${tdev}
# Remember slaves for bonding interface.
@@ -338,6 +343,13 @@ op_stop () {
ip link set ${pdev} name ${netdev}
do_ifup ${netdev}
+ # Remove record of bridge from /dev/.sysconfig/network/xenbridges ...
+ rm -f /dev/.sysconfig/network/xenbridges/${bridge}
+ # ... and directory itself if empty
+ if [ -z "$(ls -A /dev/.sysconfig/network/xenbridges 2>/dev/null)" ]; then
+ rmdir /dev/.sysconfig/network/xenbridges
+ fi
+
for vlan in $vlans ; do ifup $vlan ; done
release_lock "network-bridge"
++++++ bridge-vlan.diff ++++++
Index: xen-4.1.2-testing/tools/hotplug/Linux/network-bridge
===================================================================
--- xen-4.1.2-testing.orig/tools/hotplug/Linux/network-bridge
+++ xen-4.1.2-testing/tools/hotplug/Linux/network-bridge
@@ -193,6 +193,28 @@ antispoofing () {
iptables -A FORWARD -m physdev --physdev-in ${pdev} -j ACCEPT
}
+find_active_vlans() {
+ local netdev=$1
+ local vlan
+ local vlans
+ vlans=""
+ for vifcfg in /etc/sysconfig/network/ifcfg-vlan* ; do
+ vlan=${vifcfg/*\/ifcfg-}
+ if [ "$vlan" = "vlan*" ]; then
+ continue
+ fi
+ . $vifcfg
+ etherdevice="$ETHERDEVICE"
+ if [ -x /sbin/getcfg-interface ]; then
+ etherdevice=$(/sbin/getcfg-interface "$ETHERDEVICE")
+ fi
+ if [ "$ETHERDEVICE" = "$netdev" ] || [ "$etherdevice" = "$netdev" ] ;
then
+ link_exists "$vlan" && vlans="$vlans $vlan"
+ fi
+ done
+ echo "$vlans"
+}
+
# Usage: show_status dev bridge
# Print ifconfig and routes.
show_status () {
@@ -227,6 +249,9 @@ op_start () {
claim_lock "network-bridge"
+ vlans=$(find_active_vlans "${netdev}")
+ for vlan in $vlans ; do ifdown $vlan ; done
+
create_bridge ${tdev}
preiftransfer ${netdev}
@@ -254,6 +279,8 @@ op_start () {
add_to_bridge2 ${bridge} ${pdev}
do_ifup ${bridge}
+ for vlan in $vlans ; do ifup $vlan ; done
+
if [ ${antispoof} = 'yes' ] ; then
antispoofing
fi
@@ -275,6 +302,9 @@ op_stop () {
claim_lock "network-bridge"
+ vlans=$(find_active_vlans "${netdev}")
+ for vlan in $vlans ; do ifdown $vlan ; done
+
transfer_addrs ${bridge} ${pdev}
if ! ifdown ${bridge}; then
get_ip_info ${bridge}
@@ -291,6 +321,8 @@ op_stop () {
ip link set ${pdev} name ${netdev}
do_ifup ${netdev}
+ for vlan in $vlans ; do ifup $vlan ; done
+
release_lock "network-bridge"
}
++++++ build-tapdisk-ioemu.patch ++++++
From f1ebeae7802a5775422004f62630c42e46dcf664 Mon Sep 17 00:00:00 2001From: Kevin Wolf <kwolf@xxxxxxx>
Date: Tue, 10 Mar 2009 16:32:40 +0100
Subject: [PATCH 3/6] ioemu: Build tapdisk-ioemu binary
When changing away from the old ioemu, changes in the Makefiles
resulted in tapdisk-ioemu appearing there, but actually not
being built. This patch re-enables the build of tapdisk-ioemu.
Signed-off-by: Kevin Wolf <kwolf@xxxxxxx>
---
Makefile | 22 +++++++++++++++-------
configure | 2 +-
qemu-tool.c | 2 +-
tapdisk-ioemu.c | 17 -----------------
4 files changed, 17 insertions(+), 26 deletions(-)
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/Makefile
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/Makefile
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/Makefile
@@ -46,14 +46,6 @@ $(filter %-user,$(SUBDIR_RULES)): libqem
recurse-all: $(SUBDIR_RULES)
-CPPFLAGS += -I$(XEN_ROOT)/tools/libxc
-CPPFLAGS += -I$(XEN_ROOT)/tools/blktap/lib
-CPPFLAGS += -I$(XEN_ROOT)/tools/xenstore
-CPPFLAGS += -I$(XEN_ROOT)/tools/include
-
-tapdisk-ioemu: tapdisk-ioemu.c cutils.c block.c block-raw.c block-cow.c
block-qcow.c aes.c block-vmdk.c block-cloop.c block-dmg.c block-bochs.c
block-vpc.c block-vvfat.c block-qcow2.c hw/xen_blktap.c osdep.c
- $(CC) -DQEMU_TOOL $(CFLAGS) $(CPPFLAGS) $(BASE_CFLAGS) $(LDFLAGS)
$(BASE_LDFLAGS) -o $@ $^ -lz $(LIBS)
-
#######################################################################
# BLOCK_OBJS is code used by both qemu system emulation and qemu-img
@@ -72,6 +64,21 @@ endif
BLOCK_OBJS += block-raw-posix.o
endif
+#######################################################################
+# tapdisk-ioemu
+
+hw/tapdisk-xen_blktap.o: hw/xen_blktap.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) -DQEMU_IMG -DQEMU_TOOL -c -o $@ $<
+tapdisk-ioemu.o: tapdisk-ioemu.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) -DQEMU_IMG -DQEMU_TOOL -c -o $@ $<
+
+tapdisk-ioemu: CPPFLAGS += -I$(XEN_ROOT)/tools/libxc
+tapdisk-ioemu: CPPFLAGS += -I$(XEN_ROOT)/tools/blktap/lib
+tapdisk-ioemu: CPPFLAGS += -I$(XEN_ROOT)/tools/xenstore
+tapdisk-ioemu: CPPFLAGS += -I$(XEN_ROOT)/tools/include
+tapdisk-ioemu: tapdisk-ioemu.o $(BLOCK_OBJS) qemu-tool.o
hw/tapdisk-xen_blktap.o
+ $(CC) $(LDFLAGS) -o $@ $^ -lz $(LIBS)
+
######################################################################
# libqemu_common.a: Target independent part of system emulation. The
# long term path is to suppress *all* target specific code in case of
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/configure
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/configure
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/configure
@@ -1511,7 +1511,7 @@ bsd)
;;
esac
-tools=
+tools="tapdisk-ioemu"
if test `expr "$target_list" : ".*softmmu.*"` != 0 ; then
tools="qemu-img\$(EXESUF) $tools"
if [ "$linux" = "yes" ] ; then
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/qemu-tool.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/qemu-tool.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/qemu-tool.c
@@ -68,7 +68,7 @@ void qemu_bh_delete(QEMUBH *bh)
qemu_free(bh);
}
-int qemu_set_fd_handler2(int fd,
+int __attribute__((weak)) qemu_set_fd_handler2(int fd,
IOCanRWHandler *fd_read_poll,
IOHandler *fd_read,
IOHandler *fd_write,
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/tapdisk-ioemu.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/tapdisk-ioemu.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/tapdisk-ioemu.c
@@ -12,34 +12,12 @@
extern void qemu_aio_init(void);
extern void qemu_aio_poll(void);
-extern void bdrv_init(void);
-
-extern void *qemu_mallocz(size_t size);
-extern void qemu_free(void *ptr);
extern void *fd_start;
int domid = 0;
FILE* logfile;
-void term_printf(const char *fmt, ...)
-{
- va_list ap;
- va_start(ap, fmt);
- vprintf(fmt, ap);
- va_end(ap);
-}
-
-void term_print_filename(const char *filename)
-{
- term_printf(filename);
-}
-
-
-typedef void IOReadHandler(void *opaque, const uint8_t *buf, int size);
-typedef int IOCanRWHandler(void *opaque);
-typedef void IOHandler(void *opaque);
-
typedef struct IOHandlerRecord {
int fd;
IOCanRWHandler *fd_read_poll;
@@ -103,7 +81,6 @@ int main(void)
logfile = stderr;
bdrv_init();
- qemu_aio_init();
init_blktap();
/* Daemonize */
@@ -115,8 +92,6 @@ int main(void)
* completed aio operations.
*/
while (1) {
- qemu_aio_poll();
-
max_fd = -1;
FD_ZERO(&rfds);
for(ioh = first_io_handler; ioh != NULL; ioh = ioh->next)
++++++ capslock_enable.patch ++++++
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/vnc.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/vnc.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/vnc.c
@@ -1342,6 +1342,11 @@ static void do_key_event(VncState *vs, i
}
break;
case 0x3a: /* CapsLock */
+ if(!down){
+ vs->modifiers_state[keycode] ^= 1;
+ kbd_put_keycode(keycode | 0x80);
+ }
+ return;
case 0x45: /* NumLock */
if (down) {
kbd_put_keycode(keycode & 0x7f);
++++++ cdrom-removable.patch ++++++
Index: xen-4.1.2-testing/tools/python/xen/xend/server/HalDaemon.py
===================================================================
--- /dev/null
+++ xen-4.1.2-testing/tools/python/xen/xend/server/HalDaemon.py
@@ -0,0 +1,243 @@
+#!/usr/bin/env python
+# -*- mode: python; -*-
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2007 Pat Campbell <plc@xxxxxxxxxx>
+# Copyright (C) 2007 Novell Inc.
+#============================================================================
+
+"""hald (Hardware Abstraction Layer Daemon) watcher for Xen management
+ of removable block device media.
+
+"""
+
+import gobject
+import dbus
+import dbus.glib
+import os
+import types
+import sys
+import signal
+import traceback
+from xen.xend.xenstore.xstransact import xstransact, complete
+from xen.xend.xenstore.xsutil import xshandle
+from xen.xend import PrettyPrint
+from xen.xend import XendLogging
+from xen.xend.XendLogging import log
+
+DEVICE_TYPES = ['vbd', 'tap']
+
+class HalDaemon:
+ """The Hald block device watcher for XEN
+ """
+
+ """Default path to the log file. """
+ logfile_default = "/var/log/xen/hald.log"
+
+ """Default level of information to be logged."""
+ loglevel_default = 'INFO'
+
+
+ def __init__(self):
+
+ XendLogging.init(self.logfile_default, self.loglevel_default)
+ log.debug( "%s", "__init__")
+
+ self.udi_dict = {}
+ self.debug = 0
+ self.dbpath = "/local/domain/0/backend"
+ self.bus = dbus.SystemBus()
+ self.hal_manager_obj = self.bus.get_object('org.freedesktop.Hal',
'/org/freedesktop/Hal/Manager')
+ self.hal_manager = dbus.Interface( self.hal_manager_obj,
'org.freedesktop.Hal.Manager')
+ self.gatherBlockDevices()
+ self.registerDeviceCallbacks()
+
+ def run(self):
+ log.debug( "%s", "In new run" );
+ try:
+ self.mainloop = gobject.MainLoop()
+ self.mainloop.run()
+ except KeyboardInterrupt, ex:
+ log.debug('Keyboard exception handler: %s', ex )
+ self.mainloop.quit()
+ except Exception, ex:
+ log.debug('Generic exception handler: %s', ex )
+ self.mainloop.quit()
+
+ def __del__(self):
+ log.debug( "%s", "In del " );
+ self.unRegisterDeviceCallbacks()
+ self.mainloop.quit()
+
+ def shutdown(self):
+ log.debug( "%s", "In shutdown now " );
+ self.unRegisterDeviceCallbacks()
+ self.mainloop.quit()
+
+ def stop(self):
+ log.debug( "%s", "In stop now " );
+ self.unRegisterDeviceCallbacks()
+ self.mainloop.quit()
+
+ def gatherBlockDevices(self):
+
+ # Get all the current devices from hal and save in a dictionary
+ try:
+ device_names = self.hal_manager.GetAllDevices()
+ i = 0;
+ for name in device_names:
+ #log.debug("device name, device=%s",name)
+ dev_obj = self.bus.get_object ('org.freedesktop.Hal', name)
+ dev = dbus.Interface (dev_obj, 'org.freedesktop.Hal.Device')
+ dev_properties =
dev_obj.GetAllProperties(dbus_interface="org.freedesktop.Hal.Device")
+ if dev_properties.has_key('block.device'):
+ dev_str = dev_properties['block.device']
+ dev_major = dev_properties['block.major']
+ dev_minor = dev_properties['block.minor']
+ udi_info = {}
+ udi_info['device'] = dev_str
+ udi_info['major'] = dev_major
+ udi_info['minor'] = dev_minor
+ udi_info['udi'] = name
+ self.udi_dict[i] = udi_info
+ i = i + 1
+ except Exception, ex:
+ print >>sys.stderr, 'Exception gathering block devices:', ex
+ log.warn("Exception gathering block devices (%s)",ex)
+
+ #
+ def registerDeviceCallbacks(self):
+ # setup the callbacks for when the gdl changes
+ self.hal_manager.connect_to_signal('DeviceAdded',
self.device_added_callback)
+ self.hal_manager.connect_to_signal('DeviceRemoved',
self.device_removed_callback)
+
+ #
+ def unRegisterDeviceCallbacks(self):
+ # setup the callbacks for when the gdl changes
+
self.hal_manager.remove_signal_receiver(self.device_added_callback,'DeviceAdded')
+
self.hal_manager.remove_signal_receiver(self.device_removed_callback,'DeviceRemoved')
+
+ #
+ def device_removed_callback(self,udi):
+ log.debug('UDI %s was removed',udi)
+ self.show_dict(self.udi_dict)
+ for key in self.udi_dict:
+ udi_info = self.udi_dict[key]
+ if udi_info['udi'] == udi:
+ device = udi_info['device']
+ major = udi_info['major']
+ minor = udi_info['minor']
+ self.change_xenstore( "remove", device, major, minor)
+
+ # Adds device to dictionary if not already there
+ def device_added_callback(self,udi):
+ log.debug('UDI %s was added', udi)
+ self.show_dict(self.udi_dict)
+ dev_obj = self.bus.get_object ('org.freedesktop.Hal', udi)
+ dev = dbus.Interface (dev_obj, 'org.freedesktop.Hal.Device')
+ device = dev.GetProperty ('block.device')
+ major = dev.GetProperty ('block.major')
+ minor = dev.GetProperty ('block.minor')
+ udi_info = {}
+ udi_info['device'] = device
+ udi_info['major'] = major
+ udi_info['minor'] = minor
+ udi_info['udi'] = udi
+ already = 0
+ cnt = 0;
+ for key in self.udi_dict:
+ info = self.udi_dict[key]
+ if info['udi'] == udi:
+ already = 1
+ break
+ cnt = cnt + 1
+ if already == 0:
+ self.udi_dict[cnt] = udi_info;
+ log.debug('UDI %s was added, device:%s major:%s minor:%s
index:%d\n', udi, device, major, minor, cnt)
+ self.change_xenstore( "add", device, major, minor)
+
+ # Debug helper, shows dictionary contents
+ def show_dict(self,dict=None):
+ if self.debug == 0 :
+ return
+ if dict == None :
+ dict = self.udi_dict
+ for key in dict:
+ log.debug('udi_info %s udi_info:%s',key,dict[key])
+
+ # Set or clear xenstore media-present depending on the action argument
+ # for every vbd that has this block device
+ def change_xenstore(self,action, device, major, minor):
+ for type in DEVICE_TYPES:
+ path = self.dbpath + '/' + type
+ domains = xstransact.List(path)
+ log.debug('domains: %s', domains)
+ for domain in domains: # for each domain
+ devices = xstransact.List( path + '/' + domain)
+ log.debug('devices: %s',devices)
+ for device in devices: # for each vbd device
+ str = device.split('/')
+ vbd_type = None;
+ vbd_physical_device = None
+ vbd_media = None
+ vbd_device_path = path + '/' + domain + '/' + device
+ listing = xstransact.List(vbd_device_path)
+ for entry in listing: # for each entry
+ item = path + '/' + entry
+ value = xstransact.Read( vbd_device_path + '/' + entry)
+ log.debug('%s=%s',item,value)
+ if item.find('media-present') != -1:
+ vbd_media = item;
+ vbd_media_path = item
+ if item.find('physical-device') != -1:
+ vbd_physical_device = value;
+ if item.find('type') != -1:
+ vbd_type = value;
+ if vbd_type is not None and vbd_physical_device is not None
and vbd_media is not None :
+ inode = vbd_physical_device.split(':')
+ imajor = parse_hex(inode[0])
+ iminor = parse_hex(inode[1])
+ log.debug("action:%s major:%s- minor:%s- imajor:%s-
iminor:%s- inode: %s",
+ action,major,minor, imajor, iminor, inode)
+ if int(imajor) == int(major) and int(iminor) ==
int(minor):
+ if action == "add":
+ xs_dict = {'media': "1"}
+ xstransact.Write(vbd_device_path,
'media-present', "1" )
+ log.debug("wrote xenstore media-present 1
path:%s",vbd_media_path)
+ else:
+ xstransact.Write(vbd_device_path,
'media-present', "0" )
+ log.debug("wrote xenstore media 0
path:%s",vbd_media_path)
+
+def mylog( fmt, *args):
+ f = open('/tmp/haldaemon.log', 'a')
+ print >>f, "HalDaemon ", fmt % args
+ f.close()
+
+
+def parse_hex(val):
+ try:
+ if isinstance(val, types.StringTypes):
+ return int(val, 16)
+ else:
+ return val
+ except ValueError:
+ return None
+
+if __name__ == "__main__":
+ watcher = HalDaemon()
+ watcher.run()
+ print 'Falling off end'
+
+
Index: xen-4.1.2-testing/tools/python/xen/xend/server/Hald.py
===================================================================
--- /dev/null
+++ xen-4.1.2-testing/tools/python/xen/xend/server/Hald.py
@@ -0,0 +1,125 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2007 Pat Campbell <plc@xxxxxxxxxx>
+# Copyright (C) 2007 Novell Inc.
+#============================================================================
+
+import errno
+import types
+import os
+import sys
+import time
+import signal
+from traceback import print_exc
+
+from xen.xend.XendLogging import log
+
+class Hald:
+ def __init__(self):
+ self.ready = False
+ self.running = True
+
+ def run(self):
+ """Starts the HalDaemon process
+ """
+ self.ready = True
+ try:
+ myfile = self.find("xen/xend/server/HalDaemon.py")
+ args = (["python", myfile ])
+ self.pid = self.daemonize("python", args )
+ #log.debug( "%s %s pid:%d", "Hald.py starting ", args, self.pid )
+ except:
+ self.pid = -1
+ log.debug("Unable to start HalDaemon process")
+
+ def shutdown(self):
+ """Shutdown the HalDaemon process
+ """
+ log.debug("%s pid:%d", "Hald.shutdown()", self.pid)
+ self.running = False
+ self.ready = False
+ if self.pid != -1:
+ try:
+ os.kill(self.pid, signal.SIGINT)
+ except:
+ print_exc()
+
+ def daemonize(self,prog, args):
+ """Runs a program as a daemon with the list of arguments. Returns the
PID
+ of the daemonized program, or returns 0 on error.
+ Copied from xm/create.py instead of importing to reduce coupling
+ """
+ r, w = os.pipe()
+ pid = os.fork()
+
+ if pid == 0:
+ os.close(r)
+ w = os.fdopen(w, 'w')
+ os.setsid()
+ try:
+ pid2 = os.fork()
+ except:
+ pid2 = None
+ if pid2 == 0:
+ os.chdir("/")
+ env = os.environ.copy()
+ env['PYTHONPATH'] = self.getpythonpath()
+ for fd in range(0, 256):
+ try:
+ os.close(fd)
+ except:
+ pass
+ os.open("/dev/null", os.O_RDWR)
+ os.dup2(0, 1)
+ os.dup2(0, 2)
+ os.execvpe(prog, args, env)
+ os._exit(1)
+ else:
+ w.write(str(pid2 or 0))
+ w.close()
+ os._exit(0)
+ os.close(w)
+ r = os.fdopen(r)
+ daemon_pid = int(r.read())
+ r.close()
+ os.waitpid(pid, 0)
+ #log.debug( "daemon_pid: %d", daemon_pid )
+ return daemon_pid
+
+ def getpythonpath(self):
+ str = " "
+ for p in sys.path:
+ if str != " ":
+ str = str + ":" + p
+ else:
+ if str != "":
+ str = p
+ return str
+
+ def find(self,path, matchFunc=os.path.isfile):
+ """Find a module in the sys.path
+ From web page:
http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52224
+ """
+ for dirname in sys.path:
+ candidate = os.path.join(dirname, path)
+ if matchFunc(candidate):
+ return candidate
+ raise Error("Can't find file %s" % path)
+
+if __name__ == "__main__":
+ watcher = Hald()
+ watcher.run()
+ time.sleep(10)
+ watcher.shutdown()
Index: xen-4.1.2-testing/tools/python/xen/xend/server/SrvServer.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/server/SrvServer.py
+++ xen-4.1.2-testing/tools/python/xen/xend/server/SrvServer.py
@@ -57,6 +57,7 @@ from xen.web.SrvDir import SrvDir
from SrvRoot import SrvRoot
from XMLRPCServer import XMLRPCServer
+from xen.xend.server.Hald import Hald
xoptions = XendOptions.instance()
@@ -252,6 +253,8 @@ def _loadConfig(servers, root, reload):
if xoptions.get_xend_unix_xmlrpc_server():
servers.add(XMLRPCServer(XendAPI.AUTH_PAM, False))
+ servers.add(Hald())
+
def create():
root = SrvDir()
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/xenstore.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/xenstore.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/xenstore.c
@@ -18,6 +18,7 @@
#include "exec-all.h"
#include "sysemu.h"
+#include "console.h"
#include "hw.h"
#include "pci.h"
#include "qemu-timer.h"
@@ -595,6 +596,21 @@ void xenstore_parse_domain_config(int hv
#endif
bs = bdrv_new(dev);
+
+ /* if cdrom physical put a watch on media-present */
+ if (bdrv_get_type_hint(bs) == BDRV_TYPE_CDROM) {
+ if (drv && !strcmp(drv, "phy")) {
+ if (pasprintf(&buf, "%s/media-present", bpath) != -1) {
+ if (bdrv_is_inserted(bs))
+ xs_write(xsh, XBT_NULL, buf, "1", strlen("1"));
+ else {
+ xs_write(xsh, XBT_NULL, buf, "0", strlen("0"));
+ }
+ xs_watch(xsh, buf, "media-present");
+ }
+ }
+ }
+
/* check if it is a cdrom */
if (danger_type && !strcmp(danger_type, "cdrom")) {
bdrv_set_type_hint(bs, BDRV_TYPE_CDROM);
@@ -1028,6 +1044,50 @@ static void xenstore_process_vcpu_set_ev
return;
}
+static void xenstore_process_media_change_event(char **vec)
+{
+ char *media_present = NULL;
+ unsigned int len;
+
+ media_present = xs_read(xsh, XBT_NULL, vec[XS_WATCH_PATH], &len);
+
+ if (media_present) {
+ BlockDriverState *bs;
+ char *buf = NULL, *cp = NULL, *path = NULL, *dev = NULL;
+
+ path = strdup(vec[XS_WATCH_PATH]);
+ cp = strstr(path, "media-present");
+ if (cp){
+ *(cp-1) = '\0';
+ pasprintf(&buf, "%s/dev", path);
+ dev = xs_read(xsh, XBT_NULL, buf, &len);
+ if (dev) {
+ if ( !strncmp(dev, "xvd", 3)) {
+ memmove(dev, dev+1, strlen(dev));
+ dev[0] = 'h';
+ dev[1] = 'd';
+ }
+ bs = bdrv_find(dev);
+ if (!bs) {
+ term_printf("device not found\n");
+ return;
+ }
+ if (strcmp(media_present, "0") == 0 && bs) {
+ bdrv_close(bs);
+ }
+ else if (strcmp(media_present, "1") == 0 &&
+ bs != NULL && bs->drv == NULL) {
+ if (bdrv_open(bs, bs->filename, 0 /* snapshot */) < 0) {
+ fprintf(logfile, "%s() qemu: could not open cdrom disk
'%s'\n",
+ __func__, bs->filename);
+ }
+ bs->media_changed = 1;
+ }
+ }
+ }
+ }
+}
+
void xenstore_process_event(void *opaque)
{
char **vec, *offset, *bpath = NULL, *buf = NULL, *drv = NULL, *image =
NULL;
@@ -1063,6 +1123,11 @@ void xenstore_process_event(void *opaque
xenstore_watch_callbacks[i].cb(vec[XS_WATCH_TOKEN],
xenstore_watch_callbacks[i].opaque);
+ if (!strcmp(vec[XS_WATCH_TOKEN], "media-present")) {
+ xenstore_process_media_change_event(vec);
+ goto out;
+ }
+
hd_index = drive_name_to_index(vec[XS_WATCH_TOKEN]);
if (hd_index == -1) {
fprintf(stderr,"medium change watch on `%s' -"
++++++ change-vnc-passwd.patch ++++++
Add support of change-vnc-password while vm is running.
Signed-off-by: Chunyan Liu <cyliu@xxxxxxxxxx>
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/vl.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/vl.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/vl.c
@@ -200,7 +200,7 @@ DriveInfo drives_table[MAX_DRIVES+1];
int nb_drives;
enum vga_retrace_method vga_retrace_method = VGA_RETRACE_DUMB;
int vga_ram_size;
-static DisplayState *display_state;
+DisplayState *display_state;
int nographic;
static int curses;
static int sdl;
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/vnc.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/vnc.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/vnc.c
@@ -2591,6 +2591,7 @@ int vnc_display_password(DisplayState *d
if (password && password[0]) {
if (!(vs->password = qemu_strdup(password)))
return -1;
+ vs->auth = VNC_AUTH_VNC;
}
return 0;
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/xenstore.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/xenstore.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/xenstore.c
@@ -25,6 +25,7 @@
#include "qemu-timer.h"
#include "qemu-xen.h"
+extern DisplayState *display_state;
struct xs_handle *xsh = NULL;
static char *media_filename[MAX_DRIVES+1];
static QEMUTimer *insert_timer = NULL;
@@ -1006,6 +1007,19 @@ static void xenstore_process_dm_command_
} else if (!strncmp(command, "continue", len)) {
fprintf(logfile, "dm-command: continue after state save\n");
xen_pause_requested = 0;
+ } else if (!strncmp(command, "chgvncpasswd", len)) {
+ fprintf(logfile, "dm-command: change vnc passwd\n");
+ if (pasprintf(&path,
+ "/local/domain/0/backend/vfb/%u/0/vncpasswd", domid) == -1) {
+ fprintf(logfile, "out of memory reading dm command parameter\n");
+ goto out;
+ }
+ par = xs_read(xsh, XBT_NULL, path, &len);
+ if (!par)
+ goto out;
+ if (vnc_display_password(display_state, par) == 0)
+ xenstore_record_dm_state("vncpasswdchged");
+ free(par);
} else if (!strncmp(command, "usb-add", len)) {
fprintf(logfile, "dm-command: usb-add a usb device\n");
if (pasprintf(&path,
Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -1488,6 +1488,20 @@ class XendDomainInfo:
target = max_target
self.setMemoryTarget(target)
+ def chgvncpasswd(self, passwd):
+ if self._stateGet() != DOM_STATE_HALTED:
+ path = '/local/domain/0/backend/vfb/%u/0/' % self.getDomid()
+ xstransact.Write(path, 'vncpasswd', passwd)
+ self.image.signalDeviceModel("chgvncpasswd", "vncpasswdchged")
+
+ for dev_uuid, (dev_type, dev_info) in self.info['devices'].items():
+ if dev_type == 'vfb':
+ dev_info['vncpasswd'] = passwd
+ dev_info['other_config']['vncpasswd'] = passwd
+ self.info.device_update(dev_uuid, cfg_xenapi = dev_info)
+ break
+ xen.xend.XendDomain.instance().managed_config_save(self)
+
def setMemoryTarget(self, target):
"""Set the memory target of this domain.
@param target: In MiB.
Index: xen-4.1.2-testing/tools/python/xen/xend/server/XMLRPCServer.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/server/XMLRPCServer.py
+++ xen-4.1.2-testing/tools/python/xen/xend/server/XMLRPCServer.py
@@ -95,7 +95,7 @@ methods = ['device_create', 'device_conf
'destroyDevice','getDeviceSxprs',
'setMemoryTarget', 'setName', 'setVCpuCount', 'shutdown',
'send_sysrq', 'getVCPUInfo', 'waitForDevices',
- 'getRestartCount', 'getBlockDeviceClass']
+ 'getRestartCount', 'getBlockDeviceClass', 'chgvncpasswd']
exclude = ['domain_create', 'domain_restore']
Index: xen-4.1.2-testing/tools/python/xen/xm/main.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xm/main.py
+++ xen-4.1.2-testing/tools/python/xen/xm/main.py
@@ -21,6 +21,7 @@
"""Grand unified management application for Xen.
"""
+import getpass
import atexit
import cmd
import os
@@ -289,6 +290,9 @@ SUBCOMMAND_HELP = {
'getenforce' : ('', 'Returns the current enforcing mode for the Flask
XSM module (Enforcing,Permissive)'),
'setenforce' : ('[ (Enforcing|1) | (Permissive|0) ]',
'Modifies the current enforcing mode for the Flask XSM
module'),
+ #change vnc password
+ 'change-vnc-passwd' : ('<Domain>',\
+ 'Change vnc password'),
}
SUBCOMMAND_OPTIONS = {
@@ -421,6 +425,7 @@ common_commands = [
"usb-del",
"domstate",
"vcpu-set",
+ "change-vnc-passwd",
]
domain_commands = [
@@ -462,6 +467,7 @@ domain_commands = [
"vcpu-list",
"vcpu-pin",
"vcpu-set",
+ "change-vnc-passwd",
]
host_commands = [
@@ -3881,6 +3887,10 @@ def xm_cpupool_migrate(args):
else:
server.xend.cpu_pool.migrate(domname, poolname)
+def xm_chgvncpasswd(args):
+ arg_check(args, "change-vnc-passwd", 1)
+ pwd = getpass.getpass("Enter new password: ")
+ server.xend.domain.chgvncpasswd(args[0], pwd)
commands = {
"shell": xm_shell,
@@ -3993,6 +4003,8 @@ commands = {
"usb-del": xm_usb_del,
#domstate
"domstate": xm_domstate,
+ #change vnc password:
+ "change-vnc-passwd": xm_chgvncpasswd,
}
## The commands supported by a separate argument parser in xend.xm.
++++++ change_home_server.patch ++++++
Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -3132,6 +3132,11 @@ class XendDomainInfo:
self._cleanup_phantom_devs(paths)
self._cleanupVm()
+ if "change_home_server" in self.info:
+ chs = self.info["change_home_server"]
+ if (type(chs) is str and chs == "False") or \
+ (type(chs) is bool and chs is False):
+ self.setChangeHomeServer(None)
if ("transient" in self.info["other_config"] and \
bool(self.info["other_config"]["transient"])) or \
("change_home_server" in self.info and \
++++++ check_device_status.patch ++++++
Improve check_device_status to handle HA cases
In HA environment, sometimes xenstore status has changed but ev.wait() cannot
get the signal, it will wait until timeout, thus incorrect device status is
returned. To fix this problem, we do not depend on ev.wait() result, but read
xenstore directly to get correct device status.
Index: xen-4.1.2-testing/tools/python/xen/xend/server/DevController.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/server/DevController.py
+++ xen-4.1.2-testing/tools/python/xen/xend/server/DevController.py
@@ -149,7 +149,10 @@ class DevController:
(status, err) = self.waitForBackend(devid)
if status == Timeout:
- self.destroyDevice(devid, False)
+ #Clean timeout backend resource
+ dev = self.convertToDeviceNumber(devid)
+ self.writeBackend(dev, HOTPLUG_STATUS_NODE, HOTPLUG_STATUS_ERROR)
+ self.destroyDevice(devid, True)
raise VmError("Device %s (%s) could not be connected. "
"Hotplug scripts not working." %
(devid, self.deviceClass))
@@ -554,7 +557,17 @@ class DevController:
xswatch(statusPath, hotplugStatusCallback, ev, result)
- ev.wait(DEVICE_CREATE_TIMEOUT)
+ for i in range(1, 50):
+ ev.wait(DEVICE_CREATE_TIMEOUT/50)
+ status = xstransact.Read(statusPath)
+ if status is not None:
+ if status == HOTPLUG_STATUS_ERROR:
+ result['status'] = Error
+ elif status == HOTPLUG_STATUS_BUSY:
+ result['status'] = Busy
+ else:
+ result['status'] = Connected
+ break
err = xstransact.Read(backpath, HOTPLUG_ERROR_NODE)
@@ -571,7 +584,12 @@ class DevController:
xswatch(statusPath, deviceDestroyCallback, ev, result)
- ev.wait(DEVICE_DESTROY_TIMEOUT)
+ for i in range(1, 50):
+ ev.wait(DEVICE_DESTROY_TIMEOUT/50)
+ status = xstransact.Read(statusPath)
+ if status is None:
+ result['status'] = Disconnected
+ break
return result['status']
++++++ checkpoint-rename.patch ++++++
Index: xen-4.1.2-testing/tools/python/xen/xend/XendCheckpoint.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/XendCheckpoint.py
+++ xen-4.1.2-testing/tools/python/xen/xend/XendCheckpoint.py
@@ -172,7 +172,7 @@ def save(fd, dominfo, network, live, dst
dominfo.destroy()
dominfo.testDeviceComplete()
try:
- dominfo.setName(domain_name, False)
+ dominfo.setName(domain_name)
except VmError:
# Ignore this. The name conflict (hopefully) arises because we
# are doing localhost migration; if we are doing a suspend of a
++++++ del_usb_xend_entry.patch ++++++
Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -1296,8 +1296,15 @@ class XendDomainInfo:
frontpath =
self.getDeviceController(deviceClass).frontendPath(dev)
backpath = xstransact.Read(frontpath, "backend")
thread.start_new_thread(self.getDeviceController(deviceClass).finishDeviceCleanup,
(backpath, path))
-
- rc = self.getDeviceController(deviceClass).destroyDevice(devid,
force)
+ if deviceClass =='vusb':
+ dev =
self.getDeviceController(deviceClass).convertToDeviceNumber(devid)
+ state = self.getDeviceController(deviceClass).readBackend(dev,
'state')
+ if state == '1':
+ rc =
self.getDeviceController(deviceClass).destroyDevice(devid, True)
+ else:
+ rc =
self.getDeviceController(deviceClass).destroyDevice(devid, force)
+ else:
+ rc =
self.getDeviceController(deviceClass).destroyDevice(devid, force)
if not force and rm_cfg:
# The backend path, other than the device itself,
# has to be passed because its accompanied frontend
++++++ disable-xl-when-using-xend.patch ++++++
Print a warning and exit xl if xend is running. It is not
recommened to use libxenlight in conjunction with legacy xend
toolstack.
xl could be useful even when xend is running, e.g. to debug
xend itself, so add a '-f' option to override the exit.
Index: xen-4.1.2-testing/tools/libxl/xl.c
===================================================================
--- xen-4.1.2-testing.orig/tools/libxl/xl.c
+++ xen-4.1.2-testing/tools/libxl/xl.c
@@ -88,12 +88,16 @@ int main(int argc, char **argv)
char *config_file;
void *config_data = 0;
int config_len = 0;
+ int force = 0;
- while ((opt = getopt(argc, argv, "+v")) >= 0) {
+ while ((opt = getopt(argc, argv, "+vf")) >= 0) {
switch (opt) {
case 'v':
if (minmsglevel > 0) minmsglevel--;
break;
+ case 'f':
+ force = 1;
+ break;
default:
fprintf(stderr, "unknown global option\n");
exit(2);
@@ -107,6 +111,18 @@ int main(int argc, char **argv)
exit(1);
}
opterr = 0;
+ /*
+ * On SUSE, if xend is running (and user isn't asking for help),
+ * print a warning and exit unless forced.
+ */
+ if ((system("/usr/sbin/xend status") == 0) && strcmp(cmd, "help")) {
+ if (force == 0) {
+ fprintf(stderr, "WARNING: xend is running! It is not recommended "
+ "using libxenlight in\nconjunction with the legacy xend "
+ "toolstack. Use -f (force) to override\n");
+ exit(1);
+ }
+ }
logger = xtl_createlogger_stdiostream(stderr, minmsglevel, 0);
if (!logger) exit(1);
Index: xen-4.1.2-testing/tools/libxl/xl_cmdimpl.c
===================================================================
--- xen-4.1.2-testing.orig/tools/libxl/xl_cmdimpl.c
+++ xen-4.1.2-testing/tools/libxl/xl_cmdimpl.c
@@ -1725,7 +1725,7 @@ void help(const char *command)
struct cmd_spec *cmd;
if (!command || !strcmp(command, "help")) {
- printf("Usage xl [-v] <subcommand> [args]\n\n");
+ printf("Usage xl [-v] [-f] <subcommand> [args]\n\n");
printf("xl full list of subcommands:\n\n");
for (i = 0; i < cmdtable_len; i++)
printf(" %-20s%s\n",
@@ -1733,7 +1733,7 @@ void help(const char *command)
} else {
cmd = cmdtable_lookup(command);
if (cmd) {
- printf("Usage: xl [-v] %s %s\n\n%s.\n\n",
+ printf("Usage: xl [-v] [-f] %s %s\n\n%s.\n\n",
cmd->cmd_name,
cmd->cmd_usage,
cmd->cmd_desc);
++++++ disable_emulated_device.diff ++++++
Index:
xen-4.1.2-testing/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
===================================================================
---
xen-4.1.2-testing.orig/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
+++ xen-4.1.2-testing/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
@@ -426,6 +426,11 @@ static int __devinit platform_pci_init(s
platform_mmio = mmio_addr;
platform_mmiolen = mmio_len;
+ /*
+ * Disconnect the emulated devices.
+ */
+ outl(1, (ioaddr + 4));
+
ret = init_hypercall_stubs();
if (ret < 0)
goto out;
++++++ dom-print.patch ++++++
Index: xen-4.1.2-testing/xen/arch/x86/domain.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/domain.c
+++ xen-4.1.2-testing/xen/arch/x86/domain.c
@@ -150,15 +150,30 @@ void dump_pageframe_info(struct domain *
printk("Memory pages belonging to domain %u:\n", d->domain_id);
- if ( d->tot_pages >= 10 )
+ if ( d->tot_pages >= 10 && d->is_dying < DOMDYING_dead )
{
printk(" DomPage list too long to display\n");
}
else
{
+ unsigned long total[PGT_type_mask
+ / (PGT_type_mask & -PGT_type_mask) + 1] = {};
+
spin_lock(&d->page_alloc_lock);
page_list_for_each ( page, &d->page_list )
{
+ unsigned int index = (page->u.inuse.type_info & PGT_type_mask)
+ / (PGT_type_mask & -PGT_type_mask);
+
+ if ( ++total[index] > 16 )
+ {
+ switch ( page->u.inuse.type_info & PGT_type_mask )
+ {
+ case PGT_none:
+ case PGT_writable_page:
+ continue;
+ }
+ }
printk(" DomPage %p: caf=%08lx, taf=%" PRtype_info "\n",
_p(page_to_mfn(page)),
page->count_info, page->u.inuse.type_info);
++++++ domUloader.py ++++++
#!/usr/bin/env python
# domUloader.py
"""Loader for kernel and (optional) ramdisk from domU filesystem
Given a physical disk (or disk image) for a domU and the path of a kernel and
optional ramdisk, copies the kernel and ramdisk from the domU disk to a
temporary location in dom0.
The --entry parameter specifies the location of the kernel (and optional
ramdisk) within the domU filesystem. dev is the disk as seen by domU.
Filenames are relative to that filesystem.
The disk is passed as the last parameter. It must be a block device or raw
disk image. More complex disk images (QCOW, VMDK, etc) must already be
configured via blktap and presented as a block device.
The script writes an sxpr specifying the locations of the copied kernel and
ramdisk into the file specified by --output (default is stdout).
Limitations:
- It is assumed both kernel and ramdisk are on the same filesystem.
- domUs might use LVM; the script currently does not have support for setting
up LVM mappings for domUs; it's not trivial and we might risk namespace
conflicts. If you want to use LVM inside domUs, set up a small non-LVM boot
partition and specify it in bootentry.
The script uses kpartx (multipath-tools) to create mappings for devices that
are exported as whole disk devices that are partitioned.
(c) 01/2006 Novell Inc
License: GNU GPL
Author: Kurt Garloff <garloff@xxxxxxx>
"""
import os, sys, getopt
from stat import *
from xen.xend import sxp
import tempfile
import time
# Global options
quiet = False
verbose = False
dryrun = False
tmpdir = '/var/lib/xen/tmp'
in_args = ''
# kpartx, left to its own devices, does not consistently pick the
# same partition separator. Explicitly specify it.
kpartx_args = '-p -part'
# Helper functions
def kpartx_has_opt(opt):
""" Return True if kpartx supports option opt, otherwise False"""
have_opt = True
kpartx_cmd = 'kpartx -' + opt + ' 2>&1'
p = os.popen(kpartx_cmd)
for line in p.readlines():
if line.find('invalid option') >= 0:
have_opt = False
break
p.close()
return have_opt
def error(s):
print >> sys.stderr, "domUloader error: %s" % s
def verbose_print(s):
if verbose:
print >> sys.stderr, "domUloader: %s" % s
def traildigits(strg):
"""Return the trailing digits, used to split the partition number off"""
idx = len(strg)-1
while strg[idx].isdigit():
if len == 0:
return strg
idx -= 1
return strg[idx+1:]
def getWholedisk(part):
while len(part) and part[len(part)-1].isdigit():
part = part[:-1]
return part
#def isWholedisk(domUname):
# """Determines whether dev is a wholedisk dev"""
# return not domUname[-1:].isdigit()
# If available, add '-f' option (bnc#613584)
if kpartx_has_opt('f'):
kpartx_args += ' -f'
class Wholedisk:
"Class representing a whole disk that may have partitions"
def __init__(self, vdev, pdev):
"c'tor: set up"
# Initialize object; will not raise:
self.ldev = None
self.vdev = vdev
self.pdev = pdev
self.mapped = 0
self.partitions = []
self.pcount = 0
self.lvm = False
# Finish initialization; may raise:
self.is_blk = (S_ISBLK(os.stat(pdev)[ST_MODE]))
self.pcount = self.scanpartitions()
def physdev(self):
"""Gets the physical device used to access the device from dom0"""
if self.ldev:
return self.ldev
return self.pdev
def findPart(self, vdev):
"Find device dev in list of partitions"
if len(vdev) > 5 and vdev[:5] == "/dev/":
vdev = vdev[5:]
for part in self.partitions:
if vdev == part.vdev:
return part
if len(self.partitions):
return self.partitions[0]
return None
def loopsetup(self):
"""Sets up the loop mapping for a disk image.
Will raise if no loopbacks are available.
"""
if not self.is_blk and not self.ldev:
# Loops through all loopback devices, attempting to
# find a free one to set up. Don't scan for free and
# then try to set it up as a separate step - too racy!
i = 0
while True:
ldev = '/dev/loop%i' % (i)
if not os.path.exists(ldev):
break
i += 1
fd = os.popen("losetup %s '%s' 2> /dev/null" % (ldev,
self.pdev))
if not fd.close():
verbose_print("losetup %s '%s'" % (ldev, self.pdev))
self.ldev = ldev
break
if not self.ldev:
raise RuntimeError("No free loop device found")
def loopclean(self):
"""Delete the loop mapping.
Will never raise.
"""
if self.ldev:
verbose_print("losetup -d %s" % self.ldev)
# Even seemingly innocent queries like "losetup /dev/loop0"
# can temporarily block the loopback and cause transient
# failures deleting the loopback, hence the retry logic.
retries = 10
while retries:
fd = os.popen("losetup -d %s" % self.ldev)
if not fd.close():
self.ldev = None
break
else:
# Mappings may not have been deleted due to race
# between udev and dm - see bnc#379032. Causes
# loop devices to leak. Call kpartx -d again
os.system("kpartx %s -d '%s'" % (kpartx_args,
self.physdev()))
time.sleep(0.1)
retries -= 1
def scanlvmpartitions(self):
pcount = 0
verbose_print("vgchange -ay '%s'" % (self.vdev))
ret = os.system("vgchange -ay '%s' > /dev/null 2>&1" % (self.vdev)) >> 8
if not ret:
self.lvm = True
verbose_print("lvscan | grep '/dev/%s'" % (self.vdev))
fd = os.popen("lvscan | grep '/dev/%s'" % (self.vdev))
for line in fd.readlines():
line = line.strip()
(t1, lvname, t2) = line.split('\'')
pname = lvname[lvname.rfind('/')+1:]
pname = pname.strip()
pname = "/dev/mapper/" + self.vdev + "-" + pname
verbose_print("Found partition: vdev %s, pdev %s" % (self.vdev,
pname))
self.partitions.append(Partition(self, self.vdev, pname))
pcount += 1
fd.close()
verbose_print("vgchange -an '%s'" % (self.vdev))
os.system("vgchange -an '%s' > /dev/null 2>&1" % (self.vdev))
else:
verbose_print("vgchange -ay %s ... failed: -%d" % (self.vdev, ret))
return pcount
def scanpartitions(self):
"""Scan device for partitions (kpartx -l) and set up data structures,
Returns number of partitions found."""
self.loopsetup()
# TODO: We could use fdisk -l instead and look at the type of
# partitions; this way we could also detect LVM and support it.
verbose_print("kpartx %s -l '%s'" % (kpartx_args, self.physdev()))
fd = os.popen("kpartx %s -l '%s'" % (kpartx_args, self.physdev()))
pcount = 0
for line in fd.readlines():
line = line.strip()
verbose_print("kpartx -l: %s" % (line,))
(pname, params) = line.split(' : ')
pname = pname.strip()
pno = int(traildigits(pname))
#if pname.rfind('/') != -1:
# pname = pname[pname.rfind('/')+1:]
#pname = self.pdev[:self.pdev.rfind('/')] + '/' + pname
pname = "/dev/mapper/" + pname
verbose_print("Found partition: vdev %s, pdev %s" % ('%s%i' %
(self.vdev, pno), pname))
self.partitions.append(Partition(self, '%s%i' % (self.vdev, pno),
pname))
pcount += 1
fd.close()
# Try lvm
if not pcount:
pcount = self.scanlvmpartitions()
# Add self to partition table
if not pcount:
if self.ldev:
ref = self
else:
ref = None
self.partitions.append(Partition(ref, self.vdev, self.pdev))
return pcount
def activatepartitions(self):
"Set up loop mapping and device-mapper mappings"
verbose_print("activatepartitions")
if not self.mapped:
self.loopsetup()
if self.pcount:
verbose_print("kpartx %s -a '%s'" % (kpartx_args,
self.physdev()))
fd = os.popen("kpartx %s -a '%s'" % (kpartx_args,
self.physdev()))
fd.close()
if self.pcount and self.lvm:
verbose_print("vgchange -ay '%s'" % (self.vdev))
ret = os.system("vgchange -ay '%s' > /dev/null 2>&1" %
(self.vdev)) >> 8
if not ret:
verbose_print("lvchange -ay '%s'" % (self.vdev))
os.system("lvchange -ay '%s' > /dev/null 2>&1" %
(self.vdev))
self.mapped += 1
def partitionsdeactivated(self):
"Return True if partition mappings have been removed, False otherwise"
for part in self.partitions:
if os.access(part.pdev, os.F_OK):
return False
return True
def deactivatepartitions(self):
"""Remove device-mapper mappings and loop mapping.
Will never raise.
"""
verbose_print("deactivatepartitions")
if not self.mapped:
return
self.mapped -= 1
if not self.mapped:
if self.pcount:
retries = 10
while retries and not self.partitionsdeactivated():
verbose_print("kpartx %s -d '%s'" % (kpartx_args,
self.physdev()))
os.system("kpartx %s -d '%s'" % (kpartx_args,
self.physdev()))
time.sleep(0.1)
retries -= 1
if retries == 0:
error("unable to remove partition mappings with kpartx -d")
if self.pcount and self.lvm:
verbose_print("lvchange -an '%s'" % (self.vdev))
ret = os.system("lvchange -an '%s' > /dev/null 2>&1" %
(self.vdev)) >> 8
if ret:
time.sleep(0.3)
os.system("lvchange -an '/dev/%s' > /dev/null 2>&1" %
(self.vdev))
verbose_print("vgchange -an '%s'" % (self.vdev))
ret = os.system("vgchange -an '%s' > /dev/null 2>&1" %
(self.vdev)) >> 8
if ret:
time.sleep(0.3)
os.system("vgchange -an '%s' > /dev/null 2>&1" %
(self.vdev))
self.loopclean()
def __del__(self):
"d'tor: clean up"
self.deactivatepartitions()
self.loopclean()
def __repr__(self):
"string representation for debugging"
strg = "[" + self.vdev + "," + self.pdev + ","
if self.ldev:
strg += self.ldev
strg += "," + str(self.pcount) + ",mapped %ix]" % self.mapped
return strg
class Partition:
"""Class representing a domU filesystem (partition) that can be
mounted in dom0"""
def __init__(self, whole = None, vdev = None, pdev = None):
"c'tor: setup"
self.wholedisk = whole
self.vdev = vdev
self.pdev = pdev
self.mountpoint = None
def __del__(self):
"d'tor: cleanup"
if self.mountpoint:
self.umount()
# Not needed: Refcounting will take care of it.
#if self.wholedisk:
# self.wholedisk.deactivatepartitions()
def __repr__(self):
"string representation for debugging"
strg = "[" + self.vdev + "," + self.pdev + ","
if self.mountpoint:
strg += "mounted on " + self.mountpoint + ","
else:
strg += "not mounted,"
if self.wholedisk:
return strg + self.wholedisk.__repr__() + "]"
else:
return strg + "]"
def mount(self, fstype = None, options = "ro"):
"mount filesystem, sets self.mountpoint"
if self.mountpoint:
return
if self.wholedisk:
self.wholedisk.activatepartitions()
mtpt = tempfile.mkdtemp(prefix = "%s." % self.vdev, dir = tmpdir)
mopts = ""
if fstype:
mopts += " -t %s" % fstype
if options:
mopts += " -o %s" % options
verbose_print("mount %s '%s' %s" % (mopts, self.pdev, mtpt))
fd = os.popen("mount %s '%s' %s" % (mopts, self.pdev, mtpt))
err = fd.close()
if err:
try:
os.rmdir(mtpt)
except:
pass
raise RuntimeError("Error %i from mount %s '%s' on %s" % \
(err, mopts, self.pdev, mtpt))
self.mountpoint = mtpt
def umount(self):
"""umount filesystem at self.mountpoint"""
if not self.mountpoint:
return
verbose_print("umount %s" % self.mountpoint)
fd = os.popen("umount %s" % self.mountpoint)
err = fd.close()
try:
os.rmdir(self.mountpoint)
except:
pass
if err:
error("Error %i from umount %s" % (err, self.mountpoint))
else:
self.mountpoint = None
if self.wholedisk:
self.wholedisk.deactivatepartitions()
def parseEntry(entry):
"disects bootentry and returns vdev, kernel, ramdisk"
def bad():
raise RuntimeError, "Malformed --entry"
fsspl = entry.split(':')
if len(fsspl) != 2:
bad()
vdev = fsspl[0]
entry = fsspl[1]
enspl = entry.split(',')
if len(enspl) not in (1, 2):
bad()
# Prepend '/' if missing
kernel = enspl[0]
if kernel == '':
bad()
if kernel[0] != '/':
kernel = '/' + kernel
ramdisk = None
if len(enspl) > 1:
ramdisk = enspl[1]
if ramdisk != '' and ramdisk[0] != '/':
ramdisk = '/' + ramdisk
return vdev, kernel, ramdisk
def copyFile(src, dst):
"Wrapper for shutil.filecopy"
import shutil
verbose_print("cp %s %s" % (src, dst))
stat = os.stat(src)
if stat.st_size > 16*1024*1024:
raise RuntimeError("Too large file %s (%s larger than 16MB)" \
% (src, stat.st_size))
try:
shutil.copyfile(src, dst)
except:
os.unlink(dst)
raise()
def copyKernelAndRamdisk(disk, vdev, kernel, ramdisk):
"""Finds vdev in list of partitions, mounts the partition, copies
kernel [and ramdisk] off to dom0 files, umounts the parition again,
and returns sxpr pointing to these copies."""
verbose_print("copyKernelAndRamdisk(%s, %s, %s, %s)" % (disk, vdev, kernel,
ramdisk))
if dryrun:
return "linux (kernel kernel.dummy) (ramdisk ramdisk.dummy)"
part = disk.findPart(vdev)
if not part:
raise RuntimeError("Partition '%s' does not exist" % vdev)
part.mount()
try:
(fd, knm) = tempfile.mkstemp(prefix = "kernel.", dir = tmpdir)
os.close(fd)
copyFile(part.mountpoint + kernel, knm)
except:
os.unlink(knm)
part.umount()
raise
if not quiet:
print "Copy kernel %s from %s to %s for booting" % (kernel, vdev, knm)
sxpr = "linux (kernel %s)" % knm
if ramdisk:
try:
(fd, inm) = tempfile.mkstemp(prefix = "ramdisk.", dir = tmpdir)
os.close(fd)
copyFile(part.mountpoint + ramdisk, inm)
except:
os.unlink(knm)
os.unlink(inm)
part.umount()
raise
sxpr += "(ramdisk %s)" % inm
part.umount()
return sxpr
def main(argv):
"Main routine: Parses options etc."
global quiet, dryrun, verbose, tmpdir, in_args
def usage():
"Help output (usage info)"
global verbose, quiet, dryrun
print >> sys.stderr, "domUloader usage: domUloader [--output=fd]
[--quiet] [--dryrun] [--verbose]\n" +\
"[--args] [--help] --entry=dev:kernel[,ramdisk]
physdisk [virtdisk]\n" +\
"\n" +\
"dev format: hd[a-p][0-9]*, xvd[a-p][0-9]*,
LVM-vgname-lvname\n"
print >> sys.stderr, __doc__
try:
(optlist, args) = getopt.gnu_getopt(argv, 'qvh', \
('entry=', 'output=', 'tmpdir=', 'args=', 'kernel=', 'ramdisk=',
'help', 'quiet', 'dryrun', 'verbose'))
except:
usage()
sys.exit(1)
entry = None
output = None
pdisk = None
vdisk = None
for (opt, oarg) in optlist:
if opt in ('-h', '--help'):
usage()
sys.exit(1)
elif opt in ('-q', '--quiet'):
quiet = True
elif opt in ('-n', '--dryrun'):
dryrun = True
elif opt in ('-v', '--verbose'):
verbose = True
elif opt == '--output':
output = oarg
elif opt == '--entry':
entry = oarg
elif opt == '--tmpdir':
tmpdir = oarg
elif opt == '--args':
in_args = oarg
verbose_print(str(argv))
if args:
if len(args) == 2:
pdisk = args[1]
elif len(args) == 3:
pdisk = args[1]
vdisk = args[2]
if not entry or not pdisk:
usage()
sys.exit(1)
if output is None or output == "-":
fd = sys.stdout.fileno()
else:
fd = os.open(output, os.O_WRONLY)
if not os.access(tmpdir, os.X_OK):
os.mkdir(tmpdir)
os.chmod(tmpdir, 0750)
vdev, kernel, ramdisk = parseEntry(entry)
if vdev[:vdev.find('-')] == "LVM":
vdev = vdev.split('-')[1]
if not vdisk:
vdisk = getWholedisk(vdev)
verbose_print("vdisk not specified; guessing '%s' based on '%s'" %
(vdisk, vdev))
if not vdev.startswith(vdisk):
error("Virtual disk '%s' does not match entry '%s'" % (vdisk, entry))
sys.exit(1)
disk = Wholedisk(vdisk, pdisk)
r = 0
try:
sxpr = copyKernelAndRamdisk(disk, vdev, kernel, ramdisk)
if in_args:
sxpr += "(args '%s')" % in_args
os.write(fd, sxpr)
except Exception, e:
error(str(e))
r = 1
for part in disk.partitions:
part.wholedisk = None
del disk
return r
# Call main if called (and not imported)
if __name__ == "__main__":
r = 1
try:
r = main(sys.argv)
except Exception, e:
error(str(e))
sys.exit(r)
++++++ domu-usb-controller.patch ++++++
Index: xen-4.1.2-testing/tools/python/xen/xend/XendConfig.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/XendConfig.py
+++ xen-4.1.2-testing/tools/python/xen/xend/XendConfig.py
@@ -1874,7 +1874,14 @@ class XendConfig(dict):
ports = sxp.child(dev_sxp, 'port')
for port in ports[1:]:
try:
- num, bus = port
+ # When ['port' ['1','']] is saved into sxp file, it will
become (port (1 ))
+ # If using this sxp file, here variable "port" will be port=1,
+ # we should process it, otherwise, it will report error.
+ if len(port) == 1:
+ num = port[0]
+ bus = ""
+ else:
+ num, bus = port
dev_config['port-%i' % int(num)] = str(bus)
except TypeError:
pass
++++++ etc_pam.d_xen-api ++++++
#%PAM-1.0
auth required pam_listfile.so onerr=fail item=user \
sense=allow file=/etc/xen/xenapiusers
auth include common-auth
account include common-account
password include common-password
session include common-session
++++++ hibernate.patch ++++++
Index: xen-4.1.2-testing/tools/firmware/hvmloader/acpi/dsdt.asl
===================================================================
--- xen-4.1.2-testing.orig/tools/firmware/hvmloader/acpi/dsdt.asl
+++ xen-4.1.2-testing/tools/firmware/hvmloader/acpi/dsdt.asl
@@ -30,21 +30,9 @@ DefinitionBlock ("DSDT.aml", "DSDT", 2,
/*
* S3 (suspend-to-ram), S4 (suspend-to-disc) and S5 (power-off) type codes:
* must match piix4 emulation.
+ * Turn off support for s3 and s4 sleep states to deal with SVVP tests.
+ * This is what MSFT does on HyperV.
*/
- Name (\_S3, Package (0x04)
- {
- 0x01, /* PM1a_CNT.SLP_TYP */
- 0x01, /* PM1b_CNT.SLP_TYP */
- 0x0, /* reserved */
- 0x0 /* reserved */
- })
- Name (\_S4, Package (0x04)
- {
- 0x00, /* PM1a_CNT.SLP_TYP */
- 0x00, /* PM1b_CNT.SLP_TYP */
- 0x00, /* reserved */
- 0x00 /* reserved */
- })
Name (\_S5, Package (0x04)
{
0x00, /* PM1a_CNT.SLP_TYP */
++++++ hotplug.losetup.patch ++++++
Improve busy loop device detection after changeset 22773:02c0af2bf280
The intention is not to find the file to be mounted in the losetup -a
output. What matters are existing mounted files with the same dev:inode
as the new file. So the fix is to apply variable expansion which
happens only without double quotes. Otherwise $dev will contain
newlines for hardlinked files, as mentioned in the commit message from
the changeset above.
losetup -a does also truncate long filenames to 62 chars due to ioctl
limitations. This part is fixed with 2.6.37 where the filename can be
obtained from sysfs. As a result very long filenames will be missed.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
tools/hotplug/Linux/block | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
Index: xen-4.1.2-testing/tools/hotplug/Linux/block
===================================================================
--- xen-4.1.2-testing.orig/tools/hotplug/Linux/block
+++ xen-4.1.2-testing/tools/hotplug/Linux/block
@@ -280,8 +280,8 @@ mount it read-write in a guest domain."
fi
shared_list=$(losetup -a |
- sed -n -e
"s@^\([^:]\+\)\(:[[:blank:]]\[${dev}\]:${inode}[[:blank:]](${file})\)@\1@p" )
- for dev in "$shared_list"
+ sed -n -e
"s@^\([^:]\+\)\(:[[:blank:]]\[0*${dev}\]:${inode}[[:blank:]](.*)\)@\1@p" )
+ for dev in $shared_list
do
if [ -n "$dev" ]
then
++++++ hv_extid_compatibility.patch ++++++
Index: xen-4.1.2-testing/tools/python/xen/xend/XendConfig.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/XendConfig.py
+++ xen-4.1.2-testing/tools/python/xen/xend/XendConfig.py
@@ -159,6 +159,7 @@ XENAPI_PLATFORM_CFG_TYPES = {
'nographic': int,
'nomigrate': int,
'pae' : int,
+ 'extid': int,
'rtc_timeoffset': int,
'parallel': str,
'serial': str,
@@ -517,6 +518,8 @@ class XendConfig(dict):
if self.is_hvm():
if 'timer_mode' not in self['platform']:
self['platform']['timer_mode'] = 1
+ if 'extid' in self['platform'] and int(self['platform']['extid'])
== 1:
+ self['platform']['viridian'] = 1
if 'viridian' not in self['platform']:
self['platform']['viridian'] = 0
if 'rtc_timeoffset' not in self['platform']:
Index: xen-4.1.2-testing/tools/python/xen/xend/image.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/image.py
+++ xen-4.1.2-testing/tools/python/xen/xend/image.py
@@ -839,6 +839,7 @@ class HVMImageHandler(ImageHandler):
self.apic = int(vmConfig['platform'].get('apic', 0))
self.acpi = int(vmConfig['platform'].get('acpi', 0))
+ self.extid = int(vmConfig['platform'].get('extid', 0))
self.guest_os_type = vmConfig['platform'].get('guest_os_type')
self.memory_sharing = int(vmConfig['memory_sharing'])
try:
Index: xen-4.1.2-testing/tools/python/xen/xm/create.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xm/create.py
+++ xen-4.1.2-testing/tools/python/xen/xm/create.py
@@ -242,6 +242,10 @@ gopts.var('viridian', val='VIRIDIAN',
use="""Expose Viridian interface to x86 HVM guest?
(Default is 0).""")
+gopts.var('extid', val='EXTID',
+ fn=set_int, default=0,
+ use="Specify extention ID for a HVM domain.")
+
gopts.var('acpi', val='ACPI',
fn=set_int, default=1,
use="Disable or enable ACPI of HVM domain.")
@@ -1106,7 +1110,7 @@ def configure_hvm(config_image, vals):
'timer_mode',
'usb', 'usbdevice',
'vcpus', 'vnc', 'vncconsole', 'vncdisplay', 'vnclisten',
- 'vncunused', 'viridian', 'vpt_align',
+ 'vncunused', 'vpt_align',
'watchdog', 'watchdog_action',
'xauthority', 'xen_extended_power_mgmt', 'xen_platform_pci',
'memory_sharing' ]
@@ -1116,6 +1120,10 @@ def configure_hvm(config_image, vals):
config_image.append([a, vals.__dict__[a]])
if vals.vncpasswd is not None:
config_image.append(['vncpasswd', vals.vncpasswd])
+ if vals.extid and vals.extid == 1:
+ config_image.append(['viridian', vals.extid])
+ elif vals.viridian:
+ config_image.append(['viridian', vals.viridian])
def make_config(vals):
++++++ init.pciback ++++++
#!/bin/bash
#
# Copyright (c) 2001 SuSE GmbH Nuernberg, Germany. All rights reserved.
#
# /etc/init.d/pciback
#
### BEGIN INIT INFO
# Provides: pciback
# Required-Start: $syslog $network
# Should-Start: $null
# Required-Stop: $syslog $network
# Should-Stop: $null
# Default-Start: 3 5
# Default-Stop: 0 1 2 6
# Description: bind PCI devices to pciback
### END INIT INFO
. /etc/rc.status
. /etc/sysconfig/pciback
rc_reset
load_pciback() {
if ! lsmod | grep -qi "pciback"
then
echo "Loading pciback ..."
modprobe pciback
fi
}
unload_pciback() {
if lsmod | grep -qi "pciback"
then
echo "Unloading pciback ..."
modprobe -r pciback
fi
}
bind_dev_to_pciback() {
for DEVICE in ${XEN_PCI_HIDE_LIST}
do
local DRV=`echo ${DEVICE} | /usr/bin/cut -d "," -f 1`
local PCIID=`echo ${DEVICE} | /usr/bin/cut -d "," -f 2`
if ! ls /sys/bus/pci/drivers/pciback/${PCIID} > /dev/null 2>&1
then
echo "Binding ${PCIID} ..."
if ls /sys/bus/pci/drivers/${DRV}/${PCIID} > /dev/null 2>&1
then
echo -n ${PCIID} > /sys/bus/pci/drivers/${DRV}/unbind
fi
echo -n ${PCIID} > /sys/bus/pci/drivers/pciback/new_slot
echo -n ${PCIID} > /sys/bus/pci/drivers/pciback/bind
fi
done
}
unbind_dev_from_pciback() {
for DEVICE in ${XEN_PCI_HIDE_LIST}
do
local DRV=`echo ${DEVICE} | /usr/bin/cut -d "," -f 1`
local PCIID=`echo ${DEVICE} | /usr/bin/cut -d "," -f 2`
if ls /sys/bus/pci/drivers/pciback/${PCIID} > /dev/null
then
echo "Unbinding ${PCIID} ..."
echo -n ${PCIID} > /sys/bus/pci/drivers/pciback/unbind
fi
done
}
test "uname -r" | grep xen && exit 0
case $1 in
start)
echo "Starting pciback ..."
echo
load_pciback
bind_dev_to_pciback
rc_status -v -r
;;
stop)
echo "Stopping pciback ..."
echo
unbind_dev_from_pciback
unload_pciback
rc_status -v
;;
reload|restart)
echo "Stopping pciback ..."
echo
unbind_dev_from_pciback
unload_pciback
echo "Starting pciback ..."
echo
load_pciback
bind_dev_to_pciback
;;
status)
if lsmod | grep -qi pciback
then
echo
echo "pciback: loaded"
echo
echo "Currently bound devices ..."
echo "-----------------------------"
ls /sys/bus/pci/drivers/pciback | grep ^0000
echo
else
echo "pciback: not loaded"
fi
;;
*)
echo "Usage: $0 [start|stop|restart|reload|status]"
exit 1
;;
esac
++++++ init.xen_loop ++++++
# Increase the number of loopback devices available for vm creation
options loop max_loop=64
++++++ init.xend ++++++
#!/bin/bash
#
# xend Starts and stops the Xen management daemon
#
# chkconfig: 35 98 01
# description: Starts and stops the Xen management daemon
#
### BEGIN INIT INFO
# Provides: xend
# Required-Start: $syslog $remote_fs
# Should-Start: iscsi $time
# Required-Stop: $syslog $remote_fs
# Should-Stop: iscsi $time
# Default-Start: 3 5
# Default-Stop: 0 1 2 6
# Short-Description: Starts and stops the Xen management daemon
# Description: Starts and stops the Xen management daemon. xend is needed
# to create and manage VMs on Xen.
### END INIT INFO
. /etc/rc.status
rc_reset
XEND=`pidof -x /usr/sbin/xend`
await_daemons_up()
{
i=1
rets=10
xend status
while [ $? -ne 0 -a $i -lt $rets ]; do
sleep 1
echo -n .
i=$(($i + 1))
xend status
done
}
xend_abort()
{
echo -n "xend "
rc_failed $1
rc_status -v
rc_exit
}
cleanup()
{
rm -f /var/lib/xen/tmp/* 2>/dev/null
rm -f /var/lib/xen/xenbl* 2>/dev/null
}
check()
{
if [ "$1" == status ]; then
if [ ! -e /proc/xen/capabilities ]; then
xend_abort 3
fi
else
if [ `id -u` != 0 ]; then
xend_abort 4
fi
if [ ! -e /proc/xen/capabilities ] ||
! grep control_d /proc/xen/capabilities >/dev/null 2>&1;
then
if [ "$1" == stop ] ||
[ "$1" == try-restart ]; then
xend_abort 0
else
xend_abort 6
fi
fi
fi
}
case "$1" in
start)
check $1
echo -n "Starting xend "
if [ ! -z "$XEND" ]; then
echo -n "(already running pid $XEND) "
else
cleanup
fi
xend start
await_daemons_up
;;
stop)
check $1
echo -n "Stopping xend "
if [ -z "$XEND" ]; then
echo -n "(not running) "
xend stop
rc_reset
else
echo -n "(pid $XEND) "
xend stop
cleanup
rc_reset
fi
;;
status)
check $1
echo -n "Checking status of xend "
if [ ! -z "$XEND" ]; then
echo -n "(pid $XEND) "
fi
checkproc /usr/sbin/xend
;;
restart|reload)
check $1
echo -n "Restarting xend "
if [ -z "$XEND" ]; then
echo -n "(not running) "
else
echo -n "(old pid $XEND) "
fi
xend restart
await_daemons_up
;;
try-restart)
check $1
$0 status
if [ $? = 0 ]; then
$0 restart
else
rc_reset
fi
;;
*)
echo "Usage: $0 {start|stop|restart|try-restart|reload|status}"
rc_failed 2
rc_exit
esac
rc_status -v
rc_exit
++++++ init.xendomains ++++++
#!/bin/bash
#
# xendomains Starts and stops Xen VMs
#
# chkconfig: 35 99 00
# description: Starts and stops Xen VMs
#
### BEGIN INIT INFO
# Provides: xendomains
# Required-Start: $syslog $remote_fs xend
# Should-Start: iscsi o2cb ocfs2
# Required-Stop: $syslog $remote_fs xend
# Should-Stop: iscsi
# Default-Start: 3 5
# Default-Stop: 0 1 2 6
# Short-Description: Starts and stops Xen VMs
# Description: Starts and stops Xen VMs automatically when the
# host starts and stops.
### END INIT INFO
. /etc/rc.status
rc_reset
LOCKFILE=/var/lock/subsys/xendomains
XENDOM_CONFIG=/etc/sysconfig/xendomains
RETCODE_FILE=/tmp/xendomains.rc.$$
xm_cmd=echo
. "$XENDOM_CONFIG"
shopt -s dotglob nullglob
smart_term=1
if [ -z "$esc" ]; then
smart_term=0
rc_timer_on()
{
(trap "exit 0" TERM; sleep $1) & _rc_timer_pid=$!
}
rc_timer_off()
{
if [ -n "$_rc_timer_pid" ]; then
kill -TERM $_rc_timer_pid > /dev/null 2>&1
fi
unset _rc_timer_pid
}
fi
xendomains_abort()
{
echo -n "xendomains "
rc_failed $1
rc_status -v
rc_exit
}
check()
{
XEND=`pidof -x /usr/sbin/xend`
if [ -z "$XEND" ]; then
xm_cmd="xl -f"
else
xm_cmd="xm"
fi
if [ "$1" = status ]; then
if [ ! -e /proc/xen/capabilities ] || [ ! -r "$XENDOM_CONFIG" ]
|| [ -z "$XEND" ]; then
xendomains_abort 3
fi
else
if [ `id -u` != 0 ]; then
xendomains_abort 4
fi
if [ ! -e /proc/xen/capabilities ] || [ -z "$XEND" ] ||
! grep control_d /proc/xen/capabilities >/dev/null 2>&1;
then
if [ "$1" = stop ] ||
[ "$1" = restart ]; then
xendomains_abort 0
else
xendomains_abort 6
fi
fi
if [ ! -r "$XENDOM_CONFIG" ]; then
xendomains_abort 6
fi
fi
}
dir_contains_something()
{
[ -d "$1" ] || return 1
local dirfiles=( "$1"/* )
[ ${#dirfiles[@]} != 0 ]
}
get_name_from_cfg()
{
if grep -q "^name" "$1";then
NM=`grep '^name[ ]*=' "$1" | sed -e 's/^name[ ]*=[
]*['\''"]\([^'\''"]*\)['\''"].*$/\1/'`
elif grep -q "(name " "$1";then
NM=`grep '(name ' "$1" | sed -e 's/^ *//' | cut -d " " -f 2 |
sed -e 's/)//'`
fi
}
running_auto_names()
{
unset AUTONAMES[@]
if ! dir_contains_something "$XENDOMAINS_AUTO"; then
return
fi
for dom in "$XENDOMAINS_AUTO"/*; do
get_name_from_cfg "$dom"
AUTONAMES+=("$NM")
done
}
parseln()
{
name=${1:0:$((${#1}-36))}
name=${name%% *}
rest="${1: -36}"
id=${rest:0:4}
id=`echo $id`
mem=${rest:4:6}
mem=`echo $mem`
vcpu=${rest:10:6}
vcpu=`echo $vcpu`
state=${rest:16:11}
state=`echo $state`
tm=${rest:27}
tm=`echo $tm`
}
xm_list()
{
TERM=vt100 ${xm_cmd} list | grep -v '^Name *ID'
}
is_cfg_running()
{
get_name_from_cfg "$1"
while read LN; do
parseln "$LN"
[ "$id" = 0 ] && continue
if [ "$name" = "$NM" ]; then
[ -z "$state" ] && return 1
return 0
fi
done < <(xm_list)
return 1
}
start()
{
if [ -f "$LOCKFILE" ]; then
echo -n "xendomains already running (lockfile exists)"
rc_reset
rc_status -v
return 0
fi
local printed=0
if [ "$XENDOMAINS_RESTORE" = "true" ] &&
dir_contains_something "$XENDOMAINS_SAVE"; then
mkdir -p $(dirname "$LOCKFILE")
touch "$LOCKFILE"
echo "Restoring saved Xen domains"
printed=1
for dom in "$XENDOMAINS_SAVE"/*; do
echo -n " ${dom##*/}: "
${xm_cmd} restore "$dom" >/dev/null 2>&1
if [ $? -ne 0 ]; then
rc_failed
else
rc_reset
rm -f "$dom"
fi
rc_status -v
done
fi
if dir_contains_something "$XENDOMAINS_AUTO"; then
touch "$LOCKFILE"
echo "Starting auto Xen domains"
printed=1
for dom in "$XENDOMAINS_AUTO"/*; do
echo -n " ${dom##*/}: "
if is_cfg_running "$dom"; then
rc_status -s
else
if grep -q "^name" "$dom";then
${xm_cmd} create --quiet --defconfig
"$dom"
elif grep -q "(name .*" "$dom";then
${xm_cmd} create --quiet --config "$dom"
fi
if [ $? -ne 0 ]; then
rc_failed
else
usleep $XENDOMAINS_CREATE_USLEEP
rc_reset
fi
rc_status -v
fi
done
fi
if [ $printed -eq 0 ]; then
echo -n "Starting xendomains"
rc_failed 6 # not configured
rc_status -v
fi
}
is_zombie_state()
{
[ "$1" = "-b---d" ] || [ "$1" = "-----d" ]
}
any_non_zombies()
{
while read LN; do
parseln "$LN"
[ "$id" = 0 ] && continue
[ -z "$state" ] && continue
is_zombie_state "$state" || return 0
done < <(xm_list)
return 1
}
migrate_with_watchdog()
{
(${xm_cmd} migrate "$@" ; echo $? > "$RETCODE_FILE") >/dev/null 2>&1 &
watchdog_xm $!
}
save_with_watchdog()
{
(${xm_cmd} save "$@" ; echo $? > "$RETCODE_FILE") >/dev/null 2>&1 &
watchdog_xm $!
}
shutdown_with_watchdog()
{
(${xm_cmd} shutdown -w "$@" ; echo $? > "$RETCODE_FILE") >/dev/null
2>&1 &
watchdog_xm $!
}
get_return_code()
{
local RC=127
[ -r "$RETCODE_FILE" ] && RC=`head -c10 "$RETCODE_FILE"`
rm -f "$RETCODE_FILE"
return $RC
}
# $1: The PID to wait on.
watchdog_xm()
{
local col=$((COLUMNS-11))
if [ -z "$XENDOMAINS_STOP_MAXWAIT" ] || [ "$XENDOMAINS_STOP_MAXWAIT" =
"0" ]; then
wait $1 >/dev/null 2>&1
get_return_code
return
fi
rc_timer_on $XENDOMAINS_STOP_MAXWAIT $col
while true; do
# Prefer "jobs" over "ps": faster and no false positives
pid=`jobs -l | grep " $1 Running"`
if [ -z "$pid" ]; then
break
fi
pid=`jobs -l | grep " $_rc_timer_pid Running"`
if [ -z "$pid" ]; then
disown $1 # To avoid the "Terminated..." message
kill $1 >/dev/null 2>&1
fi
sleep 1
done
rc_timer_off
if [ $smart_term -ne 0 ]; then
echo -en "\015${esc}[${col}C "
fi
get_return_code
}
stop()
{
echo "Shutting down Xen domains"
if [ "$XENDOMAINS_AUTO_ONLY" = "true" ]; then
running_auto_names
fi
local printed=0
while read LN; do
parseln "$LN"
[ "$id" = 0 ] && continue
[ -z "$state" ] && continue
printed=1
if [ "$XENDOMAINS_AUTO_ONLY" = "true" ]; then
is_auto_domain=0
for n in "${AUTONAMES[@]}"; do
if [ "$name" = "$n" ]; then
is_auto_domain=1
break
fi
done
if [ $is_auto_domain -eq 0 ]; then
echo -n " $name: "
rc_status -s
continue
fi
fi
if [ -n "$XENDOMAINS_SYSRQ" ]; then
for sysrq in $XENDOMAINS_SYSRQ; do
echo -n " $name: "
echo -n "sending sysrq '$sysrq'... "
${xm_cmd} sysrq $id $sysrq
if [ $? -ne 0 ]; then
rc_failed
else
rc_reset
fi
rc_status -v
# usleep just ignores empty arg
usleep $XENDOMAINS_USLEEP
done
fi
if is_zombie_state "$state"; then
echo -n " $name: "
echo -n "destroying zombie... "
${xm_cmd} destroy $id
rc_reset
rc_status -v
continue
fi
if [ -n "$XENDOMAINS_MIGRATE" ]; then
echo -n " $name: "
echo -n "migrating... "
migrate_with_watchdog $id "$XENDOMAINS_MIGRATE"
if [ $? -ne 0 ]; then
rc_failed
rc_status -v
else
rc_reset
rc_status -v
continue
fi
fi
if [ -n "$XENDOMAINS_SAVE" ]; then
echo -n " $name: "
echo -n "saving... "
save_with_watchdog $id "$XENDOMAINS_SAVE/$name"
if [ $? -ne 0 ]; then
rm -f "$XENDOMAINS_SAVE/$name"
rc_failed
rc_status -v
else
rc_reset
rc_status -v
continue
fi
fi
if [ -n "$XENDOMAINS_SHUTDOWN" ]; then
echo -n " $name: "
echo -n "shutting down... "
shutdown_with_watchdog $id $XENDOMAINS_SHUTDOWN
if [ $? -ne 0 ]; then
rc_failed
else
rc_reset
fi
rc_status -v
fi
done < <(xm_list)
if [ -n "$XENDOMAINS_SHUTDOWN_ALL" ] && any_non_zombies ; then
echo -n " others: shutting down... "
shutdown_with_watchdog $XENDOMAINS_SHUTDOWN_ALL
if [ $? -ne 0 ]; then
rc_failed
else
rc_reset
fi
rc_status -v
fi
if [ $printed -eq 0 ]; then
echo -e "${rc_done_up}"
fi
# Unconditionally delete lock file
rm -f "$LOCKFILE"
}
check_domain_up()
{
while read LN; do
parseln "$LN"
[ "$id" = 0 ] && continue
if [ "$name" = "$1" ]; then
[ -z "$state" ] && return 1
return 0
fi
done < <(xm_list)
return 1
}
check_all_domains_up()
{
any_auto=0
any_save=0
dir_contains_something "$XENDOMAINS_AUTO" && any_auto=1
dir_contains_something "$XENDOMAINS_SAVE" && any_save=1
if [ $any_auto -eq 0 ] && [ $any_save -eq 0 ]; then
rc_reset
rc_status -v
return
fi
echo
if [ $any_auto -ne 0 ]; then
for nm in "$XENDOMAINS_AUTO"/*; do
get_name_from_cfg "$nm"
echo -n " $nm: "
if check_domain_up "$NM"; then
rc_reset
else
rc_failed 2
fi
rc_status -v
done
fi
if [ $any_save -ne 0 ]; then
for nm in "$XENDOMAINS_SAVE"/*; do
echo -n " $nm: "
rc_failed 3