Comment # 6 on bug 1216871 from Petr Vorel
After upgrading to the 545 driver, both 'systemctl suspend' and
'systemctl hibernate' still don't work.

'systemctl suspend' does not put the laptop to sleep (the power LED stays on),
and I had to power off the laptop. dmesg (see
dmesg.2023-11-23.systemctl-suspend.txt) shows many lines like:
[42875.640166] NVRM kbusVerifyBar2_GM107: MMUTest BAR0 window offset 0x70f000
returned garbage 0x0

'systemctl hibernate' correctly puts the laptop to sleep, but there is no
output on the display after resume and I see a kernel oops in dmesg (I could
still ssh to the system):

[  761.013720] NVRM s_executeBooterUcode_TU102: Booter failed with non-zero
error code: 0xffffffff
[  761.013728] NVRM kgspExecuteBooterLoad_TU102: failed to execute Booter Load:
0xffff
[  761.013735] NVRM nvAssertOkFailedNoLog: Assertion failed: Failure: Generic
Error [NV_ERR_GENERIC] (0x0000FFFF) returned from
kgspExecuteBooterLoad_HAL(pGpu, pKernelGsp,
memdescGetPhysAddr(pKernelGsp->pSRMetaDescriptor, AT_GPU,0)) @
kernel_gsp_tu102.c:1152
[  761.016748] NVRM nvCheckOkFailedNoLog: Check failed: Failure: Generic Error
[NV_ERR_GENERIC] (0x0000FFFF) returned from kgspRestorePowerMgmtState_HAL(pGpu,
pKernelGsp) @ gpu_suspend.c:197
[  761.017864] ------------[ cut here ]------------
[  761.017866] WARNING: CPU: 0 PID: 5064 at
/home/abuild/rpmbuild/BUILD/open-gpu-kernel-modules-545.29.02/obj/default/kernel-open/nvidia/nv.c:4005
nv_restore_user_channels+0x4e/0x1e0 [nvidia]
[  761.017971] Modules linked in: ccm cmac algif_hash algif_skcipher af_alg
snd_usb_audio ch341 usbserial snd_usbmidi_lib snd_ump snd_rawmidi
snd_seq_device af_packet nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib
nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat
nf_tables ebtable_nat ebtable_broute ip6table_nat ip6table_mangle ip6table_raw
ip6table_security iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4
iptable_mangle iptable_raw iptable_security nfnetlink ebtable_filter ebtables
ip6table_filter ip6_tables iptable_filter bpfilter qrtr bnep nvidia_drm(O)
nvidia_modeset(O) btusb btrtl btintel btbcm btmtk bluetooth uvcvideo
videobuf2_vmalloc uvc videobuf2_memops videobuf2_v4l2 videodev videobuf2_common
mc ecdh_generic joydev binfmt_misc nvidia_uvm(O) xfs snd_ctl_led
snd_soc_skl_hda_dsp snd_soc_intel_hda_dsp_common snd_soc_hdac_hdmi
snd_sof_probes snd_hda_codec_realtek snd_hda_codec_generic snd_soc_dmic
snd_sof_pci_intel_tgl snd_sof_intel_hda_common soundwire_intel
[  761.017999]  snd_sof_intel_hda_mlink soundwire_cadence snd_sof_intel_hda
snd_sof_pci snd_sof_xtensa_dsp snd_sof nls_iso8859_1 snd_sof_utils
snd_soc_hdac_hda nls_cp437 snd_hda_ext_core vfat snd_soc_acpi_intel_match fat
snd_soc_acpi soundwire_generic_allocation soundwire_bus iwlmvm snd_soc_core
snd_compress intel_uncore_frequency intel_uncore_frequency_common
snd_pcm_dmaengine intel_tcc_cooling mac80211 libarc4 x86_pkg_temp_thermal
intel_powerclamp snd_hda_intel snd_intel_dspcfg snd_intel_sdw_acpi coretemp
snd_hda_codec snd_hda_core spi_nor iTCO_wdt kvm_intel iwlwifi snd_hwdep
pmt_telemetry intel_pmc_bxt mtd mei_hdcp mei_wdt mei_pxp iTCO_vendor_support
intel_rapl_msr pmt_class nvidia(O) kvm snd_pcm thinkpad_acpi
processor_thermal_device_pci think_lmi processor_thermal_device thunderbolt igc
pcspkr irqbypass firmware_attributes_class wmi_bmof cfg80211 ledtrig_audio
mei_me i2c_i801 processor_thermal_rfim spi_intel_pci snd_timer platform_profile
processor_thermal_mbox spi_intel processor_thermal_rapl i2c_smbus mei thermal
[  761.018025]  snd intel_rapl_common intel_vsec rfkill fan soundcore
int3403_thermal ac int340x_thermal_zone intel_hid int3400_thermal
intel_pmc_core acpi_tad sparse_keymap acpi_thermal_rel acpi_pad
tiny_power_button fuse configfs efi_pstore dmi_sysfs ip_tables x_tables
dm_crypt essiv authenc trusted asn1_encoder tee hid_generic usbhid
crct10dif_pclmul crc32_pclmul polyval_clmulni polyval_generic gf128mul
ghash_clmulni_intel xhci_pci rtsx_pci_sdmmc sha512_ssse3 xhci_pci_renesas
xhci_hcd mmc_core aesni_intel ucsi_acpi nvme typec_ucsi video crypto_simd
cryptd nvme_core usbcore roles rtsx_pci typec button battery wmi
pinctrl_alderlake serio_raw br_netfilter btrfs bridge stp llc dm_multipath
scsi_dh_rdac scsi_dh_emc scsi_dh_alua sd_mod t10_pi sg scsi_mod blake2b_generic
libcrc32c scsi_common crc32c_intel xor msr raid6_pq dm_mirror dm_region_hash
dm_log dm_mod bbswitch(O) efivarfs
[  761.018055] CPU: 0 PID: 5064 Comm: nvidia-sleep.sh Tainted: G           O   
   6.6.1-1-default #1 openSUSE Tumbleweed
0c6504f7d2c054731662677f280b3e0e68eca996
[  761.018058] Hardware name: LENOVO 21D7S22N08/21D7S22N08, BIOS N3FET36W (1.21
) 05/31/2023
[  761.018058] RIP: 0010:nv_restore_user_channels+0x4e/0x1e0 [nvidia]
[  761.018122] Code: 24 38 06 00 00 4c 89 ef e8 bf ab 56 ce f6 43 10 01 74 73
48 89 de 31 ff e8 ef d7 0f 00 41 89 c6 85 c0 0f 84 4b 01 00 00 31 ed <0f> 0b 49
81 c4 60 07 00 00 4c 89 e7 e8 91 ab 56 ce be 01 00 00 00
[  761.018123] RSP: 0018:ffffc90001c7bd38 EFLAGS: 00010246
[  761.018125] RAX: 000000000000000f RBX: ffff888109660000 RCX:
0000000000000000
[  761.018126] RDX: ffffc90001c7bcb8 RSI: 0000000000000282 RDI:
ffffc90001c7bc78
[  761.018126] RBP: 0000000000000000 R08: 0000000000000000 R09:
000000000003f0e0
[  761.018127] R10: ffffffffc17663b0 R11: ffffffffc17663f0 R12:
ffff888109660000
[  761.018127] R13: ffff888109660638 R14: 000000000000000f R15:
0000000000000000
[  761.018128] FS:  00007f56ef2b9580(0000) GS:ffff88884f400000(0000)
knlGS:0000000000000000
[  761.018129] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  761.018130] CR2: 000055af2d3cfc18 CR3: 00000001469fc000 CR4:
0000000000f50ef0
[  761.018130] PKRU: 55555554
[  761.018131] Call Trace:
[  761.018133]  <TASK>
[  761.018134]  ? nv_restore_user_channels+0x4e/0x1e0 [nvidia
c5d1169f64f374e73bddb2a1970ccca0c527acfc]
[  761.018190]  ? __warn+0x81/0x130
[  761.018193]  ? nv_restore_user_channels+0x4e/0x1e0 [nvidia
c5d1169f64f374e73bddb2a1970ccca0c527acfc]
[  761.018250]  ? report_bug+0x171/0x1a0
[  761.018253]  ? handle_bug+0x3c/0x80
[  761.018255]  ? exc_invalid_op+0x17/0x70
[  761.018257]  ? asm_exc_invalid_op+0x1a/0x20
[  761.018260]  ? nv_restore_user_channels+0x4e/0x1e0 [nvidia
c5d1169f64f374e73bddb2a1970ccca0c527acfc]
[  761.018318]  ? nv_restore_user_channels+0x41/0x1e0 [nvidia
c5d1169f64f374e73bddb2a1970ccca0c527acfc]
[  761.018374]  nv_set_system_power_state+0xe9/0x470 [nvidia
c5d1169f64f374e73bddb2a1970ccca0c527acfc]
[  761.018432]  nv_procfs_write_suspend+0xd7/0x150 [nvidia
c5d1169f64f374e73bddb2a1970ccca0c527acfc]
[  761.018497]  proc_reg_write+0x5a/0xa0
[  761.018500]  vfs_write+0xeb/0x3e0
[  761.018503]  ksys_write+0x67/0xe0
[  761.018505]  do_syscall_64+0x5d/0x90
[  761.018507]  ? syscall_exit_to_user_mode+0x2b/0x40
[  761.018508]  ? do_syscall_64+0x6c/0x90
[  761.018509]  ? do_syscall_64+0x6c/0x90
[  761.018510]  ? exc_page_fault+0x71/0x160
[  761.018511]  entry_SYSCALL_64_after_hwframe+0x6e/0xd8
[  761.018514] RIP: 0033:0x7f56ef10afb4
[  761.018545] Code: 84 00 00 00 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90
90 f3 0f 1e fa 90 90 80 3d 75 ea 0e 00 00 74 13 b8 01 00 00 00 0f 05 <48> 3d 00
f0 ff ff 77 54 c3 0f 1f 00 48 83 ec 28 48 89 54 24 18 48
[  761.018546] RSP: 002b:00007ffc56ec1b08 EFLAGS: 00000202 ORIG_RAX:
0000000000000001
[  761.018548] RAX: ffffffffffffffda RBX: 0000000000000007 RCX:
00007f56ef10afb4
[  761.018548] RDX: 0000000000000007 RSI: 000055cf1a7e0630 RDI:
0000000000000001
[  761.018549] RBP: 000055cf1a7e0630 R08: 0000000000000410 R09:
0000000000000001
[  761.018549] R10: 0000000000000004 R11: 0000000000000202 R12:
0000000000000007
[  761.018550] R13: 00007f56ef1ec5c0 R14: 00007f56ef1e9f20 R15:
0000000000000000
[  761.018551]  </TASK>
[  761.018552] ---[ end trace 0000000000000000 ]---

$ uname -a
Linux p16 6.6.1-1-default #1 SMP PREEMPT_DYNAMIC Thu Nov  9 05:27:56 UTC 2023
(1fcc265) x86_64 x86_64 x86_64 GNU/Linux

$ rpm -qa |grep -i nvidia | sort
kernel-firmware-nvidia-gspx-G06-545.29.02-1.1.x86_64
kernel-firmware-nvidia-20231107-1.1.noarch
libnvidia-egl-wayland1-1.1.12-1.2.x86_64
libva-nvidia-driver-0.0.10-1.1.x86_64
nvidia-compute-G06-32bit-545.29.02-18.1.x86_64
nvidia-compute-G06-545.29.02-18.1.x86_64
nvidia-gl-G06-32bit-545.29.02-18.1.x86_64
nvidia-gl-G06-545.29.02-18.1.x86_64
nvidia-open-driver-G06-signed-kmp-default-545.29.02_k6.6.1_1-1.1.x86_64
nvidia-video-G06-32bit-545.29.02-18.1.x86_64
nvidia-video-G06-545.29.02-18.1.x86_64

$ for i in /sys/power/state /sys/power/mem_sleep /sys/power/disk
/sys/power/image_size /sys/power/resume; do echo "== $i =="; cat $i; echo; done
== /sys/power/state ==
freeze mem disk

== /sys/power/mem_sleep ==
[s2idle]

== /sys/power/disk ==
[platform] shutdown reboot suspend test_resume

== /sys/power/image_size ==
13347745792

== /sys/power/resume ==
254:1

NOTE: I also have journalctl logs. IMHO they are not needed, but let me know if
you want them.


You are receiving this mail because: