android_kernel_samsung_sm8650/drivers/vhost/vdpa.c
Greg Kroah-Hartman f1bc13cb9d This is the 6.1.64 stable release
-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEZH8oZUiU471FcZm+ONu9yGCSaT4FAmVmHpsACgkQONu9yGCS
 aT5uvw//SzcE0GImnHnfeN7iXtpFE9O0fhTxsjZCi8/HTXmGWPtQgWscd9y81bAd
 EHBVr456GXqd6KuIF+03g/r/FYinwWqK375meLfaybw1vSBP+fZttrEGqz6nTnYD
 yqOxw2bqgz8Xjp63UeNHD6mifpBvVtuAvzrfO1E2Ie/U1OU2uKdjRRv0iijKNeWN
 liOYTXaddIkVfZR0z6dVTl0hb5dPWsxNmF77kfVpKz4ALIHJcO13DlUuKtQz6Sb6
 0ElmJpuonHuUxHzb8e9LLsFy3IvbBqomSscwcd0tngtdUTzhMYFIZLjg2+WQ9Ovq
 raMGqvS/bKsoyoTBNKL83QB2NyXQb3vkfL0NgLsq9IwDl+r96mP9ctANYGwSjhND
 o/4sa/fbMFzeInA8Rzh7i56RCNstOBKApJPhBzWuY0f/6b1BZpvZaONyX3fFksWO
 dMeYT16GgO4lhQXnG3O6mtDT8eoZ1fLf7ZdGEZ2NktcOzXYelNc4aXJke7qdlIop
 CVxM+Ur+juj+DJymo59a6baXjEgIROdHq83N3CZwetGviPHneGqgYc0K7ETtA33H
 sH/0KGYAT8SzzjMlnXB0lpjp68WViJfzzo9Wxdf2aDZbL3SdI14GPKMUeDqqeSyU
 8bB2Hb4ItccRFW9RriiE3BPGnLGu7PDTkn5TgXDG/bDX54Cb5DQ=
 =YPzI
 -----END PGP SIGNATURE-----

Merge 6.1.64 into android14-6.1-lts

Changes in 6.1.64
	locking/ww_mutex/test: Fix potential workqueue corruption
	lib/generic-radix-tree.c: Don't overflow in peek()
	perf/core: Bail out early if the request AUX area is out of bound
	srcu: Fix srcu_struct node grpmask overflow on 64-bit systems
	selftests/lkdtm: Disable CONFIG_UBSAN_TRAP in test config
	clocksource/drivers/timer-imx-gpt: Fix potential memory leak
	clocksource/drivers/timer-atmel-tcb: Fix initialization on SAM9 hardware
	smp,csd: Throw an error if a CSD lock is stuck for too long
	cpu/hotplug: Don't offline the last non-isolated CPU
	workqueue: Provide one lock class key per work_on_cpu() callsite
	x86/mm: Drop the 4 MB restriction on minimal NUMA node memory size
	wifi: plfxlc: fix clang-specific fortify warning
	wifi: mac80211_hwsim: fix clang-specific fortify warning
	wifi: mac80211: don't return unset power in ieee80211_get_tx_power()
	atl1c: Work around the DMA RX overflow issue
	bpf: Detect IP == ksym.end as part of BPF program
	wifi: ath9k: fix clang-specific fortify warnings
	wifi: ath10k: fix clang-specific fortify warning
	net: annotate data-races around sk->sk_tx_queue_mapping
	net: annotate data-races around sk->sk_dst_pending_confirm
	wifi: ath10k: Don't touch the CE interrupt registers after power up
	vsock: read from socket's error queue
	bpf: Ensure proper register state printing for cond jumps
	Bluetooth: btusb: Add date->evt_skb is NULL check
	Bluetooth: Fix double free in hci_conn_cleanup
	ACPI: EC: Add quirk for HP 250 G7 Notebook PC
	tsnep: Fix tsnep_request_irq() format-overflow warning
	platform/chrome: kunit: initialize lock for fake ec_dev
	platform/x86: thinkpad_acpi: Add battery quirk for Thinkpad X120e
	drm/gma500: Fix call trace when psb_gem_mm_init() fails
	drm/komeda: drop all currently held locks if deadlock happens
	drm/amdgpu: not to save bo in the case of RAS err_event_athub
	drm/amdkfd: Fix a race condition of vram buffer unref in svm code
	drm/amd: Update `update_pcie_parameters` functions to use uint8_t arguments
	drm/amd/display: use full update for clip size increase of large plane source
	string.h: add array-wrappers for (v)memdup_user()
	kernel: kexec: copy user-array safely
	kernel: watch_queue: copy user-array safely
	drm_lease.c: copy user-array safely
	drm: vmwgfx_surface.c: copy user-array safely
	drm/msm/dp: skip validity check for DP CTS EDID checksum
	drm/amd: Fix UBSAN array-index-out-of-bounds for SMU7
	drm/amd: Fix UBSAN array-index-out-of-bounds for Polaris and Tonga
	drm/amdgpu: Fix potential null pointer derefernce
	drm/panel: fix a possible null pointer dereference
	drm/panel/panel-tpo-tpg110: fix a possible null pointer dereference
	drm/radeon: fix a possible null pointer dereference
	drm/amdgpu/vkms: fix a possible null pointer dereference
	drm/panel: st7703: Pick different reset sequence
	drm/amdkfd: Fix shift out-of-bounds issue
	drm/amdgpu: Fix a null pointer access when the smc_rreg pointer is NULL
	arm64: dts: ls208xa: use a pseudo-bus to constrain usb dma size
	selftests/efivarfs: create-read: fix a resource leak
	ASoC: soc-card: Add storage for PCI SSID
	ASoC: SOF: Pass PCI SSID to machine driver
	crypto: pcrypt - Fix hungtask for PADATA_RESET
	ASoC: SOF: ipc4: handle EXCEPTION_CAUGHT notification from firmware
	RDMA/hfi1: Use FIELD_GET() to extract Link Width
	scsi: hisi_sas: Set debugfs_dir pointer to NULL after removing debugfs
	scsi: ibmvfc: Remove BUG_ON in the case of an empty event pool
	fs/jfs: Add check for negative db_l2nbperpage
	fs/jfs: Add validity check for db_maxag and db_agpref
	jfs: fix array-index-out-of-bounds in dbFindLeaf
	jfs: fix array-index-out-of-bounds in diAlloc
	HID: lenovo: Detect quirk-free fw on cptkbd and stop applying workaround
	ARM: 9320/1: fix stack depot IRQ stack filter
	ALSA: hda: Fix possible null-ptr-deref when assigning a stream
	PCI: tegra194: Use FIELD_GET()/FIELD_PREP() with Link Width fields
	PCI: mvebu: Use FIELD_PREP() with Link Width
	atm: iphase: Do PCI error checks on own line
	PCI: Do error check on own line to split long "if" conditions
	scsi: libfc: Fix potential NULL pointer dereference in fc_lport_ptp_setup()
	PCI: Use FIELD_GET() to extract Link Width
	PCI: Extract ATS disabling to a helper function
	PCI: Disable ATS for specific Intel IPU E2000 devices
	misc: pci_endpoint_test: Add Device ID for R-Car S4-8 PCIe controller
	PCI: Use FIELD_GET() in Sapphire RX 5600 XT Pulse quirk
	ASoC: Intel: soc-acpi-cht: Add Lenovo Yoga Tab 3 Pro YT3-X90 quirk
	crypto: hisilicon/qm - prevent soft lockup in receive loop
	HID: Add quirk for Dell Pro Wireless Keyboard and Mouse KM5221W
	exfat: support handle zero-size directory
	mfd: intel-lpss: Add Intel Lunar Lake-M PCI IDs
	iio: adc: stm32-adc: harden against NULL pointer deref in stm32_adc_probe()
	thunderbolt: Apply USB 3.x bandwidth quirk only in software connection manager
	tty: vcc: Add check for kstrdup() in vcc_probe()
	usb: dwc3: core: configure TX/RX threshold for DWC3_IP
	soundwire: dmi-quirks: update HP Omen match
	f2fs: fix error handling of __get_node_page
	usb: gadget: f_ncm: Always set current gadget in ncm_bind()
	9p/trans_fd: Annotate data-racy writes to file::f_flags
	9p: v9fs_listxattr: fix %s null argument warning
	i3c: mipi-i3c-hci: Fix out of bounds access in hci_dma_irq_handler
	i2c: fix memleak in i2c_new_client_device()
	i2c: sun6i-p2wi: Prevent potential division by zero
	virtio-blk: fix implicit overflow on virtio_max_dma_size
	i3c: master: mipi-i3c-hci: Fix a kernel panic for accessing DAT_data.
	media: gspca: cpia1: shift-out-of-bounds in set_flicker
	media: vivid: avoid integer overflow
	gfs2: ignore negated quota changes
	gfs2: fix an oops in gfs2_permission
	media: cobalt: Use FIELD_GET() to extract Link Width
	media: ccs: Fix driver quirk struct documentation
	media: imon: fix access to invalid resource for the second interface
	drm/amd/display: Avoid NULL dereference of timing generator
	kgdb: Flush console before entering kgdb on panic
	i2c: dev: copy userspace array safely
	ASoC: ti: omap-mcbsp: Fix runtime PM underflow warnings
	drm/qxl: prevent memory leak
	ALSA: hda/realtek: Add quirk for ASUS UX7602ZM
	drm/amdgpu: fix software pci_unplug on some chips
	pwm: Fix double shift bug
	mtd: rawnand: tegra: add missing check for platform_get_irq()
	wifi: iwlwifi: Use FW rate for non-data frames
	sched/core: Optimize in_task() and in_interrupt() a bit
	SUNRPC: ECONNRESET might require a rebind
	mtd: rawnand: intel: check return value of devm_kasprintf()
	mtd: rawnand: meson: check return value of devm_kasprintf()
	NFSv4.1: fix handling NFS4ERR_DELAY when testing for session trunking
	SUNRPC: Add an IS_ERR() check back to where it was
	NFSv4.1: fix SP4_MACH_CRED protection for pnfs IO
	SUNRPC: Fix RPC client cleaned up the freed pipefs dentries
	gfs2: Silence "suspicious RCU usage in gfs2_permission" warning
	vhost-vdpa: fix use after free in vhost_vdpa_probe()
	net: set SOCK_RCU_FREE before inserting socket into hashtable
	ipvlan: add ipvlan_route_v6_outbound() helper
	tty: Fix uninit-value access in ppp_sync_receive()
	net: hns3: fix add VLAN fail issue
	net: hns3: add barrier in vf mailbox reply process
	net: hns3: fix incorrect capability bit display for copper port
	net: hns3: fix out-of-bounds access may occur when coalesce info is read via debugfs
	net: hns3: fix variable may not initialized problem in hns3_init_mac_addr()
	net: hns3: fix VF reset fail issue
	net: hns3: fix VF wrong speed and duplex issue
	tipc: Fix kernel-infoleak due to uninitialized TLV value
	net: mvneta: fix calls to page_pool_get_stats
	ppp: limit MRU to 64K
	xen/events: fix delayed eoi list handling
	ptp: annotate data-race around q->head and q->tail
	bonding: stop the device in bond_setup_by_slave()
	net: ethernet: cortina: Fix max RX frame define
	net: ethernet: cortina: Handle large frames
	net: ethernet: cortina: Fix MTU max setting
	af_unix: fix use-after-free in unix_stream_read_actor()
	netfilter: nf_conntrack_bridge: initialize err to 0
	netfilter: nf_tables: fix pointer math issue in nft_byteorder_eval()
	net: stmmac: fix rx budget limit check
	net: stmmac: avoid rx queue overrun
	net/mlx5e: fix double free of encap_header
	net/mlx5e: fix double free of encap_header in update funcs
	net/mlx5e: Fix pedit endianness
	net/mlx5e: Reduce the size of icosq_str
	net/mlx5e: Check return value of snprintf writing to fw_version buffer
	net/mlx5e: Check return value of snprintf writing to fw_version buffer for representors
	macvlan: Don't propagate promisc change to lower dev in passthru
	tools/power/turbostat: Fix a knl bug
	tools/power/turbostat: Enable the C-state Pre-wake printing
	cifs: spnego: add ';' in HOST_KEY_LEN
	cifs: fix check of rc in function generate_smb3signingkey
	i915/perf: Fix NULL deref bugs with drm_dbg() calls
	media: venus: hfi: add checks to perform sanity on queue pointers
	perf intel-pt: Fix async branch flags
	powerpc/perf: Fix disabling BHRB and instruction sampling
	randstruct: Fix gcc-plugin performance mode to stay in group
	bpf: Fix check_stack_write_fixed_off() to correctly spill imm
	bpf: Fix precision tracking for BPF_ALU | BPF_TO_BE | BPF_END
	scsi: mpt3sas: Fix loop logic
	scsi: megaraid_sas: Increase register read retry rount from 3 to 30 for selected registers
	scsi: qla2xxx: Fix system crash due to bad pointer access
	crypto: x86/sha - load modules based on CPU features
	x86/cpu/hygon: Fix the CPU topology evaluation for real
	KVM: x86: hyper-v: Don't auto-enable stimer on write from user-space
	KVM: x86: Ignore MSR_AMD64_TW_CFG access
	KVM: x86: Clear bit12 of ICR after APIC-write VM-exit
	audit: don't take task_lock() in audit_exe_compare() code path
	audit: don't WARN_ON_ONCE(!current->mm) in audit_exe_compare()
	proc: sysctl: prevent aliased sysctls from getting passed to init
	tty/sysrq: replace smp_processor_id() with get_cpu()
	tty: serial: meson: fix hard LOCKUP on crtscts mode
	hvc/xen: fix console unplug
	hvc/xen: fix error path in xen_hvc_init() to always register frontend driver
	hvc/xen: fix event channel handling for secondary consoles
	PCI/sysfs: Protect driver's D3cold preference from user space
	mm/damon/sysfs: remove requested targets when online-commit inputs
	mm/damon/sysfs: update monitoring target regions for online input commit
	watchdog: move softlockup_panic back to early_param
	mm/damon/lru_sort: avoid divide-by-zero in hot threshold calculation
	mm/damon/ops-common: avoid divide-by-zero during region hotness calculation
	mm/damon: implement a function for max nr_accesses safe calculation
	mm/damon/sysfs: check error from damon_sysfs_update_target()
	ACPI: resource: Do IRQ override on TongFang GMxXGxx
	regmap: Ensure range selector registers are updated after cache sync
	wifi: ath11k: fix temperature event locking
	wifi: ath11k: fix dfs radar event locking
	wifi: ath11k: fix htt pktlog locking
	wifi: ath11k: fix gtk offload status event locking
	mmc: meson-gx: Remove setting of CMD_CFG_ERROR
	genirq/generic_chip: Make irq_remove_generic_chip() irqdomain aware
	KEYS: trusted: tee: Refactor register SHM usage
	KEYS: trusted: Rollback init_trusted() consistently
	PCI: keystone: Don't discard .remove() callback
	PCI: keystone: Don't discard .probe() callback
	arm64: Restrict CPU_BIG_ENDIAN to GNU as or LLVM IAS 15.x or newer
	parisc/pdc: Add width field to struct pdc_model
	parisc/power: Add power soft-off when running on qemu
	clk: socfpga: Fix undefined behavior bug in struct stratix10_clock_data
	clk: qcom: ipq8074: drop the CLK_SET_RATE_PARENT flag from PLL clocks
	clk: qcom: ipq6018: drop the CLK_SET_RATE_PARENT flag from PLL clocks
	ksmbd: handle malformed smb1 message
	ksmbd: fix slab out of bounds write in smb_inherit_dacl()
	mmc: vub300: fix an error code
	mmc: sdhci_am654: fix start loop index for TAP value parsing
	mmc: Add quirk MMC_QUIRK_BROKEN_CACHE_FLUSH for Micron eMMC Q2J54A
	PCI/ASPM: Fix L1 substate handling in aspm_attr_store_common()
	PCI: kirin: Don't discard .remove() callback
	PCI: exynos: Don't discard .remove() callback
	wifi: wilc1000: use vmm_table as array in wilc struct
	svcrdma: Drop connection after an RDMA Read error
	rcu/tree: Defer setting of jiffies during stall reset
	arm64: dts: qcom: ipq6018: Fix hwlock index for SMEM
	PM: hibernate: Use __get_safe_page() rather than touching the list
	PM: hibernate: Clean up sync_read handling in snapshot_write_next()
	rcu: kmemleak: Ignore kmemleak false positives when RCU-freeing objects
	btrfs: don't arbitrarily slow down delalloc if we're committing
	arm64: dts: qcom: ipq8074: Fix hwlock index for SMEM
	firmware: qcom_scm: use 64-bit calling convention only when client is 64-bit
	ACPI: FPDT: properly handle invalid FPDT subtables
	arm64: dts: qcom: ipq6018: Fix tcsr_mutex register size
	mfd: qcom-spmi-pmic: Fix reference leaks in revid helper
	mfd: qcom-spmi-pmic: Fix revid implementation
	ima: annotate iint mutex to avoid lockdep false positive warnings
	ima: detect changes to the backing overlay file
	netfilter: nf_tables: remove catchall element in GC sync path
	netfilter: nf_tables: split async and sync catchall in two functions
	selftests/resctrl: Remove duplicate feature check from CMT test
	selftests/resctrl: Move _GNU_SOURCE define into Makefile
	selftests/resctrl: Reduce failures due to outliers in MBA/MBM tests
	hid: lenovo: Resend all settings on reset_resume for compact keyboards
	ASoC: codecs: wsa-macro: fix uninitialized stack variables with name prefix
	jbd2: fix potential data lost in recovering journal raced with synchronizing fs bdev
	quota: explicitly forbid quota files from being encrypted
	kernel/reboot: emergency_restart: Set correct system_state
	i2c: core: Run atomic i2c xfer when !preemptible
	tracing: Have the user copy of synthetic event address use correct context
	driver core: Release all resources during unbind before updating device links
	mcb: fix error handling for different scenarios when parsing
	dmaengine: stm32-mdma: correct desc prep when channel running
	s390/cmma: fix detection of DAT pages
	mm/cma: use nth_page() in place of direct struct page manipulation
	mm/memory_hotplug: use pfn math in place of direct struct page manipulation
	mtd: cfi_cmdset_0001: Byte swap OTP info
	i3c: master: cdns: Fix reading status register
	i3c: master: svc: fix race condition in ibi work thread
	i3c: master: svc: fix wrong data return when IBI happen during start frame
	i3c: master: svc: fix ibi may not return mandatory data byte
	i3c: master: svc: fix check wrong status register in irq handler
	i3c: master: svc: fix SDA keep low when polling IBIWON timeout happen
	parisc: Prevent booting 64-bit kernels on PA1.x machines
	parisc/pgtable: Do not drop upper 5 address bits of physical address
	parisc/power: Fix power soft-off when running on qemu
	xhci: Enable RPM on controllers that support low-power states
	fs: add ctime accessors infrastructure
	smb3: fix creating FIFOs when mounting with "sfu" mount option
	smb3: fix touch -h of symlink
	smb3: fix caching of ctime on setxattr
	smb: client: fix use-after-free bug in cifs_debug_data_proc_show()
	smb: client: fix potential deadlock when releasing mids
	cifs: reconnect helper should set reconnect for the right channel
	cifs: force interface update before a fresh session setup
	cifs: do not reset chan_max if multichannel is not supported at mount
	xfs: recovery should not clear di_flushiter unconditionally
	btrfs: zoned: wait for data BG to be finished on direct IO allocation
	ALSA: info: Fix potential deadlock at disconnection
	ALSA: hda/realtek: Enable Mute LED on HP 255 G8
	ALSA: hda/realtek - Add Dell ALC295 to pin fall back table
	ALSA: hda/realtek - Enable internal speaker of ASUS K6500ZC
	ALSA: hda/realtek: Enable Mute LED on HP 255 G10
	ALSA: hda/realtek: Add quirks for HP Laptops
	pmdomain: bcm: bcm2835-power: check if the ASB register is equal to enable
	pmdomain: imx: Make imx pgc power domain also set the fwnode
	cpufreq: stats: Fix buffer overflow detection in trans_stats()
	clk: visconti: remove unused visconti_pll_provider::regmap
	clk: visconti: Fix undefined behavior bug in struct visconti_pll_provider
	Bluetooth: btusb: Add Realtek RTL8852BE support ID 0x0cb8:0xc559
	bluetooth: Add device 0bda:887b to device tables
	bluetooth: Add device 13d3:3571 to device tables
	Bluetooth: btusb: Add RTW8852BE device 13d3:3570 to device tables
	Bluetooth: btusb: Add 0bda:b85b for Fn-Link RTL8852BE
	drm/amd/display: enable dsc_clk even if dsc_pg disabled
	cxl/region: Validate region mode vs decoder mode
	cxl/region: Cleanup target list on attach error
	cxl/region: Move region-position validation to a helper
	cxl/region: Do not try to cleanup after cxl_region_setup_targets() fails
	i3c: master: svc: add NACK check after start byte sent
	i3c: master: svc: fix random hot join failure since timeout error
	cxl: Unify debug messages when calling devm_cxl_add_port()
	cxl/mem: Move devm_cxl_add_endpoint() from cxl_core to cxl_mem
	tools/testing/cxl: Define a fixed volatile configuration to parse
	cxl/region: Fix x1 root-decoder granularity calculations
	Revert ncsi: Propagate carrier gain/loss events to the NCSI controller
	Revert "i2c: pxa: move to generic GPIO recovery"
	lsm: fix default return value for vm_enough_memory
	lsm: fix default return value for inode_getsecctx
	sbsa_gwdt: Calculate timeout with 64-bit math
	i2c: designware: Disable TX_EMPTY irq while waiting for block length byte
	s390/ap: fix AP bus crash on early config change callback invocation
	net: ethtool: Fix documentation of ethtool_sprintf()
	net: dsa: lan9303: consequently nested-lock physical MDIO
	net: phylink: initialize carrier state at creation
	i2c: i801: fix potential race in i801_block_transaction_byte_by_byte
	f2fs: do not return EFSCORRUPTED, but try to run online repair
	f2fs: avoid format-overflow warning
	media: lirc: drop trailing space from scancode transmit
	media: sharp: fix sharp encoding
	media: venus: hfi_parser: Add check to keep the number of codecs within range
	media: venus: hfi: fix the check to handle session buffer requirement
	media: venus: hfi: add checks to handle capabilities from firmware
	media: ccs: Correctly initialise try compose rectangle
	drm/mediatek/dp: fix memory leak on ->get_edid callback audio detection
	drm/mediatek/dp: fix memory leak on ->get_edid callback error path
	dm-verity: don't use blocking calls from tasklets
	nfsd: fix file memleak on client_opens_release
	LoongArch: Mark __percpu functions as always inline
	riscv: mm: Update the comment of CONFIG_PAGE_OFFSET
	riscv: correct pt_level name via pgtable_l5/4_enabled
	riscv: kprobes: allow writing to x0
	mmc: sdhci-pci-gli: A workaround to allow GL9750 to enter ASPM L1.2
	mm: fix for negative counter: nr_file_hugepages
	mm: kmem: drop __GFP_NOFAIL when allocating objcg vectors
	mptcp: deal with large GSO size
	mptcp: add validity check for sending RM_ADDR
	mptcp: fix setsockopt(IP_TOS) subflow locking
	r8169: fix network lost after resume on DASH systems
	r8169: add handling DASH when DASH is disabled
	mmc: sdhci-pci-gli: GL9750: Mask the replay timer timeout of AER
	media: qcom: camss: Fix pm_domain_on sequence in probe
	media: qcom: camss: Fix vfe_get() error jump
	media: qcom: camss: Fix VFE-17x vfe_disable_output()
	media: qcom: camss: Fix VFE-480 vfe_disable_output()
	media: qcom: camss: Fix missing vfe_lite clocks check
	media: qcom: camss: Fix invalid clock enable bit disjunction
	media: qcom: camss: Fix csid-gen2 for test pattern generator
	Revert "net: r8169: Disable multicast filter for RTL8168H and RTL8107E"
	ext4: apply umask if ACL support is disabled
	ext4: correct offset of gdb backup in non meta_bg group to update_backups
	ext4: mark buffer new if it is unwritten to avoid stale data exposure
	ext4: correct return value of ext4_convert_meta_bg
	ext4: correct the start block of counting reserved clusters
	ext4: remove gdb backup copy for meta bg in setup_new_flex_group_blocks
	ext4: add missed brelse in update_backups
	ext4: properly sync file size update after O_SYNC direct IO
	drm/amd/pm: Handle non-terminated overdrive commands.
	drm/i915: Bump GLK CDCLK frequency when driving multiple pipes
	drm/i915: Fix potential spectre vulnerability
	drm/amd/pm: Fix error of MACO flag setting code
	drm/amdgpu/smu13: drop compute workload workaround
	drm/amdgpu: don't use pci_is_thunderbolt_attached()
	drm/amdgpu: don't use ATRM for external devices
	drm/amdgpu: fix error handling in amdgpu_bo_list_get()
	drm/amdgpu: lower CS errors to debug severity
	drm/amd/display: fix a NULL pointer dereference in amdgpu_dm_i2c_xfer()
	drm/amd/display: Enable fast plane updates on DCN3.2 and above
	drm/amd/display: Change the DMCUB mailbox memory location from FB to inbox
	powerpc/powernv: Fix fortify source warnings in opal-prd.c
	tracing: Have trace_event_file have ref counters
	Input: xpad - add VID for Turtle Beach controllers
	mmc: sdhci-pci-gli: GL9755: Mask the replay timer timeout of AER
	cxl/port: Fix NULL pointer access in devm_cxl_add_port()
	RISC-V: drop error print from riscv_hartid_to_cpuid()
	Linux 6.1.64

Change-Id: I9284282aeae5d0f9da957a58147efe0114f8e60a
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
2023-12-12 18:41:13 +00:00

1491 lines
34 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2018-2020 Intel Corporation.
* Copyright (C) 2020 Red Hat, Inc.
*
* Author: Tiwei Bie <tiwei.bie@intel.com>
* Jason Wang <jasowang@redhat.com>
*
* Thanks Michael S. Tsirkin for the valuable comments and
* suggestions. And thanks to Cunming Liang and Zhihong Wang for all
* their supports.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/uuid.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/vhost.h>
#include "vhost.h"
enum {
VHOST_VDPA_BACKEND_FEATURES =
(1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
(1ULL << VHOST_BACKEND_F_IOTLB_BATCH) |
(1ULL << VHOST_BACKEND_F_IOTLB_ASID),
};
#define VHOST_VDPA_DEV_MAX (1U << MINORBITS)
#define VHOST_VDPA_IOTLB_BUCKETS 16
struct vhost_vdpa_as {
struct hlist_node hash_link;
struct vhost_iotlb iotlb;
u32 id;
};
struct vhost_vdpa {
struct vhost_dev vdev;
struct iommu_domain *domain;
struct vhost_virtqueue *vqs;
struct completion completion;
struct vdpa_device *vdpa;
struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS];
struct device dev;
struct cdev cdev;
atomic_t opened;
u32 nvqs;
int virtio_id;
int minor;
struct eventfd_ctx *config_ctx;
int in_batch;
struct vdpa_iova_range range;
u32 batch_asid;
};
static DEFINE_IDA(vhost_vdpa_ida);
static dev_t vhost_vdpa_major;
static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
struct vhost_iotlb *iotlb, u64 start,
u64 last, u32 asid);
static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb)
{
struct vhost_vdpa_as *as = container_of(iotlb, struct
vhost_vdpa_as, iotlb);
return as->id;
}
static struct vhost_vdpa_as *asid_to_as(struct vhost_vdpa *v, u32 asid)
{
struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS];
struct vhost_vdpa_as *as;
hlist_for_each_entry(as, head, hash_link)
if (as->id == asid)
return as;
return NULL;
}
static struct vhost_iotlb *asid_to_iotlb(struct vhost_vdpa *v, u32 asid)
{
struct vhost_vdpa_as *as = asid_to_as(v, asid);
if (!as)
return NULL;
return &as->iotlb;
}
static struct vhost_vdpa_as *vhost_vdpa_alloc_as(struct vhost_vdpa *v, u32 asid)
{
struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS];
struct vhost_vdpa_as *as;
if (asid_to_as(v, asid))
return NULL;
if (asid >= v->vdpa->nas)
return NULL;
as = kmalloc(sizeof(*as), GFP_KERNEL);
if (!as)
return NULL;
vhost_iotlb_init(&as->iotlb, 0, 0);
as->id = asid;
hlist_add_head(&as->hash_link, head);
return as;
}
static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v,
u32 asid)
{
struct vhost_vdpa_as *as = asid_to_as(v, asid);
if (as)
return as;
return vhost_vdpa_alloc_as(v, asid);
}
static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid)
{
struct vhost_vdpa_as *as = asid_to_as(v, asid);
if (!as)
return -EINVAL;
hlist_del(&as->hash_link);
vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1, asid);
kfree(as);
return 0;
}
static void handle_vq_kick(struct vhost_work *work)
{
struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
poll.work);
struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev);
const struct vdpa_config_ops *ops = v->vdpa->config;
ops->kick_vq(v->vdpa, vq - v->vqs);
}
static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
{
struct vhost_virtqueue *vq = private;
struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;
if (call_ctx)
eventfd_signal(call_ctx, 1);
return IRQ_HANDLED;
}
static irqreturn_t vhost_vdpa_config_cb(void *private)
{
struct vhost_vdpa *v = private;
struct eventfd_ctx *config_ctx = v->config_ctx;
if (config_ctx)
eventfd_signal(config_ctx, 1);
return IRQ_HANDLED;
}
static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
struct vhost_virtqueue *vq = &v->vqs[qid];
const struct vdpa_config_ops *ops = v->vdpa->config;
struct vdpa_device *vdpa = v->vdpa;
int ret, irq;
if (!ops->get_vq_irq)
return;
irq = ops->get_vq_irq(vdpa, qid);
if (irq < 0)
return;
irq_bypass_unregister_producer(&vq->call_ctx.producer);
if (!vq->call_ctx.ctx)
return;
vq->call_ctx.producer.token = vq->call_ctx.ctx;
vq->call_ctx.producer.irq = irq;
ret = irq_bypass_register_producer(&vq->call_ctx.producer);
if (unlikely(ret))
dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration fails, ret = %d\n",
qid, vq->call_ctx.producer.token, ret);
}
static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
struct vhost_virtqueue *vq = &v->vqs[qid];
irq_bypass_unregister_producer(&vq->call_ctx.producer);
}
static int vhost_vdpa_reset(struct vhost_vdpa *v)
{
struct vdpa_device *vdpa = v->vdpa;
v->in_batch = 0;
return vdpa_reset(vdpa);
}
static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
u32 device_id;
device_id = ops->get_device_id(vdpa);
if (copy_to_user(argp, &device_id, sizeof(device_id)))
return -EFAULT;
return 0;
}
static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
u8 status;
status = ops->get_status(vdpa);
if (copy_to_user(statusp, &status, sizeof(status)))
return -EFAULT;
return 0;
}
static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
u8 status, status_old;
u32 nvqs = v->nvqs;
int ret;
u16 i;
if (copy_from_user(&status, statusp, sizeof(status)))
return -EFAULT;
status_old = ops->get_status(vdpa);
/*
* Userspace shouldn't remove status bits unless reset the
* status to 0.
*/
if (status != 0 && (status_old & ~status) != 0)
return -EINVAL;
if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK))
for (i = 0; i < nvqs; i++)
vhost_vdpa_unsetup_vq_irq(v, i);
if (status == 0) {
ret = vdpa_reset(vdpa);
if (ret)
return ret;
} else
vdpa_set_status(vdpa, status);
if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
for (i = 0; i < nvqs; i++)
vhost_vdpa_setup_vq_irq(v, i);
return 0;
}
static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
struct vhost_vdpa_config *c)
{
struct vdpa_device *vdpa = v->vdpa;
size_t size = vdpa->config->get_config_size(vdpa);
if (c->len == 0 || c->off > size)
return -EINVAL;
if (c->len > size - c->off)
return -E2BIG;
return 0;
}
static long vhost_vdpa_get_config(struct vhost_vdpa *v,
struct vhost_vdpa_config __user *c)
{
struct vdpa_device *vdpa = v->vdpa;
struct vhost_vdpa_config config;
unsigned long size = offsetof(struct vhost_vdpa_config, buf);
u8 *buf;
if (copy_from_user(&config, c, size))
return -EFAULT;
if (vhost_vdpa_config_validate(v, &config))
return -EINVAL;
buf = kvzalloc(config.len, GFP_KERNEL);
if (!buf)
return -ENOMEM;
vdpa_get_config(vdpa, config.off, buf, config.len);
if (copy_to_user(c->buf, buf, config.len)) {
kvfree(buf);
return -EFAULT;
}
kvfree(buf);
return 0;
}
static long vhost_vdpa_set_config(struct vhost_vdpa *v,
struct vhost_vdpa_config __user *c)
{
struct vdpa_device *vdpa = v->vdpa;
struct vhost_vdpa_config config;
unsigned long size = offsetof(struct vhost_vdpa_config, buf);
u8 *buf;
if (copy_from_user(&config, c, size))
return -EFAULT;
if (vhost_vdpa_config_validate(v, &config))
return -EINVAL;
buf = vmemdup_user(c->buf, config.len);
if (IS_ERR(buf))
return PTR_ERR(buf);
vdpa_set_config(vdpa, config.off, buf, config.len);
kvfree(buf);
return 0;
}
static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
return ops->suspend;
}
static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
u64 features;
features = ops->get_device_features(vdpa);
if (copy_to_user(featurep, &features, sizeof(features)))
return -EFAULT;
return 0;
}
static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
struct vhost_dev *d = &v->vdev;
u64 actual_features;
u64 features;
int i;
/*
* It's not allowed to change the features after they have
* been negotiated.
*/
if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK)
return -EBUSY;
if (copy_from_user(&features, featurep, sizeof(features)))
return -EFAULT;
if (vdpa_set_features(vdpa, features))
return -EINVAL;
/* let the vqs know what has been configured */
actual_features = ops->get_driver_features(vdpa);
for (i = 0; i < d->nvqs; ++i) {
struct vhost_virtqueue *vq = d->vqs[i];
mutex_lock(&vq->mutex);
vq->acked_features = actual_features;
mutex_unlock(&vq->mutex);
}
return 0;
}
static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
u16 num;
num = ops->get_vq_num_max(vdpa);
if (copy_to_user(argp, &num, sizeof(num)))
return -EFAULT;
return 0;
}
static void vhost_vdpa_config_put(struct vhost_vdpa *v)
{
if (v->config_ctx) {
eventfd_ctx_put(v->config_ctx);
v->config_ctx = NULL;
}
}
static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
{
struct vdpa_callback cb;
int fd;
struct eventfd_ctx *ctx;
cb.callback = vhost_vdpa_config_cb;
cb.private = v;
if (copy_from_user(&fd, argp, sizeof(fd)))
return -EFAULT;
ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
swap(ctx, v->config_ctx);
if (!IS_ERR_OR_NULL(ctx))
eventfd_ctx_put(ctx);
if (IS_ERR(v->config_ctx)) {
long ret = PTR_ERR(v->config_ctx);
v->config_ctx = NULL;
return ret;
}
v->vdpa->config->set_config_cb(v->vdpa, &cb);
return 0;
}
static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp)
{
struct vhost_vdpa_iova_range range = {
.first = v->range.first,
.last = v->range.last,
};
if (copy_to_user(argp, &range, sizeof(range)))
return -EFAULT;
return 0;
}
static long vhost_vdpa_get_config_size(struct vhost_vdpa *v, u32 __user *argp)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
u32 size;
size = ops->get_config_size(vdpa);
if (copy_to_user(argp, &size, sizeof(size)))
return -EFAULT;
return 0;
}
static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp)
{
struct vdpa_device *vdpa = v->vdpa;
if (copy_to_user(argp, &vdpa->nvqs, sizeof(vdpa->nvqs)))
return -EFAULT;
return 0;
}
/* After a successful return of ioctl the device must not process more
* virtqueue descriptors. The device can answer to read or writes of config
* fields as if it were not suspended. In particular, writing to "queue_enable"
* with a value of 1 will not make the device start processing buffers.
*/
static long vhost_vdpa_suspend(struct vhost_vdpa *v)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
if (!ops->suspend)
return -EOPNOTSUPP;
return ops->suspend(vdpa);
}
static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
void __user *argp)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
struct vdpa_vq_state vq_state;
struct vdpa_callback cb;
struct vhost_virtqueue *vq;
struct vhost_vring_state s;
u32 idx;
long r;
r = get_user(idx, (u32 __user *)argp);
if (r < 0)
return r;
if (idx >= v->nvqs)
return -ENOBUFS;
idx = array_index_nospec(idx, v->nvqs);
vq = &v->vqs[idx];
switch (cmd) {
case VHOST_VDPA_SET_VRING_ENABLE:
if (copy_from_user(&s, argp, sizeof(s)))
return -EFAULT;
ops->set_vq_ready(vdpa, idx, s.num);
return 0;
case VHOST_VDPA_GET_VRING_GROUP:
if (!ops->get_vq_group)
return -EOPNOTSUPP;
s.index = idx;
s.num = ops->get_vq_group(vdpa, idx);
if (s.num >= vdpa->ngroups)
return -EIO;
else if (copy_to_user(argp, &s, sizeof(s)))
return -EFAULT;
return 0;
case VHOST_VDPA_SET_GROUP_ASID:
if (copy_from_user(&s, argp, sizeof(s)))
return -EFAULT;
if (s.num >= vdpa->nas)
return -EINVAL;
if (!ops->set_group_asid)
return -EOPNOTSUPP;
return ops->set_group_asid(vdpa, idx, s.num);
case VHOST_GET_VRING_BASE:
r = ops->get_vq_state(v->vdpa, idx, &vq_state);
if (r)
return r;
if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
vq->last_avail_idx = vq_state.packed.last_avail_idx |
(vq_state.packed.last_avail_counter << 15);
vq->last_used_idx = vq_state.packed.last_used_idx |
(vq_state.packed.last_used_counter << 15);
} else {
vq->last_avail_idx = vq_state.split.avail_index;
}
break;
}
r = vhost_vring_ioctl(&v->vdev, cmd, argp);
if (r)
return r;
switch (cmd) {
case VHOST_SET_VRING_ADDR:
if (ops->set_vq_address(vdpa, idx,
(u64)(uintptr_t)vq->desc,
(u64)(uintptr_t)vq->avail,
(u64)(uintptr_t)vq->used))
r = -EINVAL;
break;
case VHOST_SET_VRING_BASE:
if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
vq_state.packed.last_avail_idx = vq->last_avail_idx & 0x7fff;
vq_state.packed.last_avail_counter = !!(vq->last_avail_idx & 0x8000);
vq_state.packed.last_used_idx = vq->last_used_idx & 0x7fff;
vq_state.packed.last_used_counter = !!(vq->last_used_idx & 0x8000);
} else {
vq_state.split.avail_index = vq->last_avail_idx;
}
r = ops->set_vq_state(vdpa, idx, &vq_state);
break;
case VHOST_SET_VRING_CALL:
if (vq->call_ctx.ctx) {
cb.callback = vhost_vdpa_virtqueue_cb;
cb.private = vq;
} else {
cb.callback = NULL;
cb.private = NULL;
}
ops->set_vq_cb(vdpa, idx, &cb);
vhost_vdpa_setup_vq_irq(v, idx);
break;
case VHOST_SET_VRING_NUM:
ops->set_vq_num(vdpa, idx, vq->num);
break;
}
return r;
}
static long vhost_vdpa_unlocked_ioctl(struct file *filep,
unsigned int cmd, unsigned long arg)
{
struct vhost_vdpa *v = filep->private_data;
struct vhost_dev *d = &v->vdev;
void __user *argp = (void __user *)arg;
u64 __user *featurep = argp;
u64 features;
long r = 0;
if (cmd == VHOST_SET_BACKEND_FEATURES) {
if (copy_from_user(&features, featurep, sizeof(features)))
return -EFAULT;
if (features & ~(VHOST_VDPA_BACKEND_FEATURES |
BIT_ULL(VHOST_BACKEND_F_SUSPEND)))
return -EOPNOTSUPP;
if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) &&
!vhost_vdpa_can_suspend(v))
return -EOPNOTSUPP;
vhost_set_backend_features(&v->vdev, features);
return 0;
}
mutex_lock(&d->mutex);
switch (cmd) {
case VHOST_VDPA_GET_DEVICE_ID:
r = vhost_vdpa_get_device_id(v, argp);
break;
case VHOST_VDPA_GET_STATUS:
r = vhost_vdpa_get_status(v, argp);
break;
case VHOST_VDPA_SET_STATUS:
r = vhost_vdpa_set_status(v, argp);
break;
case VHOST_VDPA_GET_CONFIG:
r = vhost_vdpa_get_config(v, argp);
break;
case VHOST_VDPA_SET_CONFIG:
r = vhost_vdpa_set_config(v, argp);
break;
case VHOST_GET_FEATURES:
r = vhost_vdpa_get_features(v, argp);
break;
case VHOST_SET_FEATURES:
r = vhost_vdpa_set_features(v, argp);
break;
case VHOST_VDPA_GET_VRING_NUM:
r = vhost_vdpa_get_vring_num(v, argp);
break;
case VHOST_VDPA_GET_GROUP_NUM:
if (copy_to_user(argp, &v->vdpa->ngroups,
sizeof(v->vdpa->ngroups)))
r = -EFAULT;
break;
case VHOST_VDPA_GET_AS_NUM:
if (copy_to_user(argp, &v->vdpa->nas, sizeof(v->vdpa->nas)))
r = -EFAULT;
break;
case VHOST_SET_LOG_BASE:
case VHOST_SET_LOG_FD:
r = -ENOIOCTLCMD;
break;
case VHOST_VDPA_SET_CONFIG_CALL:
r = vhost_vdpa_set_config_call(v, argp);
break;
case VHOST_GET_BACKEND_FEATURES:
features = VHOST_VDPA_BACKEND_FEATURES;
if (vhost_vdpa_can_suspend(v))
features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND);
if (copy_to_user(featurep, &features, sizeof(features)))
r = -EFAULT;
break;
case VHOST_VDPA_GET_IOVA_RANGE:
r = vhost_vdpa_get_iova_range(v, argp);
break;
case VHOST_VDPA_GET_CONFIG_SIZE:
r = vhost_vdpa_get_config_size(v, argp);
break;
case VHOST_VDPA_GET_VQS_COUNT:
r = vhost_vdpa_get_vqs_count(v, argp);
break;
case VHOST_VDPA_SUSPEND:
r = vhost_vdpa_suspend(v);
break;
default:
r = vhost_dev_ioctl(&v->vdev, cmd, argp);
if (r == -ENOIOCTLCMD)
r = vhost_vdpa_vring_ioctl(v, cmd, argp);
break;
}
mutex_unlock(&d->mutex);
return r;
}
static void vhost_vdpa_general_unmap(struct vhost_vdpa *v,
struct vhost_iotlb_map *map, u32 asid)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
if (ops->dma_map) {
ops->dma_unmap(vdpa, asid, map->start, map->size);
} else if (ops->set_map == NULL) {
iommu_unmap(v->domain, map->start, map->size);
}
}
static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
u64 start, u64 last, u32 asid)
{
struct vhost_dev *dev = &v->vdev;
struct vhost_iotlb_map *map;
struct page *page;
unsigned long pfn, pinned;
while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
pinned = PFN_DOWN(map->size);
for (pfn = PFN_DOWN(map->addr);
pinned > 0; pfn++, pinned--) {
page = pfn_to_page(pfn);
if (map->perm & VHOST_ACCESS_WO)
set_page_dirty_lock(page);
unpin_user_page(page);
}
atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
vhost_vdpa_general_unmap(v, map, asid);
vhost_iotlb_map_free(iotlb, map);
}
}
static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
u64 start, u64 last, u32 asid)
{
struct vhost_iotlb_map *map;
struct vdpa_map_file *map_file;
while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
map_file = (struct vdpa_map_file *)map->opaque;
fput(map_file->file);
kfree(map_file);
vhost_vdpa_general_unmap(v, map, asid);
vhost_iotlb_map_free(iotlb, map);
}
}
static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
struct vhost_iotlb *iotlb, u64 start,
u64 last, u32 asid)
{
struct vdpa_device *vdpa = v->vdpa;
if (vdpa->use_va)
return vhost_vdpa_va_unmap(v, iotlb, start, last, asid);
return vhost_vdpa_pa_unmap(v, iotlb, start, last, asid);
}
static int perm_to_iommu_flags(u32 perm)
{
int flags = 0;
switch (perm) {
case VHOST_ACCESS_WO:
flags |= IOMMU_WRITE;
break;
case VHOST_ACCESS_RO:
flags |= IOMMU_READ;
break;
case VHOST_ACCESS_RW:
flags |= (IOMMU_WRITE | IOMMU_READ);
break;
default:
WARN(1, "invalidate vhost IOTLB permission\n");
break;
}
return flags | IOMMU_CACHE;
}
static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
u64 iova, u64 size, u64 pa, u32 perm, void *opaque)
{
struct vhost_dev *dev = &v->vdev;
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
u32 asid = iotlb_to_asid(iotlb);
int r = 0;
r = vhost_iotlb_add_range_ctx(iotlb, iova, iova + size - 1,
pa, perm, opaque);
if (r)
return r;
if (ops->dma_map) {
r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
} else if (ops->set_map) {
if (!v->in_batch)
r = ops->set_map(vdpa, asid, iotlb);
} else {
r = iommu_map(v->domain, iova, pa, size,
perm_to_iommu_flags(perm));
}
if (r) {
vhost_iotlb_del_range(iotlb, iova, iova + size - 1);
return r;
}
if (!vdpa->use_va)
atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
return 0;
}
static void vhost_vdpa_unmap(struct vhost_vdpa *v,
struct vhost_iotlb *iotlb,
u64 iova, u64 size)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
u32 asid = iotlb_to_asid(iotlb);
vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1, asid);
if (ops->set_map) {
if (!v->in_batch)
ops->set_map(vdpa, asid, iotlb);
}
}
static int vhost_vdpa_va_map(struct vhost_vdpa *v,
struct vhost_iotlb *iotlb,
u64 iova, u64 size, u64 uaddr, u32 perm)
{
struct vhost_dev *dev = &v->vdev;
u64 offset, map_size, map_iova = iova;
struct vdpa_map_file *map_file;
struct vm_area_struct *vma;
int ret = 0;
mmap_read_lock(dev->mm);
while (size) {
vma = find_vma(dev->mm, uaddr);
if (!vma) {
ret = -EINVAL;
break;
}
map_size = min(size, vma->vm_end - uaddr);
if (!(vma->vm_file && (vma->vm_flags & VM_SHARED) &&
!(vma->vm_flags & (VM_IO | VM_PFNMAP))))
goto next;
map_file = kzalloc(sizeof(*map_file), GFP_KERNEL);
if (!map_file) {
ret = -ENOMEM;
break;
}
offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start;
map_file->offset = offset;
map_file->file = get_file(vma->vm_file);
ret = vhost_vdpa_map(v, iotlb, map_iova, map_size, uaddr,
perm, map_file);
if (ret) {
fput(map_file->file);
kfree(map_file);
break;
}
next:
size -= map_size;
uaddr += map_size;
map_iova += map_size;
}
if (ret)
vhost_vdpa_unmap(v, iotlb, iova, map_iova - iova);
mmap_read_unlock(dev->mm);
return ret;
}
static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
struct vhost_iotlb *iotlb,
u64 iova, u64 size, u64 uaddr, u32 perm)
{
struct vhost_dev *dev = &v->vdev;
struct page **page_list;
unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
unsigned int gup_flags = FOLL_LONGTERM;
unsigned long npages, cur_base, map_pfn, last_pfn = 0;
unsigned long lock_limit, sz2pin, nchunks, i;
u64 start = iova;
long pinned;
int ret = 0;
/* Limit the use of memory for bookkeeping */
page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list)
return -ENOMEM;
if (perm & VHOST_ACCESS_WO)
gup_flags |= FOLL_WRITE;
npages = PFN_UP(size + (iova & ~PAGE_MASK));
if (!npages) {
ret = -EINVAL;
goto free;
}
mmap_read_lock(dev->mm);
lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
ret = -ENOMEM;
goto unlock;
}
cur_base = uaddr & PAGE_MASK;
iova &= PAGE_MASK;
nchunks = 0;
while (npages) {
sz2pin = min_t(unsigned long, npages, list_size);
pinned = pin_user_pages(cur_base, sz2pin,
gup_flags, page_list, NULL);
if (sz2pin != pinned) {
if (pinned < 0) {
ret = pinned;
} else {
unpin_user_pages(page_list, pinned);
ret = -ENOMEM;
}
goto out;
}
nchunks++;
if (!last_pfn)
map_pfn = page_to_pfn(page_list[0]);
for (i = 0; i < pinned; i++) {
unsigned long this_pfn = page_to_pfn(page_list[i]);
u64 csize;
if (last_pfn && (this_pfn != last_pfn + 1)) {
/* Pin a contiguous chunk of memory */
csize = PFN_PHYS(last_pfn - map_pfn + 1);
ret = vhost_vdpa_map(v, iotlb, iova, csize,
PFN_PHYS(map_pfn),
perm, NULL);
if (ret) {
/*
* Unpin the pages that are left unmapped
* from this point on in the current
* page_list. The remaining outstanding
* ones which may stride across several
* chunks will be covered in the common
* error path subsequently.
*/
unpin_user_pages(&page_list[i],
pinned - i);
goto out;
}
map_pfn = this_pfn;
iova += csize;
nchunks = 0;
}
last_pfn = this_pfn;
}
cur_base += PFN_PHYS(pinned);
npages -= pinned;
}
/* Pin the rest chunk */
ret = vhost_vdpa_map(v, iotlb, iova, PFN_PHYS(last_pfn - map_pfn + 1),
PFN_PHYS(map_pfn), perm, NULL);
out:
if (ret) {
if (nchunks) {
unsigned long pfn;
/*
* Unpin the outstanding pages which are yet to be
* mapped but haven't due to vdpa_map() or
* pin_user_pages() failure.
*
* Mapped pages are accounted in vdpa_map(), hence
* the corresponding unpinning will be handled by
* vdpa_unmap().
*/
WARN_ON(!last_pfn);
for (pfn = map_pfn; pfn <= last_pfn; pfn++)
unpin_user_page(pfn_to_page(pfn));
}
vhost_vdpa_unmap(v, iotlb, start, size);
}
unlock:
mmap_read_unlock(dev->mm);
free:
free_page((unsigned long)page_list);
return ret;
}
static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
struct vhost_iotlb *iotlb,
struct vhost_iotlb_msg *msg)
{
struct vdpa_device *vdpa = v->vdpa;
if (msg->iova < v->range.first || !msg->size ||
msg->iova > U64_MAX - msg->size + 1 ||
msg->iova + msg->size - 1 > v->range.last)
return -EINVAL;
if (vhost_iotlb_itree_first(iotlb, msg->iova,
msg->iova + msg->size - 1))
return -EEXIST;
if (vdpa->use_va)
return vhost_vdpa_va_map(v, iotlb, msg->iova, msg->size,
msg->uaddr, msg->perm);
return vhost_vdpa_pa_map(v, iotlb, msg->iova, msg->size, msg->uaddr,
msg->perm);
}
static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid,
struct vhost_iotlb_msg *msg)
{
struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
struct vhost_iotlb *iotlb = NULL;
struct vhost_vdpa_as *as = NULL;
int r = 0;
mutex_lock(&dev->mutex);
r = vhost_dev_check_owner(dev);
if (r)
goto unlock;
if (msg->type == VHOST_IOTLB_UPDATE ||
msg->type == VHOST_IOTLB_BATCH_BEGIN) {
as = vhost_vdpa_find_alloc_as(v, asid);
if (!as) {
dev_err(&v->dev, "can't find and alloc asid %d\n",
asid);
r = -EINVAL;
goto unlock;
}
iotlb = &as->iotlb;
} else
iotlb = asid_to_iotlb(v, asid);
if ((v->in_batch && v->batch_asid != asid) || !iotlb) {
if (v->in_batch && v->batch_asid != asid) {
dev_info(&v->dev, "batch id %d asid %d\n",
v->batch_asid, asid);
}
if (!iotlb)
dev_err(&v->dev, "no iotlb for asid %d\n", asid);
r = -EINVAL;
goto unlock;
}
switch (msg->type) {
case VHOST_IOTLB_UPDATE:
r = vhost_vdpa_process_iotlb_update(v, iotlb, msg);
break;
case VHOST_IOTLB_INVALIDATE:
vhost_vdpa_unmap(v, iotlb, msg->iova, msg->size);
break;
case VHOST_IOTLB_BATCH_BEGIN:
v->batch_asid = asid;
v->in_batch = true;
break;
case VHOST_IOTLB_BATCH_END:
if (v->in_batch && ops->set_map)
ops->set_map(vdpa, asid, iotlb);
v->in_batch = false;
break;
default:
r = -EINVAL;
break;
}
unlock:
mutex_unlock(&dev->mutex);
return r;
}
static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct vhost_vdpa *v = file->private_data;
struct vhost_dev *dev = &v->vdev;
return vhost_chr_write_iter(dev, from);
}
static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
struct device *dma_dev = vdpa_get_dma_dev(vdpa);
struct bus_type *bus;
int ret;
/* Device want to do DMA by itself */
if (ops->set_map || ops->dma_map)
return 0;
bus = dma_dev->bus;
if (!bus)
return -EFAULT;
if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY))
return -ENOTSUPP;
v->domain = iommu_domain_alloc(bus);
if (!v->domain)
return -EIO;
ret = iommu_attach_device(v->domain, dma_dev);
if (ret)
goto err_attach;
return 0;
err_attach:
iommu_domain_free(v->domain);
v->domain = NULL;
return ret;
}
static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
{
struct vdpa_device *vdpa = v->vdpa;
struct device *dma_dev = vdpa_get_dma_dev(vdpa);
if (v->domain) {
iommu_detach_device(v->domain, dma_dev);
iommu_domain_free(v->domain);
}
v->domain = NULL;
}
static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v)
{
struct vdpa_iova_range *range = &v->range;
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
if (ops->get_iova_range) {
*range = ops->get_iova_range(vdpa);
} else if (v->domain && v->domain->geometry.force_aperture) {
range->first = v->domain->geometry.aperture_start;
range->last = v->domain->geometry.aperture_end;
} else {
range->first = 0;
range->last = ULLONG_MAX;
}
}
static void vhost_vdpa_cleanup(struct vhost_vdpa *v)
{
struct vhost_vdpa_as *as;
u32 asid;
for (asid = 0; asid < v->vdpa->nas; asid++) {
as = asid_to_as(v, asid);
if (as)
vhost_vdpa_remove_as(v, asid);
}
vhost_vdpa_free_domain(v);
vhost_dev_cleanup(&v->vdev);
kfree(v->vdev.vqs);
}
static int vhost_vdpa_open(struct inode *inode, struct file *filep)
{
struct vhost_vdpa *v;
struct vhost_dev *dev;
struct vhost_virtqueue **vqs;
int r, opened;
u32 i, nvqs;
v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);
opened = atomic_cmpxchg(&v->opened, 0, 1);
if (opened)
return -EBUSY;
nvqs = v->nvqs;
r = vhost_vdpa_reset(v);
if (r)
goto err;
vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
if (!vqs) {
r = -ENOMEM;
goto err;
}
dev = &v->vdev;
for (i = 0; i < nvqs; i++) {
vqs[i] = &v->vqs[i];
vqs[i]->handle_kick = handle_vq_kick;
}
vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
vhost_vdpa_process_iotlb_msg);
r = vhost_vdpa_alloc_domain(v);
if (r)
goto err_alloc_domain;
vhost_vdpa_set_iova_range(v);
filep->private_data = v;
return 0;
err_alloc_domain:
vhost_vdpa_cleanup(v);
err:
atomic_dec(&v->opened);
return r;
}
static void vhost_vdpa_clean_irq(struct vhost_vdpa *v)
{
u32 i;
for (i = 0; i < v->nvqs; i++)
vhost_vdpa_unsetup_vq_irq(v, i);
}
static int vhost_vdpa_release(struct inode *inode, struct file *filep)
{
struct vhost_vdpa *v = filep->private_data;
struct vhost_dev *d = &v->vdev;
mutex_lock(&d->mutex);
filep->private_data = NULL;
vhost_vdpa_clean_irq(v);
vhost_vdpa_reset(v);
vhost_dev_stop(&v->vdev);
vhost_vdpa_config_put(v);
vhost_vdpa_cleanup(v);
mutex_unlock(&d->mutex);
atomic_dec(&v->opened);
complete(&v->completion);
return 0;
}
#ifdef CONFIG_MMU
static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
{
struct vhost_vdpa *v = vmf->vma->vm_file->private_data;
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
struct vdpa_notification_area notify;
struct vm_area_struct *vma = vmf->vma;
u16 index = vma->vm_pgoff;
notify = ops->get_vq_notification(vdpa, index);
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
PFN_DOWN(notify.addr), PAGE_SIZE,
vma->vm_page_prot))
return VM_FAULT_SIGBUS;
return VM_FAULT_NOPAGE;
}
static const struct vm_operations_struct vhost_vdpa_vm_ops = {
.fault = vhost_vdpa_fault,
};
static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
{
struct vhost_vdpa *v = vma->vm_file->private_data;
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
struct vdpa_notification_area notify;
unsigned long index = vma->vm_pgoff;
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
return -EINVAL;
if ((vma->vm_flags & VM_SHARED) == 0)
return -EINVAL;
if (vma->vm_flags & VM_READ)
return -EINVAL;
if (index > 65535)
return -EINVAL;
if (!ops->get_vq_notification)
return -ENOTSUPP;
/* To be safe and easily modelled by userspace, We only
* support the doorbell which sits on the page boundary and
* does not share the page with other registers.
*/
notify = ops->get_vq_notification(vdpa, index);
if (notify.addr & (PAGE_SIZE - 1))
return -EINVAL;
if (vma->vm_end - vma->vm_start != notify.size)
return -ENOTSUPP;
vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
vma->vm_ops = &vhost_vdpa_vm_ops;
return 0;
}
#endif /* CONFIG_MMU */
static const struct file_operations vhost_vdpa_fops = {
.owner = THIS_MODULE,
.open = vhost_vdpa_open,
.release = vhost_vdpa_release,
.write_iter = vhost_vdpa_chr_write_iter,
.unlocked_ioctl = vhost_vdpa_unlocked_ioctl,
#ifdef CONFIG_MMU
.mmap = vhost_vdpa_mmap,
#endif /* CONFIG_MMU */
.compat_ioctl = compat_ptr_ioctl,
};
static void vhost_vdpa_release_dev(struct device *device)
{
struct vhost_vdpa *v =
container_of(device, struct vhost_vdpa, dev);
ida_simple_remove(&vhost_vdpa_ida, v->minor);
kfree(v->vqs);
kfree(v);
}
static int vhost_vdpa_probe(struct vdpa_device *vdpa)
{
const struct vdpa_config_ops *ops = vdpa->config;
struct vhost_vdpa *v;
int minor;
int i, r;
/* We can't support platform IOMMU device with more than 1
* group or as
*/
if (!ops->set_map && !ops->dma_map &&
(vdpa->ngroups > 1 || vdpa->nas > 1))
return -EOPNOTSUPP;
v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
if (!v)
return -ENOMEM;
minor = ida_simple_get(&vhost_vdpa_ida, 0,
VHOST_VDPA_DEV_MAX, GFP_KERNEL);
if (minor < 0) {
kfree(v);
return minor;
}
atomic_set(&v->opened, 0);
v->minor = minor;
v->vdpa = vdpa;
v->nvqs = vdpa->nvqs;
v->virtio_id = ops->get_device_id(vdpa);
device_initialize(&v->dev);
v->dev.release = vhost_vdpa_release_dev;
v->dev.parent = &vdpa->dev;
v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue),
GFP_KERNEL);
if (!v->vqs) {
r = -ENOMEM;
goto err;
}
r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
if (r)
goto err;
cdev_init(&v->cdev, &vhost_vdpa_fops);
v->cdev.owner = THIS_MODULE;
r = cdev_device_add(&v->cdev, &v->dev);
if (r)
goto err;
init_completion(&v->completion);
vdpa_set_drvdata(vdpa, v);
for (i = 0; i < VHOST_VDPA_IOTLB_BUCKETS; i++)
INIT_HLIST_HEAD(&v->as[i]);
return 0;
err:
put_device(&v->dev);
return r;
}
static void vhost_vdpa_remove(struct vdpa_device *vdpa)
{
struct vhost_vdpa *v = vdpa_get_drvdata(vdpa);
int opened;
cdev_device_del(&v->cdev, &v->dev);
do {
opened = atomic_cmpxchg(&v->opened, 0, 1);
if (!opened)
break;
wait_for_completion(&v->completion);
} while (1);
put_device(&v->dev);
}
static struct vdpa_driver vhost_vdpa_driver = {
.driver = {
.name = "vhost_vdpa",
},
.probe = vhost_vdpa_probe,
.remove = vhost_vdpa_remove,
};
static int __init vhost_vdpa_init(void)
{
int r;
r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX,
"vhost-vdpa");
if (r)
goto err_alloc_chrdev;
r = vdpa_register_driver(&vhost_vdpa_driver);
if (r)
goto err_vdpa_register_driver;
return 0;
err_vdpa_register_driver:
unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
err_alloc_chrdev:
return r;
}
module_init(vhost_vdpa_init);
static void __exit vhost_vdpa_exit(void)
{
vdpa_unregister_driver(&vhost_vdpa_driver);
unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
}
module_exit(vhost_vdpa_exit);
MODULE_VERSION("0.0.1");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("vDPA-based vhost backend for virtio");