Merge keystone/android14-6.1-keystone-qcom-release.6.1.1 (925907e) into msm-pineapple

* refs/heads/tmp-925907e:
  ANDROID: GKI: Remove CONFIG_LOCALVERSION="-mainline" on 6.1 branch
  ANDROID: gki_defconfig: sample large page_alloc allocations with HW_TAGS KASAN
  FROMLIST: kasan: allow sampling page_alloc allocations for HW_TAGS
  ANDROID: fscrypt, blk-crypto: drop HW-wrapped key compatibility check
  ANDROID: GKI: Enable CONFIG_NF_CONNTRACK_PROCFS
  ANDROID: mm: arm64: Allow remapping logical mappings as uncached
  ANDROID: fuse-bpf: Fix crash from assuming iter is kvec
  ANDROID: fuse-bpf: Add /sys/fs flags for fuse-bpf version
  ANDROID: fuse-bpf: Make sure to declare functions
  ANDROID: fuse-bpf v1.1
  ANDROID: KVM: arm64: Add helper for pKVM modules addr conversion
  ANDROID: timer: Add vendor hook for timer calc index
  ANDROID: KVM: arm64: Fix calculation for number of relocs in .hyp.reloc
  ANDROID: KVM: arm64: Ignore modules with empty .hyp.text section
  Revert "ANDROID: KVM: arm64: Make gen-hyprel emit delimiters"
  ANDROID: KVM: arm64: Resolve hyp module addresses using ELF sections
  ANDROID: dma-buf: Add vendorhook to allow mmaping more memory than a DMA-BUF holds
  ANDROID: fips140: add fips140_lab_util program
  ANDROID: fips140: add kernel crypto module
  ANDROID: arm64: simd: omit capability check in may_use_simd()
  ANDROID: arm64: disable LSE when building the FIPS140 module
  ANDROID: arm64: only permit certain alternatives in the FIPS140 module
  ANDROID: jump_label: disable jump labels in fips140.ko
  ANDROID: crypto: define fips_enabled to 1 in fips140.ko
  ANDROID: crypto: lib/aes - add vendor hooks for AES library routines
  ANDROID: crypto: lib/sha256 - add vendor hook for sha256() routine
  ANDROID: kbuild: customize module linker script for fips140 module
  ANDROID: GKI: Remove usage of __GENKSYMS__ in vendor hooks source
  ANDROID: iommu: Add vendor hook to alloc_iova()
  ANDROID: iommu: Add vendor hook to select alloc_iova algorithm
  ANDROID: iommu: Add a vendor field in iova_domain
  ANDROID: usb: gadget: f_accessory: update SS/SSP descriptors
  ANDROID: kbuild: Search external devicetree path when running clean target
  Revert "ANDROID: KVM: arm64: Coalesce host stage2 entries on ownership reclaim"
  ANDROID: KVM: arm64: Keep the pKVM private range under 1GiB
  ANDROID: KVM: arm64: Specify stage-2-protected regions in DT
  ANDROID: KVM: arm64: Introduce concept of pKVM moveable regions
  ANDROID: KVM: arm64: Correctly flag MMIO pages as PKVM_PAGE_RESTRICTED_PROT
  ANDROID: KVM: arm64: Introduce default_host_prot()
  ANDROID: KVM: arm64: Introduce a hyp panic module notifier
  ANDROID: KVM: arm64: Expose linear map APIs to pKVM modules
  ANDROID: scheduler: add vendor-specific wake flag
  ANDROID: Add a vendor hook that allow a module to modify the wake flag
  ANDROID: futex: Add vendor hook for wait queue
  ANDROID: rwsem: Add vendor hook to the rw-semaphore
  FROMLIST: sched/pelt: Introduce PELT multiplier
  ANDROID: GKI: Export clocksource_mmio_init
  ANDROID: update "fscrypt: add support for hardware-wrapped keys" to v7
  ANDROID: update "dm: add support for passing through derive_sw_secret"
  ANDROID: update "block: add basic hardware-wrapped key support" to v7
  ANDROID: dm-default-key: update for blk-crypto changes
  UPSTREAM: blk-crypto: Add a missing include directive
  UPSTREAM: blk-crypto: move internal only declarations to blk-crypto-internal.h
  BACKPORT: blk-crypto: add a blk_crypto_config_supported_natively helper
  BACKPORT: blk-crypto: don't use struct request_queue for public interfaces
  f2fs: let's avoid panic if extent_tree is not created
  f2fs: should use a temp extent_info for lookup
  f2fs: don't mix to use union values in extent_info
  f2fs: initialize extent_cache parameter
  f2fs: fix to avoid NULL pointer dereference in f2fs_issue_flush()
  ANDROID: update the BRANCH constant
  ANDROID: inline isolate_and_split_free_page
  ANDROID: mm: compaction: fix isolate_and_split_free_page() redefinition
  ANDROID: implement wrapper for reverse migration
  ANDROID: KVM: Remove function_nocfi() leftover in pKVM modules
  ANDROID: KVM: arm64: Always declare pKVM module loading functions
  ANDROID: GKI: Source GKI_BUILD_CONFIG_FRAGMENT after setting all variables
  ANDROID: cpuidle: export cpuidle_driver_state_disabled
  UPSTREAM: mm/madvise: fix madvise_pageout for private file mappings
  ANDROID: KVM: arm64: Allow trap handling from pKVM modules
  ANDROID: KVM: arm64: Notify pKVM modules of PSCI events
  ANDROID: KVM: arm64: Allow handling illegal aborts from pKVM modules
  ANDROID: KVM: arm64: Allow SMC handling from pKVM modules
  fscrypt: add additional documentation for SM4 support
  fscrypt: remove unused Speck definitions
  fscrypt: Add SM4 XTS/CTS symmetric algorithm support
  blk-crypto: Add support for SM4-XTS blk crypto mode
  blk-crypto: pass a gendisk to blk_crypto_sysfs_{,un}register
  fscrypt: add comment for fscrypt_valid_enc_modes_v1()
  blk-crypto: Add a missing include directive
  blk-crypto: move internal only declarations to blk-crypto-internal.h
  blk-crypto: add a blk_crypto_config_supported_natively helper
  blk-crypto: don't use struct request_queue for public interfaces
  fscrypt: pass super_block to fscrypt_put_master_key_activeref()
  Linux 6.1.1
  KEYS: encrypted: fix key instantiation with user-provided data
  cifs: fix oops during encryption
  usb: dwc3: pci: Update PCIe device ID for USB3 controller on CPU sub-system for Raptor Lake
  usb: typec: ucsi: Resume in separate work
  igb: Initialize mailbox message for VF reset
  staging: r8188eu: fix led register settings
  xhci: Apply XHCI_RESET_TO_DEFAULT quirk to ADL-N
  ALSA: hda/realtek: fix mute/micmute LEDs for a HP ProBook
  USB: serial: f81534: fix division by zero on line-speed change
  USB: serial: f81232: fix division by zero on line-speed change
  USB: serial: cp210x: add Kamstrup RF sniffer PIDs
  USB: serial: option: add Quectel EM05-G modem
  usb: gadget: uvc: Prevent buffer overflow in setup handler
  udf: Fix extending file within last block
  udf: Do not bother looking for prealloc extents if i_lenExtents matches i_size
  udf: Fix preallocation discarding at indirect extent boundary
  udf: Discard preallocation before extending file with a hole
  irqchip/ls-extirq: Fix endianness detection
  mips: ralink: mt7621: do not use kzalloc too early
  mips: ralink: mt7621: soc queries and tests as functions
  mips: ralink: mt7621: define MT7621_SYSC_BASE with __iomem
  PCI: mt7621: Add sentinel to quirks table
  libbpf: Fix uninitialized warning in btf_dump_dump_type_data
  x86/vdso: Conditionally export __vdso_sgx_enter_enclave()
  Revert "ANDROID: GKI: remove CONFIG_CMDLINE_EXTEND from arm64 gki_defconfig"
  ANDROID: Revert "arm64: Drop support for CMDLINE_EXTEND"
  ANDROID: of: Support CONFIG_CMDLINE_EXTEND config option
  FROMGIT: asm-generic/io: Add _RET_IP_ to MMIO trace for more accurate debug info
  ANDROID: firmware_loader: Add support for customer firmware paths
  ANDROID: GKI: Enable CONFIG_RT_SOFTIRQ_AWARE_SCHED
  FROMLIST: trace: Add trace points for tasklet entry/exit
  FROMLIST: softirq: defer softirq processing to ksoftirqd if CPU is busy with RT
  FROMLIST: sched: Avoid placing RT threads on cores handling long softirqs
  FROMLIST: softirq: Add generic accessor to percpu softirq_pending data
  ANDROID: sched/cpuset: Add vendor hook to change tasks affinity
  ANDROID: cpuset: Make cpusets restore on hotplug
  fsverity: simplify fsverity_get_digest()
  fsverity: stop using PG_error to track error status
  f2fs: reset wait_ms to default if any of the victims have been selected
  f2fs: fix some format WARNING in debug.c and sysfs.c
  f2fs: don't call f2fs_issue_discard_timeout() when discard_cmd_cnt is 0 in f2fs_put_super()
  f2fs: fix iostat parameter for discard
  f2fs: Fix spelling mistake in label: free_bio_enrty_cache -> free_bio_entry_cache
  f2fs: add block_age-based extent cache
  f2fs: allocate the extent_cache by default
  f2fs: refactor extent_cache to support for read and more
  f2fs: remove unnecessary __init_extent_tree
  f2fs: move internal functions into extent_cache.c
  f2fs: specify extent cache for read explicitly
  f2fs: introduce f2fs_is_readonly() for readability
  f2fs: remove F2FS_SET_FEATURE() and F2FS_CLEAR_FEATURE() macro
  f2fs: do some cleanup for f2fs module init
  MAINTAINERS: Add f2fs bug tracker link
  f2fs: remove the unused flush argument to change_curseg
  f2fs: open code allocate_segment_by_default
  f2fs: remove struct segment_allocation default_salloc_ops
  f2fs: introduce discard_urgent_util sysfs node
  f2fs: define MIN_DISCARD_GRANULARITY macro
  f2fs: init discard policy after thread wakeup
  f2fs: avoid victim selection from previous victim section
  f2fs: truncate blocks in batch in __complete_revoke_list()
  f2fs: make __queue_discard_cmd() return void
  f2fs: fix description about discard_granularity node
  f2fs: move set_file_temperature into f2fs_new_inode
  f2fs: fix to enable compress for newly created file if extension matches
  f2fs: set zstd compress level correctly
  f2fs: change type for 'sbi->readdir_ra'
  f2fs: cleanup for 'f2fs_tuning_parameters' function
  f2fs: fix to alloc_mode changed after remount on a small volume device
  f2fs: remove submit label in __submit_discard_cmd()
  f2fs: fix to do sanity check on i_extra_isize in is_alive()
  f2fs: introduce F2FS_IOC_START_ATOMIC_REPLACE
  f2fs: fix to set flush_merge opt and show noflush_merge
  f2fs: initialize locks earlier in f2fs_fill_super()
  f2fs: optimize iteration over sparse directories
  f2fs: fix to avoid accessing uninitialized spinlock
  f2fs: correct i_size change for atomic writes
  f2fs: add proc entry to show discard_plist info
  f2fs: allow to read node block after shutdown
  f2fs: replace ternary operator with max()
  f2fs: replace gc_urgent_high_remaining with gc_remaining_trials
  f2fs: add missing bracket in doc
  f2fs: use sysfs_emit instead of sprintf
  f2fs: introduce gc_mode sysfs node
  f2fs: fix to destroy sbi->post_read_wq in error path of f2fs_fill_super()
  f2fs: fix return val in f2fs_start_ckpt_thread()
  f2fs: fix the msg data type
  f2fs: fix the assign logic of iocb
  f2fs: Fix typo in comments
  f2fs: introduce max_ordered_discard sysfs node
  f2fs: allow to set compression for inlined file
  f2fs: add barrier mount option
  f2fs: fix normal discard process
  f2fs: cleanup in f2fs_create_flush_cmd_control()
  f2fs: fix gc mode when gc_urgent_high_remaining is 1
  f2fs: remove batched_trim_sections node
  f2fs: support fault injection for f2fs_is_valid_blkaddr()
  f2fs: fix to invalidate dcc->f2fs_issue_discard in error path
  f2fs: Fix the race condition of resize flag between resizefs
  f2fs: let's avoid to get cp_rwsem twice by f2fs_evict_inode by d_invalidate
  f2fs: should put a page when checking the summary info
  ANDROID: GKI: Update GKI modules protected exports
  ANDROID: GKI: Add list of protected GKI modules
  ANDROID: GKI: Only protect exports if KMI symbols are present
  ANDROID: GKI: Protect exports of protected GKI modules
  UPSTREAM: crypto: algboss - compile out test-related code when tests disabled
  UPSTREAM: crypto: kdf - silence noisy self-test
  UPSTREAM: crypto: kdf - skip self-test when tests disabled
  UPSTREAM: crypto: api - compile out crypto_boot_test_finished when tests disabled
  UPSTREAM: crypto: algboss - optimize registration of internal algorithms
  UPSTREAM: crypto: api - optimize algorithm registration when self-tests disabled
  ANDROID: KVM: arm64: Add support for non-cacheable mappings
  ANDROID: KVM: arm64: Don't filter out KVM_FUNC_MMIO_GUARD_MAP hypercalls
  ANDROID: KVM: arm64: Coalesce host stage2 entries on ownership reclaim
  ANDROID: KVM: arm64: Move kvm_pte_table to the common header
  ANDROID: KVM: arm64: Have different callbacks for PTE manipulation
  ANDROID: KVM: arm64: Move PTE attributes definitions to the common header
  ANDROID: KVM: arm64: Split stage2_put_pte function
  ANDROID: KVM: arm64: Pass the pagetable struct as an argument to the freewalker
  ANDROID: KVM: arm64: Fix link with CONFIG_MODULES=n
  ANDROID: KVM: arm64: Fix build with CONFIG_MODULES=n
  ANDROID: KVM: arm64: Block module loading based on cmdline or HVC
  ANDROID: KVM: arm64: Support unaligned fixmap in the nVHE hyp
  ANDROID: KVM: arm64: Add support for custom hypercall registration
  ANDROID: KVM: arm64: Return a token for a pKVM module registration
  ANDROID: KVM: arm64: Introduce hyp_protect_host_page()
  ANDROID: KVM: arm64: Add a permission fault handler
  ANDROID: KVM: arm64: Introduce PKVM_PAGE_RESTRICTED_PROT
  ANDROID: KVM: arm64: Expose kvm_flush_dcache_to_poc() in module_ops
  ANDROID: KVM: arm64: Expose hyp fixmap helpers in module_ops
  ANDROID: KVM: arm64: Expose puts and putx64 in pKVM ABI
  ANDROID: KVM: arm64: Add serial framework for pKVM
  ANDROID: KVM: arm64: Expose __pkvm_create_private_mapping to pKVM modules
  ANDROID: KVM: arm64: Include .note.gnu.property in .hyp.rodata
  ANDROID: KVM: arm64: Allow loading modules to the pKVM hypervisor
  ANDROID: KVM: arm64: Refactor nvhe Makefile
  ANDROID: KVM: arm64: Make gen-hyprel emit delimiters
  ANDROID: KVM: arm64: Move gen-hyprel into a tool directory
  ANDROID: KVM: arm64: Add mapping removal interface for nVHE hyp
  ANDROID: arm64: patching: Add aarch64_addr_write()
  ANDROID: arm64: patching: Refactor __aarch64_insn_write()
  ANDROID: KVM: arm64: Use correct pkvm owners type
  ANDROID: KVM: arm64: s2mpu: S2MPU V9 code
  ANDROID: KVM: arm64: s2mpu: Add MMIO and defines for V9 S2MPU
  ANDROID: KVM: arm64: s2mpu: rename versions to match major arch
  ANDROID: KVM: arm64: s2mpu: Abstract register initialization with version_ops
  ANDROID: KVM: arm64: s2mpu: Pass driver version during init
  ANDROID: KVM: arm64: s2mpu: Add SMPT and MPT functions to pgtable abstraction
  ANDROID: KVM: arm64: s2mpu: Abstract page table ops
  ANDROID: KVM: arm64: iommu: Support dynamic driver registration in IOMMU layer
  ANDROID: KVM: arm64: Use 32-bit function ID for PSCI MEM_PROTECT call
  Revert "ANDROID: virtio_balloon: New module parameter "pkvm""
  ANDROID: KVM: arm64: s2mpu: Fix SYNC latency regression
  ANDROID: KVM: arm64: iommu: Add host_stage2_idmap_complete
  ANDROID: KVM: arm64: Don't update IOMMUs unnecessarily
  ANDROID: KVM: arm64: s2mpu: Add SysMMU_SYNC timeout
  ANDROID: KVM: arm64: s2mpu: Allow r/o access to control regs
  ANDROID: KVM: arm64: s2mpu: Allow reading MPTC entries
  ANDROID: KVM: arm64: s2mpu: Allow L1ENTRY_* r/o access
  ANDROID: KVM: arm64: s2mpu: Refactor DABT handler
  ANDROID: KVM: arm64: s2mpu: Extract L1ENTRY_* consts
  ANDROID: KVM: arm64: s2mpu: Initialize MPTs to PROT_RW
  ANDROID: KVM: arm64: iommu: Optimize snapshot_host_stage2
  ANDROID: KVM: arm64: iommu: Fix upper bound of PT walk
  ANDROID: KVM: arm64: iommu: Add pkvm_iommu_finalize
  ANDROID: KVM: arm64: iommu: No powered check in DABT handler
  ANDROID: KVM: arm64: s2mpu: Create SysMMU_SYNC driver
  ANDROID: KVM: arm64: iommu: Create parent/child relation
  ANDROID: KVM: arm64: iommu: Run validate() on struct pkvm_iommu
  ANDROID: KVM: arm64: iommu: Create private mapping last
  ANDROID: KVM: arm64: iommu: Free memory on registration error
  ANDROID: KVM: arm64: iommu: Harden __pkvm_iommu_pm_notify
  ANDROID: KVM: arm64: Remove unused IOMMU hooks, kvm_iommu_ops
  ANDROID: KVM: arm64: s2mpu: Implement host stage2 idmap callbacks
  ANDROID: KVM: arm64: s2mpu: Move mpt_update_flags into FMPT
  ANDROID: KVM: arm64: s2mpu: Replace DABT handler with callback
  ANDROID: KVM: arm64: s2mpu: Replace SMC handler with PM callbacks
  ANDROID: KVM: arm64: s2mpu: Add driver initializer
  ANDROID: KVM: arm64: s2mpu: Remove host_stage2_adjust_mmio_range
  ANDROID: KVM: arm64: s2mpu: Replace struct s2mpu with pkvm_iommu
  ANDROID: KVM: arm64: s2mpu: Remove all EL1 code
  ANDROID: KVM: arm64: s2mpu: Move SFR init to EL2
  ANDROID: KVM: arm64: iommu: Snapshot host stage-2 at driver init
  ANDROID: KVM: arm64: iommu: Host stage-2 idmap callbacks
  ANDROID: KVM: arm64: iommu: DABT handler callback
  ANDROID: KVM: arm64: iommu: Suspend/resume callbacks
  ANDROID: KVM: arm64: iommu: Register device hypcall
  ANDROID: KVM: arm64: iommu: Avoid mapping devices in host stage-2
  ANDROID: KVM: arm64: iommu: Driver initialization hypcall
  ANDROID: KVM: arm64: Fix host MMIO DABT handler IPA
  ANDROID: KVM: arm64: Wait on S2MPU.STATUS after invalidation
  ANDROID: KVM: arm64: Remove kernel-doc in S2MPU driver
  ANDROID: KVM: arm64: Initialize pkvm_pgtable.mm_ops earlier
  ANDROID: KVM: arm64: Mark select_iommu_ops static
  ANDROID: Enable KVM_S2MPU in gki_defconfig
  ANDROID: KVM: arm64: Unmap S2MPU MMIO registers from host stage-2
  ANDROID: KVM: arm64: Implement MMIO handler in S2MPU driver
  ANDROID: KVM: arm64: Modify S2MPU MPT in 'host_stage2_set_owner'
  ANDROID: KVM: arm64: Set up S2MPU Memory Protection Table
  ANDROID: KVM: arm64: Reprogram S2MPUs in 'host_smc_handler'
  ANDROID: KVM: arm64: Enable S2MPUs in __pkvm_init_stage2_iommu
  ANDROID: KVM: arm64: Copy S2MPU configuration to hyp
  ANDROID: KVM: arm64: Implement IRQ handler for S2MPU faults
  ANDROID: KVM: arm64: Allocate context IDs for valid VIDs
  ANDROID: KVM: arm64: Read and check S2MPU_VERSION
  ANDROID: KVM: arm64: Parse S2MPU MMIO region
  ANDROID: KVM: arm64: Create empty S2MPU driver
  ANDROID: KVM: arm64: Add 'host_stage2_adjust_mmio_range' to kvm_iommu_ops
  ANDROID: KVM: arm64: Add 'host_mmio_dabt_handler' to kvm_iommu_ops
  ANDROID: KVM: arm64: Add 'host_stage2_set_owner' to kvm_iommu_ops
  ANDROID: KVM: arm64: Add 'host_smc_handler' to kvm_iommu_ops
  ANDROID: KVM: arm64: Introduce IOMMU driver infrastructure
  ANDROID: KVM: arm64: Update pKVM hyp state series to v6
  ANDROID: KVM: arm64: Add protected_shared_mem statistic
  ANDROID: KVM: arm64: count KVM s2 mmu usage in nVHE protected mode
  ANDROID: KVM: arm64: Add protected_hyp_mem VM statistic
  ANDROID: KVM: arm64: Fix sparse __percpu warning
  ANDROID: KVM: arm64: Relax SMCCC version check during FF-A proxy init
  ANDROID: KVM: arm64: Increase size of FF-A buffer
  BACKPORT: FROMLIST: KVM: arm64: pkvm: Add support for fragmented FF-A descriptors
  FROMLIST: KVM: arm64: Handle FFA_MEM_LEND calls from the host
  FROMLIST: KVM: arm64: Handle FFA_MEM_RECLAIM calls from the host
  FROMLIST: KVM: arm64: Handle FFA_MEM_SHARE calls from the host
  BACKPORT: FROMLIST: KVM: arm64: Add FF-A helpers to share/unshare memory with secure world
  FROMLIST: KVM: arm64: Handle FFA_RXTX_MAP and FFA_RXTX_UNMAP calls from the host
  FROMLIST: KVM: arm64: Allocate pages for hypervisor FF-A mailboxes
  FROMLIST: KVM: arm64: Handle FFA_FEATURES call from the host
  BACKPORT: FROMLIST: KVM: arm64: Probe FF-A version and host/hyp partition ID during init
  FROMLIST: KVM: arm64: Block unsafe FF-A calls from the host
  FROMLIST: firmware: arm_ffa: Move comment before the field it is documenting
  FROMLIST: firmware: arm_ffa: Move constants to header file
  ANDROID: KVM: arm64: Issue CMOs when tearing down shadow pages
  ANDROID: KVM: arm64: Use PSCI MEM_PROTECT to zap guest pages on reset
  ANDROID: KVM: arm64: Check IPA range for pvmfw during guest donation
  ANDROID: KVM: arm64: Use fixmap when poisoning pvmfw pages
  ANDROID: KVM: arm64: Rename pkvm_clear_pvmfw_pages()
  ANDROID: KVM: arm64: Rename hyp_zero_page() and make available as helper
  ANDROID: KVM: arm64: Don't check for hyp_fixmap_map() returning NULL
  ANDROID: virtio_balloon: Do not clear VIRTIO_F_ACCESS_PLATFORM
  ANDROID: virtio_balloon: New module parameter "pkvm"
  ANDROID: KVM: arm64: Introduce kvm_has_memrelinquish_services
  ANDROID: KVM: arm64: Flush nVHE hyp_vcpu memcache
  ANDROID: KVM: arm64: Avoid unnecessary unmap walk in MEM_RELINQUISH hypercall
  ANDROID: KVM: arm64: Strictly check page type in MEM_RELINQUISH hypercall
  ANDROID: KVM: Include prototype for page_relinquish before definition
  ANDROID: arm64: ioremap/iounmap use stage-2 granule size
  ANDROID: arm64: Check if pfn is valid for all ioremap loop iterations
  ANDROID: arm64: Auto-enroll MMIO guard on protected vms
  ANDROID: KVM: arm64: Add some documentation for the MMIO guard feature
  ANDROID: KVM: arm64: Plumb MMIO checking into the fault handling
  ANDROID: KVM: arm64: pkvm: Wire MMIO guard hypercalls
  ANDROID: KVM: arm64: pkvm: Add MMIO guard infrastructure
  ANDROID: KVM: arm64: Introduce KVM_ARCH_FLAG_MMIO_GUARD flag
  ANDROID: KVM: arm64: Expose topup_hyp_memcache() to the rest of KVM
  ANDROID: KVM: arm64: Define MMIO guard hypercalls
  ANDROID: KVM: arm64: FAR_EL2 mask as a define
  ANDROID: KVM: arm64: Turn kvm_pgtable_stage2_set_owner into kvm_pgtable_stage2_annotate
  ANDROID: memory relinquish: Fix build dependencies
  ANDROID: KVM: arm64: Monitor Debug support for non-protected guests
  ANDROID: KVM: arm64: Factor out code for saving/restoring guest debug regs
  ANDROID: KVM: arm64: Flush the vcpu iflags for non-protected VMs
  ANDROID: virtio_balloon: Do not translate reported pages through DMA API
  ANDROID: KVM: arm64: balloon: Notify hyp before reporting free pages to host
  ANDROID: KVM: arm64: memory balloon: Notify hyp when ballooning
  ANDROID: Define mem_relinquish interface for releasing memory to a hypervisor.
  ANDROID: KVM: arm64: Implement MEM_RELINQUISH SMCCC hypercall
  ANDROID: KVM: arm64: Turn llist of pinned pages into an rb-tree
  FROMLIST: KVM: arm64: pkvm: Fixup boot mode to reflect that the kernel resumes from EL1
  ANDROID: KVM: arm64: Add .hyp.data section
  ANDROID: KVM: arm64: relay entropy requests from protected guests directly to secure
  ANDROID: KVM: arm64: Introduce KVM_CAP_ARM_PROTECTED_VM to set/query PVM firmware
  ANDROID: KVM: arm64: Reset primary vCPU according to PVM firmware boot protocol
  ANDROID: KVM: arm64: Copy pvmfw into guest pages during donation from the host
  ANDROID: KVM: arm64: Clear pvmfw pages on clean host shutdown
  ANDROID: KVM: arm64: Ignore length of 0 in kvm_flush_dcache_to_poc()
  ANDROID: KVM: arm64: Unmap PVM firmware from host stage-2 during de-privilege
  ANDROID: KVM: arm64: Parse reserved-memory node for pkvm guest firmware region
  ANDROID: Documentation: KVM: Add some documentation for Protected KVM on arm64
  ANDROID: BACKPORT: KVM: arm64: Introduce KVM_VM_TYPE_ARM_PROTECTED machine type for PVMs
  ANDROID: KVM: arm64: Expose memory sharing hypercalls to protected guests
  ANDROID: KVM: arm64: Reformat/beautify PTP hypercall documentation
  ANDROID: KVM: arm64: Document the KVM/arm64-specific calls in hypercalls.rst
  ANDROID: KVM: arm64: Rename firmware pseudo-register documentation file
  ANDROID: KVM: arm64: Extend memory sharing to allow guest-to-host transitions
  ANDROID: KVM: arm64: Avoid BBM when changing only s/w bits in Stage-2 PTE
  ANDROID: KVM: arm64: Support TLB invalidation in guest context
  ANDROID: KVM: arm64: Inject SIGSEGV on illegal accesses
  ANDROID: KVM: arm64: Refactor enter_exception64()
  ANDROID: KVM: arm64: Add is_pkvm_initialized() helper
  ANDROID: KVM: arm64: Don't expose TLBI hypercalls after de-privilege
  ANDROID: KVM: arm64: Handle PSCI for protected VMs in EL2
  ANDROID: KVM: arm64: Factor out vcpu_reset code for core registers and PSCI
  ANDROID: KVM: arm64: Move some kvm_psci functions to a shared header
  ANDROID: KVM: arm64: Move pstate reset values to kvm_arm.h
  ANDROID: KVM: arm64: Add HVC handling for protected guests at EL2
  ANDROID: KVM: arm64: Track the SVE state in the hypervisor vcpu structure
  ANDROID: KVM: arm64: Initialize hypervisor vm state at EL2
  ANDROID: KVM: arm64: Refactor kvm_vcpu_enable_ptrauth() for hyp use
  ANDROID: KVM: arm64: Do not update virtual timer state for protected VMs
  ANDROID: KVM: arm64: Move vgic state between host and hypervisor vcpu structures
  ANDROID: KVM: arm64: Add EL2 entry/exit handlers for pKVM guests
  ANDROID: KVM: arm64: Donate memory to protected guests
  ANDROID: KVM: arm64: Force injection of a data abort on NISV MMIO exit
  ANDROID: KVM: arm64: Advertise GICv3 sysreg interface to protected guests
  ANDROID: KVM: arm64: Fix initializing traps in protected mode
  ANDROID: KVM: arm64: Move pkvm_vcpu_init_traps to hyp vcpu init
  ANDROID: KVM: arm64: Reset sysregs for protected VMs
  ANDROID: KVM: arm64: Refactor reset_mpidr to extract its computation
  ANDROID: KVM: arm64: Do not support MTE for protected VMs
  ANDROID: KVM: arm64: Restrict protected VM capabilities
  ANDROID: KVM: arm64: Trap debug break and watch from guest
  ANDROID: KVM: arm64: Check directly whether the vcpu is protected
  ANDROID: KVM: arm64: Reduce host/hyp vcpu state copying
  ANDROID: KVM: arm64: Lazy host FP save/restore
  ANDROID: KVM: arm64: Reintroduce __sve_save_state
  ANDROID: KVM: arm64: Introduce lazy-ish state sync for non-protected VMs
  ANDROID: KVM: arm64: Introduce per-EC entry/exit handlers
  ANDROID: KVM: arm64: Ensure that TLBs and I-cache are private to each vcpu
  ANDROID: KVM: arm64: Add hyp per_cpu variable to track current physical cpu number
  ANDROID: KVM: arm64: Skip __kvm_adjust_pc() for protected vcpus
  ANDROID: KVM: arm64: Add current host and hyp vCPU lookup primitive
  ANDROID: KVM: arm64: Introduce the pkvm_vcpu_{load,put} hypercalls
  ANDROID: KVM: arm64: Add the {flush,sync}_hyp_timer_state() primitives
  ANDROID: KVM: arm64: Introduce predicates to check for protected state
  ANDROID: KVM: arm64: Add the {flush,sync}_hyp_vgic_state() primitives
  ANDROID: KVM: arm64: Simplify vgic-v3 hypercalls
  ANDROID: KVM: arm64: Make vcpu_{read,write}_sys_reg available to HYP code
  ANDROID: KVM: arm64: Split up nvhe/fixed_config.h
  ANDROID: KVM: arm64: Extend memory donation to allow host-to-guest transitions
  ANDROID: KVM: arm64: Handle guest stage-2 page-tables entirely at EL2
  ANDROID: KVM: arm64: Disallow dirty logging and RO memslots with pKVM
  ANDROID: KVM: arm64: Do not allow memslot changes after first VM run under pKVM
  ANDROID: KVM: arm64: Check for PTE validity when checking for executable/cacheable
  ANDROID: KVM: arm64: Extend memory sharing to allow host-to-guest transitions
  ANDROID: KVM: arm64: Provide a hypercall for the host to reclaim guest memory
  ANDROID: KVM: arm64: Add PC_UPDATE_REQ flags covering all PC updates
  ANDROID: KVM: arm64: Add vcpu flag copy primitive
  ANDROID: KVM: arm64: Repurpose a byte of 'order' for flags in 'struct hyp_page'
  FROMLIST: KVM: arm64: Use the pKVM hyp vCPU structure in handle___kvm_vcpu_run()
  FROMLIST: KVM: arm64: Don't unnecessarily map host kernel sections at EL2
  FROMLIST: KVM: arm64: Explicitly map 'kvm_vgic_global_state' at EL2
  FROMLIST: KVM: arm64: Maintain a copy of 'kvm_arm_vmid_bits' at EL2
  FROMLIST: KVM: arm64: Unmap 'kvm_arm_hyp_percpu_base' from the host
  FROMLIST: BACKPORT: KVM: arm64: Return guest memory from EL2 via dedicated teardown memcache
  FROMLIST: KVM: arm64: Instantiate guest stage-2 page-tables at EL2
  FROMLIST: KVM: arm64: Consolidate stage-2 initialisation into a single function
  FROMLIST: KVM: arm64: Add generic hyp_memcache helpers
  FROMLIST: KVM: arm64: Provide I-cache invalidation by virtual address at EL2
  FROMLIST: KVM: arm64: Initialise hypervisor copies of host symbols unconditionally
  FROMLIST: KVM: arm64: Add per-cpu fixmap infrastructure at EL2
  FROMLIST: KVM: arm64: Instantiate pKVM hypervisor VM and vCPU structures from EL1
  FROMLIST: KVM: arm64: Add infrastructure to create and track pKVM instances at EL2
  FROMLIST: KVM: arm64: Rename 'host_kvm' to 'host_mmu'
  FROMLIST: KVM: arm64: Add hyp_spinlock_t static initializer
  FROMLIST: KVM: arm64: Include asm/kvm_mmu.h in nvhe/mem_protect.h
  FROMLIST: KVM: arm64: Add helpers to pin memory shared with the hypervisor at EL2
  FROMLIST: KVM: arm64: Prevent the donation of no-map pages
  FROMLIST: KVM: arm64: Implement do_donate() helper for donating memory
  FROMLIST: KVM: arm64: Unify identifiers used to distinguish host and hypervisor
  FROMLIST: KVM: arm64: Fix-up hyp stage-1 refcounts for all pages mapped at EL2
  FROMLIST: KVM: arm64: Back the hypervisor 'struct hyp_page' array for all memory
  FROMLIST: KVM: arm64: Allow attaching of non-coalescable pages to a hyp pool
  FROMLIST: KVM: arm64: Move hyp refcount manipulation helpers to common header file
  ANDROID: arm64: Register earlycon fixmap with the MMIO guard
  ANDROID: arm64: Add a helper to retrieve the PTE of a fixmap
  ANDROID: BACKPORT: arm64: Enroll into KVM's MMIO guard if required
  ANDROID: BACKPORT: arm64: Implement ioremap/iounmap hooks calling into KVM's MMIO guard
  ANDROID: mm/vmalloc: Add arch-specific callbacks to track io{remap,unmap} physical pages
  ANDROID: BACKPORT: arm64: mm: Implement memory encryption API using KVM sharing hypercalls
  ANDROID: drivers: hv: Include memory encryption header
  FROMLIST: firmware/smccc: Call arch-specific hook on discovering KVM services
  ANDROID: GKI: Enable CONFIG_CFI_CLANG

 Conflicts:
	drivers/android/vendor_hooks.c
	include/linux/compaction.h
	include/trace/hooks/timer.h
	mm/compaction.c

Change-Id: Ie45fe12a1d6b67f4edb8c23ebb4409754b063385
Upstream-Build: ks_qcom-android14-6.1-keystone-qcom-release@9501271 UKQ2.230118.001
Signed-off-by: jianzhou <quic_jianzhou@quicinc.com>

@@ -327,6 +327,23 @@ copy_to_dist_dir(
    flat = True,
)
kernel_build(
    name = "fips140",
    outs = [],
    base_kernel = ":kernel_aarch64",
    build_config = "build.config.gki.aarch64.fips140",
    module_outs = ["crypto/fips140.ko"],
)
copy_to_dist_dir(
    name = "fips140_dist",
    data = [
        ":fips140",
    ],
    dist_dir = "out/fips140/dist",
    flat = True,
)
# allmodconfig build tests.
# These are build tests only, so:
# - outs are intentionally set to empty to not copy anything to DIST_DIR


@@ -99,6 +99,12 @@ Description: Controls the issue rate of discard commands that consist of small
checkpoint is triggered, and issued during the checkpoint.
By default, it is disabled with 0.
What: /sys/fs/f2fs/<disk>/max_ordered_discard
Date: October 2022
Contact: "Yangtao Li" <frank.li@vivo.com>
Description: Controls the maximum ordered discard, the unit size is one block(4KB).
Set it to 16 by default.
What: /sys/fs/f2fs/<disk>/max_discard_request
Date: December 2021
Contact: "Konstantin Vyshetsky" <vkon@google.com>
@@ -132,7 +138,8 @@ Contact: "Chao Yu" <yuchao0@huawei.com>
Description: Controls discard granularity of inner discard thread. Inner thread
will not issue discards with size that is smaller than granularity.
The unit size is one block(4KB), now only support configuring
in range of [1, 512]. Default value is 4(=16KB).
in range of [1, 512]. Default value is 16.
For small devices, default value is 1.
What: /sys/fs/f2fs/<disk>/umount_discard_timeout
Date: January 2019
@@ -235,7 +242,7 @@ Description: Shows total written kbytes issued to disk.
What: /sys/fs/f2fs/<disk>/features
Date: July 2017
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description: <deprecated: should use /sys/fs/f2fs/<disk>/feature_list/
Description: <deprecated: should use /sys/fs/f2fs/<disk>/feature_list/>
Shows all enabled features in current device.
Supported features:
encryption, blkzoned, extra_attr, projquota, inode_checksum,
@@ -592,10 +599,10 @@ Description: With "mode=fragment:block" mount options, we can scatter block allo
in the length of 1..<max_fragment_hole> by turns. This value can be set
between 1..512 and the default value is 4.
What: /sys/fs/f2fs/<disk>/gc_urgent_high_remaining
Date: December 2021
Contact: "Daeho Jeong" <daehojeong@google.com>
Description: You can set the trial count limit for GC urgent high mode with this value.
What: /sys/fs/f2fs/<disk>/gc_remaining_trials
Date: October 2022
Contact: "Yangtao Li" <frank.li@vivo.com>
Description: You can set the trial count limit for GC urgent and idle mode with this value.
If GC thread gets to the limit, the mode will turn back to GC normal mode.
By default, the value is zero, which means there is no limit like before.
@@ -634,3 +641,31 @@ Date: July 2022
Contact: "Daeho Jeong" <daehojeong@google.com>
Description: Show the accumulated total revoked atomic write block count after boot.
If you write "0" here, you can initialize to "0".
What: /sys/fs/f2fs/<disk>/gc_mode
Date: October 2022
Contact: "Yangtao Li" <frank.li@vivo.com>
Description: Show the current gc_mode as a string.
This is a read-only entry.
What: /sys/fs/f2fs/<disk>/discard_urgent_util
Date: November 2022
Contact: "Yangtao Li" <frank.li@vivo.com>
Description: When space utilization exceeds this, do background DISCARD aggressively.
Does DISCARD forcibly in a period of given min_discard_issue_time when the number
of discards is not 0 and set discard granularity to 1.
Default: 80
What: /sys/fs/f2fs/<disk>/hot_data_age_threshold
Date: November 2022
Contact: "Ping Xiong" <xiongping1@xiaomi.com>
Description: When DATA SEPARATION is on, it controls the age threshold to indicate
the data blocks as hot. By default it was initialized as 262144 blocks
(equals to 1GB).
What: /sys/fs/f2fs/<disk>/warm_data_age_threshold
Date: November 2022
Contact: "Ping Xiong" <xiongping1@xiaomi.com>
Description: When DATA SEPARATION is on, it controls the age threshold to indicate
the data blocks as warm. By default it was initialized as 2621440 blocks
(equals to 10GB).


@@ -0,0 +1,19 @@
What: /sys/fs/fuse/features/fuse_bpf
Date: December 2022
Contact: Paul Lawrence <paullawrence@google.com>
Description:
Read-only file that contains the word 'supported' if fuse-bpf is
supported, does not exist otherwise
What: /sys/fs/fuse/bpf_prog_type_fuse
Date: December 2022
Contact: Paul Lawrence <paullawrence@google.com>
Description:
bpf_prog_type_fuse defines the program type of bpf programs that
may be passed to fuse-bpf. For upstream bpf program types, this
is a constant defined in a contiguous array of constants.
bpf_prog_type_fuse is appended to the end of the list, so it may
change and therefore its value must be read from this file.
Contents is ASCII decimal representation of bpf_prog_type_fuse
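
A minimal userspace sketch of consuming these two entries; the paths come from
the text above, everything else (including the program itself) is illustrative
only::

   #include <stdio.h>
   #include <stdlib.h>

   int main(void)
   {
       FILE *f;
       char word[16];
       int prog_type;

       /* Exists (and reads "supported") only when fuse-bpf is available. */
       f = fopen("/sys/fs/fuse/features/fuse_bpf", "r");
       if (f) {
           if (fscanf(f, "%15s", word) == 1)
               printf("fuse-bpf: %s\n", word);
           fclose(f);
       }

       /* The program type is not a fixed constant, so read it at runtime. */
       f = fopen("/sys/fs/fuse/bpf_prog_type_fuse", "r");
       if (!f || fscanf(f, "%d", &prog_type) != 1)
           return EXIT_FAILURE;
       fclose(f);
       printf("bpf_prog_type_fuse = %d\n", prog_type);
       return 0;
   }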


@@ -2184,6 +2184,9 @@
1 - Bypass the IOMMU for DMA.
unset - Use value of CONFIG_IOMMU_DEFAULT_PASSTHROUGH.
ioremap_guard [ARM64] enable the KVM MMIO guard functionality
if available.
io7= [HW] IO7 for Marvel-based Alpha systems
See comment before marvel_specify_io7 in
arch/alpha/kernel/core_marvel.c.
@@ -2529,7 +2532,9 @@
protected guests.
protected: nVHE-based mode with support for guests whose
state is kept private from the host.
state is kept private from the host. See
Documentation/virt/kvm/arm/pkvm.rst for more
information about this mode of operation.
Defaults to VHE/nVHE based on hardware support. Setting
mode to "protected" will disable kexec and hibernation


@@ -142,7 +142,7 @@ Therefore, we also introduce *blk-crypto-fallback*, which is an implementation
of inline encryption using the kernel crypto API. blk-crypto-fallback is built
into the block layer, so it works on any block device without any special setup.
Essentially, when a bio with an encryption context is submitted to a
request_queue that doesn't support that encryption context, the block layer will
block_device that doesn't support that encryption context, the block layer will
handle en/decryption of the bio using blk-crypto-fallback.
For encryption, the data cannot be encrypted in-place, as callers usually rely
@@ -187,7 +187,7 @@ API presented to users of the block layer
``blk_crypto_config_supported()`` allows users to check ahead of time whether
inline encryption with particular crypto settings will work on a particular
request_queue -- either via hardware or via blk-crypto-fallback. This function
block_device -- either via hardware or via blk-crypto-fallback. This function
takes in a ``struct blk_crypto_config`` which is like blk_crypto_key, but omits
the actual bytes of the key and instead just contains the algorithm, data unit
size, etc. This function can be useful if blk-crypto-fallback is disabled.
@@ -195,7 +195,7 @@ size, etc. This function can be useful if blk-crypto-fallback is disabled.
``blk_crypto_init_key()`` allows users to initialize a blk_crypto_key.
Users must call ``blk_crypto_start_using_key()`` before actually starting to use
a blk_crypto_key on a request_queue (even if ``blk_crypto_config_supported()``
a blk_crypto_key on a block_device (even if ``blk_crypto_config_supported()``
was called earlier). This is needed to initialize blk-crypto-fallback if it
will be needed. This must not be called from the data path, as this may have to
allocate resources, which may deadlock in that case.
@@ -207,7 +207,7 @@ for en/decryption. Users don't need to worry about freeing the bio_crypt_ctx
later, as that happens automatically when the bio is freed or reset.
Finally, when done using inline encryption with a blk_crypto_key on a
request_queue, users must call ``blk_crypto_evict_key()``. This ensures that
block_device, users must call ``blk_crypto_evict_key()``. This ensures that
the key is evicted from all keyslots it may be programmed into and unlinked from
any kernel data structures it may be linked into.
@@ -221,9 +221,9 @@ as follows:
5. ``blk_crypto_evict_key()`` (after all I/O has completed)
6. Zeroize the blk_crypto_key (this has no dedicated function)
If a blk_crypto_key is being used on multiple request_queues, then
If a blk_crypto_key is being used on multiple block_devices, then
``blk_crypto_config_supported()`` (if used), ``blk_crypto_start_using_key()``,
and ``blk_crypto_evict_key()`` must be called on each request_queue.
and ``blk_crypto_evict_key()`` must be called on each block_device.
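
As a rough illustration of that sequence for an in-kernel user, the sketch
below assumes the upstream post-series prototypes in which the public helpers
take a ``struct block_device``; note that the hardware-wrapped key series in
this merge extends ``blk_crypto_init_key()`` with additional key-type/size
arguments, so the exact prototype in this tree may differ. The surrounding
driver context (bdev, bio, raw_key) is hypothetical::

   #include <linux/bio.h>
   #include <linux/blk-crypto.h>
   #include <linux/string.h>

   static int example_encrypted_write(struct block_device *bdev,
                                      struct bio *bio, const u8 raw_key[32])
   {
       struct blk_crypto_key key;
       u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE] = { 0 };
       int err;

       /* Steps 1-2: initialise the key and set up blk-crypto-fallback if needed. */
       err = blk_crypto_init_key(&key, raw_key,
                                 BLK_ENCRYPTION_MODE_AES_256_XTS,
                                 8 /* dun_bytes */, 4096 /* data unit size */);
       if (err)
           return err;
       err = blk_crypto_start_using_key(bdev, &key);
       if (err)
           return err;

       /* Steps 3-4: attach the encryption context and submit the I/O. */
       bio_crypt_set_ctx(bio, &key, dun, GFP_NOIO);
       err = submit_bio_wait(bio);

       /* Steps 5-6: once I/O has completed, evict and zeroize the key. */
       blk_crypto_evict_key(bdev, &key);
       memzero_explicit(&key, sizeof(key));
       return err;
   }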
API presented to device drivers
===============================
@@ -388,8 +388,8 @@ such as in file-based encryption. Key wrapping is a commonly used technique.)
The key which wraps (encrypts) hardware-wrapped keys is a hardware-internal key
that is never exposed to software; it is either a persistent key (a "long-term
wrapping key") or a per-boot key (an "ephemeral wrapping key"). The long-term
wrapped form of the key is what is initially unlocked, but it is discarded as
soon as it is converted into an ephemerally-wrapped key. In-use
wrapped form of the key is what is initially unlocked, but it is erased from
memory as soon as it is converted into an ephemerally-wrapped key. In-use
hardware-wrapped keys are always ephemerally-wrapped, not long-term wrapped.
As inline encryption hardware can only be used to encrypt/decrypt data on-disk,
@@ -442,8 +442,8 @@ The components are:
for cryptographic applications that require up to a 256-bit security strength.
Some use cases (e.g. full-disk encryption) won't require the software secret.
Example: in the case of fscrypt, the fscrypt master key (the key used to unlock
a particular set of encrypted directories) is made hardware-wrapped. The inline
Example: in the case of fscrypt, the fscrypt master key (the key that protects a
particular set of encrypted directories) is made hardware-wrapped. The inline
encryption key is used as the file contents encryption key, while the software
secret (rather than the master key directly) is used to key fscrypt's KDF
(HKDF-SHA512) to derive other subkeys such as filenames encryption keys.
@@ -512,5 +512,6 @@ the hardware RNG and its use to generate the key, as well as the testing of the
"import" mode as that should cover all parts other than the key generation.
For an example of a test that verifies the ciphertext written to disk in the
"import" mode, see `Android's vts_kernel_encryption_test
"import" mode, see the fscrypt hardware-wrapped key tests in xfstests, or
`Android's vts_kernel_encryption_test
<https://android.googlesource.com/platform/test/vts-testcase/kernel/+/refs/heads/master/encryption/>`_.


@@ -140,6 +140,23 @@ disabling KASAN altogether or controlling its features:
- ``kasan.vmalloc=off`` or ``=on`` disables or enables tagging of vmalloc
allocations (default: ``on``).
- ``kasan.page_alloc.sample=<sampling interval>`` makes KASAN tag only every
Nth page_alloc allocation with the order equal or greater than
``kasan.page_alloc.sample.order``, where N is the value of the ``sample``
parameter (default: ``1``, or tag every such allocation).
This parameter is intended to mitigate the performance overhead introduced
by KASAN.
Note that enabling this parameter makes Hardware Tag-Based KASAN skip checks
of allocations chosen by sampling and thus miss bad accesses to these
allocations. Use the default value for accurate bug detection.
- ``kasan.page_alloc.sample.order=<minimum page order>`` specifies the minimum
order of allocations that are affected by sampling (default: ``3``).
Only applies when ``kasan.page_alloc.sample`` is set to a value greater
than ``1``.
This parameter is intended to allow sampling only large page_alloc
allocations, which is the biggest source of the performance overhead.
Error reports
~~~~~~~~~~~~~


@@ -25,10 +25,14 @@ a consistency checking tool (fsck.f2fs), and a debugging tool (dump.f2fs).
- git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-tools.git
For reporting bugs and sending patches, please use the following mailing list:
For sending patches, please use the following mailing list:
- linux-f2fs-devel@lists.sourceforge.net
For reporting bugs, please use the following f2fs bug tracker link:
- https://bugzilla.kernel.org/enter_bug.cgi?product=File%20System&component=f2fs
Background and Design issues
============================
@@ -154,6 +158,8 @@ nobarrier This option can be used if underlying storage guarantees
If this option is set, no cache_flush commands are issued
but f2fs still guarantees the write ordering of all the
data writes.
barrier If this option is set, cache_flush commands are allowed to be
issued.
fastboot This option is used when a system wants to reduce mount
time as much as possible, even though normal performance
can be sacrificed.
@@ -199,6 +205,7 @@ fault_type=%d Support configuring fault injection type, should be
FAULT_SLAB_ALLOC 0x000008000
FAULT_DQUOT_INIT 0x000010000
FAULT_LOCK_OP 0x000020000
FAULT_BLKADDR 0x000040000
=================== ===========
mode=%s Control block allocation mode which supports "adaptive"
and "lfs". In "lfs" mode, there should be no random
@@ -340,6 +347,10 @@ memory=%s Control memory mode. This supports "normal" and "low" modes.
Because of the nature of low memory devices, in this mode, f2fs
will try to save memory sometimes by sacrificing performance.
"normal" mode is the default mode and same as before.
age_extent_cache Enable an age extent cache based on rb-tree. It records
data block update frequency of the extent per inode, in
order to provide better temperature hints for data block
allocation.
======================== ============================================================
Debugfs Entries


@@ -338,6 +338,7 @@ Currently, the following pairs of encryption modes are supported:
- AES-128-CBC for contents and AES-128-CTS-CBC for filenames
- Adiantum for both contents and filenames
- AES-256-XTS for contents and AES-256-HCTR2 for filenames (v2 policies only)
- SM4-XTS for contents and SM4-CTS-CBC for filenames (v2 policies only)
If unsure, you should use the (AES-256-XTS, AES-256-CTS-CBC) pair.
@@ -369,6 +370,12 @@ CONFIG_CRYPTO_HCTR2 must be enabled. Also, fast implementations of XCTR and
POLYVAL should be enabled, e.g. CRYPTO_POLYVAL_ARM64_CE and
CRYPTO_AES_ARM64_CE_BLK for ARM64.
SM4 is a Chinese block cipher that is an alternative to AES. It has
not seen as much security review as AES, and it only has a 128-bit key
size. It may be useful in cases where its use is mandated.
Otherwise, it should not be used. For SM4 support to be available, it
also needs to be enabled in the kernel crypto API.
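
For illustration only, a userspace sketch that applies the (SM4-XTS,
SM4-CTS-CBC) pair as a v2 policy on an empty directory. It assumes a uapi
<linux/fscrypt.h> new enough to define the SM4 mode constants and that the
master key was already added with FS_IOC_ADD_ENCRYPTION_KEY; the helper name
is made up::

   #include <string.h>
   #include <sys/ioctl.h>
   #include <linux/fscrypt.h>

   static int set_sm4_policy(int dir_fd,
                             const __u8 key_id[FSCRYPT_KEY_IDENTIFIER_SIZE])
   {
       struct fscrypt_policy_v2 policy = {
           .version = FSCRYPT_POLICY_V2,
           .contents_encryption_mode = FSCRYPT_MODE_SM4_XTS,
           .filenames_encryption_mode = FSCRYPT_MODE_SM4_CTS,
           .flags = FSCRYPT_POLICY_FLAGS_PAD_32,
       };

       memcpy(policy.master_key_identifier, key_id,
              FSCRYPT_KEY_IDENTIFIER_SIZE);
       /* Fails if the kernel was built without SM4 support. */
       return ioctl(dir_fd, FS_IOC_SET_ENCRYPTION_POLICY, &policy);
   }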
New encryption modes can be added relatively easily, without changes
to individual filesystems. However, authenticated encryption (AE)
modes are not currently supported because of the difficulty of dealing


@@ -350,7 +350,8 @@ Load an encrypted key "evm" from saved blob::
Instantiate an encrypted key "evm" using user-provided decrypted data::
$ keyctl add encrypted evm "new default user:kmk 32 `cat evm_decrypted_data.blob`" @u
$ evmkey=$(dd if=/dev/urandom bs=1 count=32 | xxd -c32 -p)
$ keyctl add encrypted evm "new default user:kmk 32 $evmkey" @u
794890253
$ keyctl print 794890253


@@ -6427,6 +6427,13 @@ Note that KVM does not skip the faulting instruction as it does for
KVM_EXIT_MMIO, but userspace has to emulate any change to the processing state
if it decides to decode and emulate the instruction.
This feature isn't available to protected VMs, as userspace does not
have access to the state that is required to perform the emulation.
Instead, a data abort exception is directly injected in the guest.
Note that although KVM_CAP_ARM_NISV_TO_USER will be reported if
queried outside of a protected VM context, the feature will not be
exposed if queried on a protected VM file descriptor.
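
A hedged userspace sketch of that behaviour (the helper name is made up):
querying the capability on a protected VM's file descriptor is expected to
return 0 even when /dev/kvm advertises it globally::

   #include <sys/ioctl.h>
   #include <linux/kvm.h>

   static int nisv_to_user_available(int vm_fd)
   {
       /* > 0 means the NISV exit can be requested for this VM. */
       return ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_NISV_TO_USER) > 0;
   }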
::
/* KVM_EXIT_X86_RDMSR / KVM_EXIT_X86_WRMSR */


@@ -0,0 +1,138 @@
.. SPDX-License-Identifier: GPL-2.0
=======================================
ARM firmware pseudo-registers interface
=======================================
KVM handles the hypercall services as requested by the guests. New hypercall
services are regularly made available by the ARM specification or by KVM (as
vendor services) if they make sense from a virtualization point of view.
This means that a guest booted on two different versions of KVM can observe
two different "firmware" revisions. This could cause issues if a given guest
is tied to a particular version of a hypercall service, or if a migration
causes a different version to be exposed out of the blue to an unsuspecting
guest.
In order to remedy this situation, KVM exposes a set of "firmware
pseudo-registers" that can be manipulated using the GET/SET_ONE_REG
interface. These registers can be saved/restored by userspace, and set
to a convenient value as required.
The following registers are defined:
* KVM_REG_ARM_PSCI_VERSION:
KVM implements the PSCI (Power State Coordination Interface)
specification in order to provide services such as CPU on/off, reset
and power-off to the guest.
- Only valid if the vcpu has the KVM_ARM_VCPU_PSCI_0_2 feature set
(and thus has already been initialized)
- Returns the current PSCI version on GET_ONE_REG (defaulting to the
highest PSCI version implemented by KVM and compatible with v0.2)
- Allows any PSCI version implemented by KVM and compatible with
v0.2 to be set with SET_ONE_REG
- Affects the whole VM (even if the register view is per-vcpu)
* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
Holds the state of the firmware support to mitigate CVE-2017-5715, as
offered by KVM to the guest via a HVC call. The workaround is described
under SMCCC_ARCH_WORKAROUND_1 in [1].
Accepted values are:
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL:
KVM does not offer
firmware support for the workaround. The mitigation status for the
guest is unknown.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL:
The workaround HVC call is
available to the guest and required for the mitigation.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED:
The workaround HVC call
is available to the guest, but it is not needed on this VCPU.
* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
Holds the state of the firmware support to mitigate CVE-2018-3639, as
offered by KVM to the guest via a HVC call. The workaround is described
under SMCCC_ARCH_WORKAROUND_2 in [1]_.
Accepted values are:
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL:
A workaround is not
available. KVM does not offer firmware support for the workaround.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN:
The workaround state is
unknown. KVM does not offer firmware support for the workaround.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
The workaround is available,
and can be disabled by a vCPU. If
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED is set, it is active for
this vCPU.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED:
The workaround is always active on this vCPU or it is not needed.
Bitmap Feature Firmware Registers
---------------------------------
Contrary to the above registers, the following registers exposes the
hypercall services in the form of a feature-bitmap to the userspace. This
bitmap is translated to the services that are available to the guest.
There is a register defined per service call owner and can be accessed via
GET/SET_ONE_REG interface.
By default, these registers are set with the upper limit of the features
that are supported. This way userspace can discover all the usable
hypercall services via GET_ONE_REG. The user-space can write-back the
desired bitmap back via SET_ONE_REG. The features for the registers that
are untouched, probably because userspace isn't aware of them, will be
exposed as is to the guest.
Note that KVM will not allow the userspace to configure the registers
anymore once any of the vCPUs has run at least once. Instead, it will
return a -EBUSY.
The pseudo-firmware bitmap register are as follows:
* KVM_REG_ARM_STD_BMAP:
Controls the bitmap of the ARM Standard Secure Service Calls.
The following bits are accepted:
Bit-0: KVM_REG_ARM_STD_BIT_TRNG_V1_0:
The bit represents the services offered under v1.0 of ARM True Random
Number Generator (TRNG) specification, ARM DEN0098.
* KVM_REG_ARM_STD_HYP_BMAP:
Controls the bitmap of the ARM Standard Hypervisor Service Calls.
The following bits are accepted:
Bit-0: KVM_REG_ARM_STD_HYP_BIT_PV_TIME:
The bit represents the Paravirtualized Time service as represented by
ARM DEN0057A.
* KVM_REG_ARM_VENDOR_HYP_BMAP:
Controls the bitmap of the Vendor specific Hypervisor Service Calls.
The following bits are accepted:
Bit-0: KVM_REG_ARM_VENDOR_HYP_BIT_FUNC_FEAT
The bit represents the ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID
and ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID function-ids.
Bit-1: KVM_REG_ARM_VENDOR_HYP_BIT_PTP:
The bit represents the Precision Time Protocol KVM service.
Errors:
======= =============================================================
-ENOENT Unknown register accessed.
-EBUSY Attempt a 'write' to the register after the VM has started.
-EINVAL Invalid bitmap written to the register.
======= =============================================================
.. [1] https://developer.arm.com/-/media/developer/pdf/ARM_DEN_0070A_Firmware_interfaces_for_mitigating_CVE-2017-5715.pdf
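
To make the GET/SET_ONE_REG flow above concrete, here is a hedged, arm64-only
userspace sketch that reads KVM_REG_ARM_PSCI_VERSION from an already-created
vCPU file descriptor; the helper name is illustrative::

   #include <stdint.h>
   #include <sys/ioctl.h>
   #include <linux/kvm.h>

   static int read_psci_version(int vcpu_fd, uint64_t *version)
   {
       struct kvm_one_reg reg = {
           .id   = KVM_REG_ARM_PSCI_VERSION,
           .addr = (uintptr_t)version,
       };

       /* Requires the vCPU to have the KVM_ARM_VCPU_PSCI_0_2 feature set. */
       return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
   }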


@@ -1,138 +1,118 @@
.. SPDX-License-Identifier: GPL-2.0
=======================
ARM Hypercall Interface
=======================
===============================================
KVM/arm64-specific hypercalls exposed to guests
===============================================
KVM handles the hypercall services as requested by the guests. New hypercall
services are regularly made available by the ARM specification or by KVM (as
vendor services) if they make sense from a virtualization point of view.
This file documents the KVM/arm64-specific hypercalls which may be
exposed by KVM/arm64 to guest operating systems. These hypercalls are
issued using the HVC instruction according to version 1.1 of the Arm SMC
Calling Convention (DEN0028/C):
This means that a guest booted on two different versions of KVM can observe
two different "firmware" revisions. This could cause issues if a given guest
is tied to a particular version of a hypercall service, or if a migration
causes a different version to be exposed out of the blue to an unsuspecting
guest.
https://developer.arm.com/docs/den0028/c
In order to remedy this situation, KVM exposes a set of "firmware
pseudo-registers" that can be manipulated using the GET/SET_ONE_REG
interface. These registers can be saved/restored by userspace, and set
to a convenient value as required.
All KVM/arm64-specific hypercalls are allocated within the "Vendor
Specific Hypervisor Service Call" range with a UID of
``28b46fb6-2ec5-11e9-a9ca-4b564d003a74``. This UID should be queried by the
guest using the standard "Call UID" function for the service range in
order to determine that the KVM/arm64-specific hypercalls are available.
The following registers are defined:
``ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID``
---------------------------------------------
* KVM_REG_ARM_PSCI_VERSION:
Provides a discovery mechanism for other KVM/arm64 hypercalls.
KVM implements the PSCI (Power State Coordination Interface)
specification in order to provide services such as CPU on/off, reset
and power-off to the guest.
+---------------------+-------------------------------------------------------------+
| Presence: | Mandatory for the KVM/arm64 UID |
+---------------------+-------------------------------------------------------------+
| Calling convention: | HVC32 |
+---------------------+----------+--------------------------------------------------+
| Function ID: | (uint32) | 0x86000000 |
+---------------------+----------+--------------------------------------------------+
| Arguments: | None |
+---------------------+----------+----+---------------------------------------------+
| Return Values: | (uint32) | R0 | Bitmap of available function numbers 0-31 |
| +----------+----+---------------------------------------------+
| | (uint32) | R1 | Bitmap of available function numbers 32-63 |
| +----------+----+---------------------------------------------+
| | (uint32) | R2 | Bitmap of available function numbers 64-95 |
| +----------+----+---------------------------------------------+
| | (uint32) | R3 | Bitmap of available function numbers 96-127 |
+---------------------+----------+----+---------------------------------------------+
- Only valid if the vcpu has the KVM_ARM_VCPU_PSCI_0_2 feature set
(and thus has already been initialized)
- Returns the current PSCI version on GET_ONE_REG (defaulting to the
highest PSCI version implemented by KVM and compatible with v0.2)
- Allows any PSCI version implemented by KVM and compatible with
v0.2 to be set with SET_ONE_REG
- Affects the whole VM (even if the register view is per-vcpu)
* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:

Holds the state of the firmware support to mitigate CVE-2017-5715, as
offered by KVM to the guest via a HVC call. The workaround is described
under SMCCC_ARCH_WORKAROUND_1 in [1]_.

Accepted values are:

KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL:
KVM does not offer firmware support for the workaround. The mitigation
status for the guest is unknown.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL:
The workaround HVC call is available to the guest and required for the
mitigation.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED:
The workaround HVC call is available to the guest, but it is not needed
on this VCPU.

* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:

Holds the state of the firmware support to mitigate CVE-2018-3639, as
offered by KVM to the guest via a HVC call. The workaround is described
under SMCCC_ARCH_WORKAROUND_2 in [1]_.

Accepted values are:

KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL:
A workaround is not available. KVM does not offer firmware support for
the workaround.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN:
The workaround state is unknown. KVM does not offer firmware support
for the workaround.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
The workaround is available, and can be disabled by a vCPU. If
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED is set, it is active for
this vCPU.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED:
The workaround is always active on this vCPU or it is not needed.

Bitmap Feature Firmware Registers
---------------------------------

Contrary to the above registers, the following registers expose the
hypercall services in the form of a feature bitmap to userspace. This
bitmap is translated to the services that are available to the guest.
There is one register defined per service call owner, accessible via the
GET/SET_ONE_REG interface.

By default, these registers are set with the upper limit of the features
that are supported. This way userspace can discover all the usable
hypercall services via GET_ONE_REG and write back the desired bitmap via
SET_ONE_REG. The features for registers that are left untouched, probably
because userspace isn't aware of them, will be exposed as-is to the guest.

Note that KVM does not allow userspace to configure these registers once
any of the vCPUs has run at least once; such a write returns -EBUSY.

The pseudo-firmware bitmap registers are as follows:

* KVM_REG_ARM_STD_BMAP:
Controls the bitmap of the ARM Standard Secure Service Calls.

The following bits are accepted:

Bit-0: KVM_REG_ARM_STD_BIT_TRNG_V1_0:
The bit represents the services offered under v1.0 of the ARM True Random
Number Generator (TRNG) specification, ARM DEN0098.

* KVM_REG_ARM_STD_HYP_BMAP:
Controls the bitmap of the ARM Standard Hypervisor Service Calls.

The following bits are accepted:

Bit-0: KVM_REG_ARM_STD_HYP_BIT_PV_TIME:
The bit represents the Paravirtualized Time service as represented by
ARM DEN0057A.

* KVM_REG_ARM_VENDOR_HYP_BMAP:
Controls the bitmap of the Vendor specific Hypervisor Service Calls.

The following bits are accepted:

Bit-0: KVM_REG_ARM_VENDOR_HYP_BIT_FUNC_FEAT:
The bit represents the ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID
and ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID function-ids.
Bit-1: KVM_REG_ARM_VENDOR_HYP_BIT_PTP:
The bit represents the Precision Time Protocol KVM service.

Errors:

======= =============================================================
-ENOENT Unknown register accessed.
-EBUSY Attempt a 'write' to the register after the VM has started.
-EINVAL Invalid bitmap written to the register.
======= =============================================================

.. [1] https://developer.arm.com/-/media/developer/pdf/ARM_DEN_0070A_Firmware_interfaces_for_mitigating_CVE-2017-5715.pdf

``ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID``
----------------------------------------

See ptp_kvm.rst.

``ARM_SMCCC_KVM_FUNC_HYP_MEMINFO``
----------------------------------

Query the memory protection parameters for a protected virtual machine.

+---------------------+-------------------------------------------------------------+
| Presence: | Optional; protected guests only. |
+---------------------+-------------------------------------------------------------+
| Calling convention: | HVC64 |
+---------------------+----------+--------------------------------------------------+
| Function ID: | (uint32) | 0xC6000002 |
+---------------------+----------+----+---------------------------------------------+
| Arguments: | (uint64) | R1 | Reserved / Must be zero |
| +----------+----+---------------------------------------------+
| | (uint64) | R2 | Reserved / Must be zero |
| +----------+----+---------------------------------------------+
| | (uint64) | R3 | Reserved / Must be zero |
+---------------------+----------+----+---------------------------------------------+
| Return Values: | (int64) | R0 | ``INVALID_PARAMETER (-3)`` on error, else |
| | | | memory protection granule in bytes |
+---------------------+----------+----+---------------------------------------------+

``ARM_SMCCC_KVM_FUNC_MEM_SHARE``
--------------------------------

Share a region of memory with the KVM host, granting it read, write and execute
permissions. The size of the region is equal to the memory protection granule
advertised by ``ARM_SMCCC_KVM_FUNC_HYP_MEMINFO``.

+---------------------+-------------------------------------------------------------+
| Presence: | Optional; protected guests only. |
+---------------------+-------------------------------------------------------------+
| Calling convention: | HVC64 |
+---------------------+----------+--------------------------------------------------+
| Function ID: | (uint32) | 0xC6000003 |
+---------------------+----------+----+---------------------------------------------+
| Arguments: | (uint64) | R1 | Base IPA of memory region to share |
| +----------+----+---------------------------------------------+
| | (uint64) | R2 | Reserved / Must be zero |
| +----------+----+---------------------------------------------+
| | (uint64) | R3 | Reserved / Must be zero |
+---------------------+----------+----+---------------------------------------------+
| Return Values: | (int64) | R0 | ``SUCCESS (0)`` |
| | | +---------------------------------------------+
| | | | ``INVALID_PARAMETER (-3)`` |
+---------------------+----------+----+---------------------------------------------+

``ARM_SMCCC_KVM_FUNC_MEM_UNSHARE``
----------------------------------

Revoke access permission from the KVM host to a memory region previously shared
with ``ARM_SMCCC_KVM_FUNC_MEM_SHARE``. The size of the region is equal to the
memory protection granule advertised by ``ARM_SMCCC_KVM_FUNC_HYP_MEMINFO``.

+---------------------+-------------------------------------------------------------+
| Presence: | Optional; protected guests only. |
+---------------------+-------------------------------------------------------------+
| Calling convention: | HVC64 |
+---------------------+----------+--------------------------------------------------+
| Function ID: | (uint32) | 0xC6000004 |
+---------------------+----------+----+---------------------------------------------+
| Arguments: | (uint64) | R1 | Base IPA of memory region to unshare |
| +----------+----+---------------------------------------------+
| | (uint64) | R2 | Reserved / Must be zero |
| +----------+----+---------------------------------------------+
| | (uint64) | R3 | Reserved / Must be zero |
+---------------------+----------+----+---------------------------------------------+
| Return Values: | (int64) | R0 | ``SUCCESS (0)`` |
| | | +---------------------------------------------+
| | | | ``INVALID_PARAMETER (-3)`` |
+---------------------+----------+----+---------------------------------------------+
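
As a rough guest-side illustration of the three calls above, the sketch below
queries the protection granule, shares one granule-sized buffer with the host
and then takes it back. This is a minimal sketch, not the in-kernel
implementation: the ``KVM_FUNC_*`` constants are defined locally from the
function IDs in the tables, buffer handling is simplified, and only
``arm_smccc_1_1_invoke()`` from ``<linux/arm-smccc.h>`` is assumed to be
available::

    /* Hypothetical sketch: share one protection granule with the host. */
    #include <linux/arm-smccc.h>
    #include <linux/errno.h>
    #include <linux/gfp.h>
    #include <linux/io.h>
    #include <linux/mm.h>

    #define KVM_FUNC_HYP_MEMINFO   0xC6000002   /* IDs from the tables above */
    #define KVM_FUNC_MEM_SHARE     0xC6000003
    #define KVM_FUNC_MEM_UNSHARE   0xC6000004

    static int share_one_granule_with_host(void)
    {
            struct arm_smccc_res res;
            unsigned long granule;
            phys_addr_t ipa;
            void *buf;
            int ret = 0;

            /* Ask the hypervisor for the memory protection granule size. */
            arm_smccc_1_1_invoke(KVM_FUNC_HYP_MEMINFO, 0, 0, 0, &res);
            if ((long)res.a0 < 0)
                    return -ENXIO;
            granule = res.a0;

            /* Allocate a granule-sized, naturally aligned buffer. */
            buf = (void *)__get_free_pages(GFP_KERNEL, get_order(granule));
            if (!buf)
                    return -ENOMEM;
            ipa = virt_to_phys(buf);        /* guest PA == IPA */

            /* Grant the host R/W/X access to the granule starting at 'ipa'. */
            arm_smccc_1_1_invoke(KVM_FUNC_MEM_SHARE, ipa, 0, 0, &res);
            if ((long)res.a0 < 0) {
                    ret = -EINVAL;
                    goto out_free;
            }

            /* ... exchange data with the host through 'buf' ... */

            /* Revoke the host's access before reusing the memory privately. */
            arm_smccc_1_1_invoke(KVM_FUNC_MEM_UNSHARE, ipa, 0, 0, &res);
            if ((long)res.a0 < 0)
                    ret = -EINVAL;

    out_free:
            free_pages((unsigned long)buf, get_order(granule));
            return ret;
    }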

View File

@ -7,7 +7,10 @@ ARM
.. toctree::
:maxdepth: 2
fw-pseudo-registers
hyp-abi
hypercalls
pkvm
pvtime
ptp_kvm
mmio-guard

View File

@ -0,0 +1,74 @@
.. SPDX-License-Identifier: GPL-2.0
==============
KVM MMIO guard
==============
KVM implements device emulation by handling translation faults to any
IPA range that is not contained in a memory slot. Such a translation
fault is in most cases passed on to userspace (or in rare cases to the
host kernel) with the address, size and possibly data of the access
for emulation.
Should the guest exit with an address that is not one that corresponds
to an emulatable device, userspace may take measures that are not the
most graceful as far as the guest is concerned (such as terminating it
or delivering a fatal exception).
There is also an element of trust: by forwarding the request to
userspace, the kernel assumes that the guest trusts userspace to do
the right thing.
The KVM MMIO guard offers a way to mitigate this last point: a guest
can request that only certain regions of the IPA space are valid as
MMIO. Only accesses to these regions will be handled as MMIO; any other
access will result in an exception being delivered to the guest.
This relies on a set of hypercalls defined in the KVM-specific range,
using the HVC64 calling convention.
* ARM_SMCCC_KVM_FUNC_MMIO_GUARD_INFO
============== ======== ================================
Function ID: (uint32) 0xC6000002
Arguments: none
Return Values: (int64) NOT_SUPPORTED(-1) on error, or
(uint64) Protection Granule (PG) size in
bytes (r0)
============== ======== ================================
* ARM_SMCCC_KVM_FUNC_MMIO_GUARD_ENROLL
============== ======== ==============================
Function ID: (uint32) 0xC6000003
Arguments: none
Return Values: (int64) NOT_SUPPORTED(-1) on error, or
RET_SUCCESS(0) (r0)
============== ======== ==============================
* ARM_SMCCC_KVM_FUNC_MMIO_GUARD_MAP
============== ======== ====================================
Function ID: (uint32) 0xC6000004
Arguments: (uint64) The base of the PG-sized IPA range
that is allowed to be accessed as
MMIO. Must be aligned to the PG size
(r1)
(uint64) Index in the MAIR_EL1 register
providing the memory attribute that
is used by the guest (r2)
Return Values: (int64) NOT_SUPPORTED(-1) on error, or
RET_SUCCESS(0) (r0)
============== ======== ====================================
* ARM_SMCCC_KVM_FUNC_MMIO_GUARD_UNMAP
============== ======== ======================================
Function ID: (uint32) 0xC6000005
Arguments: (uint64) Base of a PG-sized IPA range that has
previously been mapped with
MMIO_GUARD_MAP. Must be aligned to
the PG size (r1)
Return Values: (int64) NOT_SUPPORTED(-1) on error, or
RET_SUCCESS(0) (r0)
============== ======== ======================================
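
For illustration only, a guest that wants to use the MMIO guard could enroll
and then whitelist each device page it intends to access, roughly as sketched
below. This is a sketch under a few assumptions: the ``MMIO_GUARD_*`` constants
are defined locally from the function IDs listed above, ``arm_smccc_1_1_invoke()``
is used for the HVCs, and ``MT_DEVICE_nGnRE`` is taken as the MAIR_EL1 index
the guest maps the device with (the UNMAP call reverses a MAP in the same way)::

    /* Hypothetical sketch: opt in to the MMIO guard and whitelist one page. */
    #include <linux/align.h>
    #include <linux/arm-smccc.h>
    #include <linux/errno.h>
    #include <asm/memory.h>

    #define MMIO_GUARD_INFO    0xC6000002   /* function IDs from the list above */
    #define MMIO_GUARD_ENROLL  0xC6000003
    #define MMIO_GUARD_MAP     0xC6000004

    static int mmio_guard_allow_page(phys_addr_t mmio_ipa)
    {
            struct arm_smccc_res res;
            u64 granule;

            /* Discover the protection granule the hypervisor operates on. */
            arm_smccc_1_1_invoke(MMIO_GUARD_INFO, &res);
            if ((long)res.a0 < 0)
                    return -ENXIO;
            granule = res.a0;

            /* Enroll this VM: from now on, only mapped ranges count as MMIO. */
            arm_smccc_1_1_invoke(MMIO_GUARD_ENROLL, &res);
            if ((long)res.a0 < 0)
                    return -ENXIO;

            /* Allow MMIO emulation for the granule covering 'mmio_ipa'. */
            arm_smccc_1_1_invoke(MMIO_GUARD_MAP,
                                 ALIGN_DOWN(mmio_ipa, granule),
                                 MT_DEVICE_nGnRE, /* MAIR_EL1 index, an assumption */
                                 &res);
            return (long)res.a0 < 0 ? -EINVAL : 0;
    }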

View File

@ -0,0 +1,96 @@
.. SPDX-License-Identifier: GPL-2.0
Protected virtual machines (pKVM)
=================================
Introduction
------------
Protected KVM (pKVM) is a KVM/arm64 extension which uses the two-stage
translation capability of the Armv8 MMU to isolate guest memory from the host
system. This allows for the creation of a confidential computing environment
without relying on whizz-bang features in hardware, but still allowing room for
complementary technologies such as memory encryption and hardware-backed
attestation.
The major implementation change brought about by pKVM is that the hypervisor
code running at EL2 is now largely independent of (and isolated from) the rest
of the host kernel running at EL1 and therefore additional hypercalls are
introduced to manage manipulation of guest stage-2 page tables, creation of VM
data structures and reclamation of memory on teardown. An immediate consequence
of this change is that the host itself runs with an identity mapping enabled
at stage-2, providing the hypervisor code with a mechanism to restrict host
access to an arbitrary physical page.
Enabling pKVM
-------------
The pKVM hypervisor is enabled by booting the host kernel at EL2 with
"``kvm-arm.mode=protected``" on the command-line. Once enabled, VMs can be spawned
in either protected or non-protected state, although the hypervisor is still
responsible for managing most of the VM metadata in either case.
Limitations
-----------
Enabling pKVM places some significant limitations on KVM guests, regardless of
whether they are spawned in protected state. It is therefore recommended only
to enable pKVM if protected VMs are required, with non-protected state acting
primarily as a debug and development aid.
If you're still keen, then here is an incomplete list of caveats that apply
to all VMs running under pKVM:
- Guest memory cannot be file-backed (with the exception of shmem/memfd) and is
pinned as it is mapped into the guest. This prevents the host from
swapping-out, migrating, merging or generally doing anything useful with the
guest pages. It also requires that the VMM has either ``CAP_IPC_LOCK`` or
sufficient ``RLIMIT_MEMLOCK`` to account for this pinned memory.
- GICv2 is not supported and therefore GICv3 hardware is required in order
to expose a virtual GICv3 to the guest.
- Read-only memslots are unsupported and therefore dirty logging cannot be
enabled.
- Memslot configuration is fixed once a VM has started running, with subsequent
move or deletion requests being rejected with ``-EPERM``.
- There are probably many others.
Since the host is unable to tear down the hypervisor when pKVM is enabled,
hibernation (``CONFIG_HIBERNATION``) and kexec (``CONFIG_KEXEC``) will fail
with ``-EBUSY``.
If you are not happy with these limitations, then please don't enable pKVM :)
VM creation
-----------
When pKVM is enabled, protected VMs can be created by specifying the
``KVM_VM_TYPE_ARM_PROTECTED`` flag in the machine type identifier parameter
passed to ``KVM_CREATE_VM``.
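
A minimal VMM-side sketch follows, assuming the ``KVM_VM_TYPE_ARM_PROTECTED``
definition exported by the UAPI headers described here; error handling and the
usual vCPU/memslot setup are elided::

    /* Hypothetical VMM sketch: create a pKVM protected VM. */
    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int create_protected_vm(void)
    {
            int kvm_fd, vm_fd;

            kvm_fd = open("/dev/kvm", O_RDWR | O_CLOEXEC);
            if (kvm_fd < 0)
                    return -1;

            /*
             * KVM_VM_TYPE_ARM_PROTECTED is OR-ed into the machine type
             * identifier; an explicit IPA size could additionally be
             * requested with KVM_VM_TYPE_ARM_IPA_SIZE().
             */
            vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, KVM_VM_TYPE_ARM_PROTECTED);

            /* vCPUs, memslots, etc. are then set up as for a normal VM. */
            return vm_fd;
    }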
Protected VMs are instantiated according to a fixed vCPU configuration
described by the ID register definitions in
``arch/arm64/include/asm/kvm_pkvm.h``. Only a subset of the architectural
features that may be available to the host are exposed to the guest and the
capabilities advertised by ``KVM_CHECK_EXTENSION`` are limited accordingly,
with the vCPU registers being initialised to their architecturally-defined
values.
Where not defined by the architecture, the registers of a protected vCPU
are reset to zero with the exception of the PC and X0 which can be set
either by the ``KVM_SET_ONE_REG`` interface or by a call to PSCI ``CPU_ON``.
VM runtime
----------
By default, memory pages mapped into a protected guest are inaccessible to the
host and any attempt by the host to access such a page will result in the
injection of an abort at EL1 by the hypervisor. For accesses originating from
EL0, the host will then terminate the current task with a ``SIGSEGV``.
pKVM exposes additional hypercalls to protected guests, primarily for the
purpose of establishing shared-memory regions with the host for communication
and I/O. These hypercalls are documented in hypercalls.rst.

View File

@ -7,19 +7,29 @@ PTP_KVM is used for high precision time sync between host and guests.
It relies on transferring the wall clock and counter value from the
host to the guest using a KVM-specific hypercall.
* ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID: 0x86000001

This hypercall uses the SMC32/HVC32 calling convention:

ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID
============== ======== =====================================
Function ID: (uint32) 0x86000001
Arguments: (uint32) KVM_PTP_VIRT_COUNTER(0)
KVM_PTP_PHYS_COUNTER(1)
Return Values: (int32) NOT_SUPPORTED(-1) on error, or
(uint32) Upper 32 bits of wall clock time (r0)
(uint32) Lower 32 bits of wall clock time (r1)
(uint32) Upper 32 bits of counter (r2)
(uint32) Lower 32 bits of counter (r3)
Endianness: No Restrictions.
============== ======== =====================================

``ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID``
----------------------------------------

Retrieve current time information for the specific counter. There are no
endianness restrictions.

+---------------------+-------------------------------------------------------+
| Presence: | Optional |
+---------------------+-------------------------------------------------------+
| Calling convention: | HVC32 |
+---------------------+----------+--------------------------------------------+
| Function ID: | (uint32) | 0x86000001 |
+---------------------+----------+----+---------------------------------------+
| Arguments: | (uint32) | R1 | ``KVM_PTP_VIRT_COUNTER (0)`` |
| | | +---------------------------------------+
| | | | ``KVM_PTP_PHYS_COUNTER (1)`` |
+---------------------+----------+----+---------------------------------------+
| Return Values: | (int32) | R0 | ``NOT_SUPPORTED (-1)`` on error, else |
| | | | upper 32 bits of wall clock time |
| +----------+----+---------------------------------------+
| | (uint32) | R1 | Lower 32 bits of wall clock time |
| +----------+----+---------------------------------------+
| | (uint32) | R2 | Upper 32 bits of counter |
| +----------+----+---------------------------------------+
| | (uint32) | R3 | Lower 32 bits of counter |
+---------------------+----------+----+---------------------------------------+
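
For reference, a guest can recombine the four 32-bit halves roughly the way
the in-kernel ptp_kvm driver does. The sketch below is a simplified
illustration, with the function ID and argument value taken from the table
above and the constants defined locally for clarity::

    /* Hypothetical sketch: one PTP hypercall returning wall clock + counter. */
    #include <linux/arm-smccc.h>
    #include <linux/errno.h>
    #include <linux/types.h>

    #define PTP_FUNC_ID       0x86000001   /* from the table above */
    #define PTP_VIRT_COUNTER  0            /* KVM_PTP_VIRT_COUNTER (0) */

    static int ptp_kvm_read_clocks(u64 *wall_ns, u64 *cycles)
    {
            struct arm_smccc_res res;

            arm_smccc_1_1_invoke(PTP_FUNC_ID, PTP_VIRT_COUNTER, &res);
            if ((int)res.a0 < 0)    /* NOT_SUPPORTED(-1) is a 32-bit value */
                    return -EOPNOTSUPP;

            /* R0/R1 carry the wall clock, R2/R3 the counter, as 32-bit halves. */
            *wall_ns = ((u64)(u32)res.a0 << 32) | (u32)res.a1;
            *cycles  = ((u64)(u32)res.a2 << 32) | (u32)res.a3;
            return 0;
    }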

View File

@ -7831,6 +7831,7 @@ M: Chao Yu <chao@kernel.org>
L: linux-f2fs-devel@lists.sourceforge.net
S: Maintained
W: https://f2fs.wiki.kernel.org/
B: https://bugzilla.kernel.org/enter_bug.cgi?product=File%20System&component=f2fs
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git
F: Documentation/ABI/testing/sysfs-fs-f2fs
F: Documentation/filesystems/f2fs.rst

View File

@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 6
PATCHLEVEL = 1
SUBLEVEL = 0
SUBLEVEL = 1
EXTRAVERSION =
NAME = Hurr durr I'ma ninja sloth
@ -2094,7 +2094,9 @@ $(clean-dirs):
clean: $(clean-dirs)
$(call cmd,rmfiles)
@find $(or $(KBUILD_EXTMOD), .) $(RCS_FIND_IGNORE) \
@find $(or $(KBUILD_EXTMOD), .) \
$(if $(filter-out arch/$(SRCARCH)/boot/dts, $(dtstree)), $(dtstree)) \
$(RCS_FIND_IGNORE) \
\( -name '*.[aios]' -o -name '*.rsi' -o -name '*.ko' -o -name '.*.cmd' \
-o -name '*.ko.*' \
-o -name '*.dtb' -o -name '*.dtbo' -o -name '*.dtb.S' -o -name '*.dt.yaml' \

View File

@ -1 +1 @@
5a26ea7c4a053d84e22ddc713c092565ccf2a173
09ad10d4ee63f8983acad5515463dd202cc83054

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,538 @@
__cfg80211_alloc_event_skb
__cfg80211_alloc_reply_skb
__cfg80211_radar_event
__cfg80211_send_event_skb
__hci_cmd_send
__hci_cmd_sync
__hci_cmd_sync_ev
__hci_cmd_sync_sk
__hci_cmd_sync_status
__hci_cmd_sync_status_sk
__ieee80211_schedule_txq
__nfc_alloc_vendor_cmd_reply_skb
alloc_can_err_skb
alloc_can_skb
alloc_candev_mqs
alloc_canfd_skb
alloc_canxl_skb
arc4_crypt
arc4_setkey
baswap
bridge_tunnel_header
bt_accept_dequeue
bt_accept_enqueue
bt_accept_unlink
bt_debugfs
bt_err
bt_err_ratelimited
bt_info
bt_procfs_cleanup
bt_procfs_init
bt_sock_ioctl
bt_sock_link
bt_sock_poll
bt_sock_reclassify_lock
bt_sock_recvmsg
bt_sock_register
bt_sock_stream_recvmsg
bt_sock_unlink
bt_sock_unregister
bt_sock_wait_ready
bt_sock_wait_state
bt_status
bt_to_errno
bt_warn
bt_warn_ratelimited
btbcm_check_bdaddr
btbcm_finalize
btbcm_initialize
btbcm_patchram
btbcm_read_pcm_int_params
btbcm_set_bdaddr
btbcm_setup_apple
btbcm_setup_patchram
btbcm_write_pcm_int_params
can_bus_off
can_change_mtu
can_change_state
can_dropped_invalid_skb
can_eth_ioctl_hwts
can_ethtool_op_get_ts_info_hwts
can_fd_dlc2len
can_fd_len2dlc
can_free_echo_skb
can_get_echo_skb
can_get_state_str
can_proto_register
can_proto_unregister
can_put_echo_skb
can_rx_offload_add_fifo
can_rx_offload_add_manual
can_rx_offload_add_timestamp
can_rx_offload_del
can_rx_offload_enable
can_rx_offload_get_echo_skb
can_rx_offload_irq_finish
can_rx_offload_irq_offload_fifo
can_rx_offload_irq_offload_timestamp
can_rx_offload_queue_tail
can_rx_offload_queue_timestamp
can_rx_offload_threaded_irq_finish
can_rx_register
can_rx_unregister
can_send
can_skb_get_frame_len
can_sock_destruct
cfg80211_any_usable_channels
cfg80211_assoc_comeback
cfg80211_assoc_failure
cfg80211_auth_timeout
cfg80211_background_cac_abort
cfg80211_bss_color_notify
cfg80211_bss_flush
cfg80211_bss_iter
cfg80211_cac_event
cfg80211_calculate_bitrate
cfg80211_ch_switch_notify
cfg80211_ch_switch_started_notify
cfg80211_chandef_compatible
cfg80211_chandef_create
cfg80211_chandef_dfs_required
cfg80211_chandef_usable
cfg80211_chandef_valid
cfg80211_check_combinations
cfg80211_check_station_change
cfg80211_classify8021d
cfg80211_conn_failed
cfg80211_connect_done
cfg80211_control_port_tx_status
cfg80211_cqm_beacon_loss_notify
cfg80211_cqm_pktloss_notify
cfg80211_cqm_rssi_notify
cfg80211_cqm_txe_notify
cfg80211_crit_proto_stopped
cfg80211_del_sta_sinfo
cfg80211_disconnected
cfg80211_external_auth_request
cfg80211_find_elem_match
cfg80211_find_vendor_elem
cfg80211_free_nan_func
cfg80211_ft_event
cfg80211_get_bss
cfg80211_get_drvinfo
cfg80211_get_ies_channel_number
cfg80211_get_iftype_ext_capa
cfg80211_get_p2p_attr
cfg80211_get_station
cfg80211_gtk_rekey_notify
cfg80211_ibss_joined
cfg80211_iftype_allowed
cfg80211_inform_bss_data
cfg80211_inform_bss_frame_data
cfg80211_is_element_inherited
cfg80211_iter_combinations
cfg80211_merge_profile
cfg80211_mgmt_tx_status_ext
cfg80211_michael_mic_failure
cfg80211_nan_func_terminated
cfg80211_nan_match
cfg80211_new_sta
cfg80211_notify_new_peer_candidate
cfg80211_pmksa_candidate_notify
cfg80211_pmsr_complete
cfg80211_pmsr_report
cfg80211_port_authorized
cfg80211_probe_status
cfg80211_put_bss
cfg80211_ready_on_channel
cfg80211_ref_bss
cfg80211_reg_can_beacon
cfg80211_reg_can_beacon_relax
cfg80211_register_netdevice
cfg80211_remain_on_channel_expired
cfg80211_report_obss_beacon_khz
cfg80211_report_wowlan_wakeup
cfg80211_roamed
cfg80211_rx_assoc_resp
cfg80211_rx_control_port
cfg80211_rx_mgmt_ext
cfg80211_rx_mlme_mgmt
cfg80211_rx_spurious_frame
cfg80211_rx_unexpected_4addr_frame
cfg80211_rx_unprot_mlme_mgmt
cfg80211_scan_done
cfg80211_sched_scan_results
cfg80211_sched_scan_stopped
cfg80211_sched_scan_stopped_locked
cfg80211_send_layer2_update
cfg80211_shutdown_all_interfaces
cfg80211_sinfo_alloc_tid_stats
cfg80211_sta_opmode_change_notify
cfg80211_stop_iface
cfg80211_tdls_oper_request
cfg80211_tx_mgmt_expired
cfg80211_tx_mlme_mgmt
cfg80211_unlink_bss
cfg80211_unregister_wdev
cfg80211_update_owe_info_event
cfg80211_vendor_cmd_get_sender
cfg80211_vendor_cmd_reply
close_candev
free_candev
freq_reg_info
get_wiphy_regdom
h4_recv_buf
hci_alloc_dev_priv
hci_cmd_sync
hci_cmd_sync_cancel
hci_cmd_sync_queue
hci_conn_check_secure
hci_conn_security
hci_conn_switch_role
hci_free_dev
hci_get_route
hci_mgmt_chan_register
hci_mgmt_chan_unregister
hci_recv_diag
hci_recv_frame
hci_register_cb
hci_register_dev
hci_release_dev
hci_reset_dev
hci_resume_dev
hci_set_fw_info
hci_set_hw_info
hci_suspend_dev
hci_uart_register_device
hci_uart_tx_wakeup
hci_uart_unregister_device
hci_unregister_cb
hci_unregister_dev
hidp_hid_driver
ieee80211_alloc_hw_nm
ieee80211_amsdu_to_8023s
ieee80211_ap_probereq_get
ieee80211_ave_rssi
ieee80211_beacon_cntdwn_is_complete
ieee80211_beacon_get_template
ieee80211_beacon_get_tim
ieee80211_beacon_loss
ieee80211_beacon_set_cntdwn
ieee80211_beacon_update_cntdwn
ieee80211_bss_get_elem
ieee80211_calc_rx_airtime
ieee80211_calc_tx_airtime
ieee80211_chandef_to_operating_class
ieee80211_channel_switch_disconnect
ieee80211_channel_to_freq_khz
ieee80211_chswitch_done
ieee80211_color_change_finish
ieee80211_connection_loss
ieee80211_cqm_beacon_loss_notify
ieee80211_cqm_rssi_notify
ieee80211_csa_finish
ieee80211_ctstoself_duration
ieee80211_ctstoself_get
ieee80211_data_to_8023_exthdr
ieee80211_disable_rssi_reports
ieee80211_disconnect
ieee80211_enable_rssi_reports
ieee80211_find_sta
ieee80211_find_sta_by_ifaddr
ieee80211_find_sta_by_link_addrs
ieee80211_free_hw
ieee80211_free_txskb
ieee80211_freq_khz_to_channel
ieee80211_generic_frame_duration
ieee80211_get_bssid
ieee80211_get_buffered_bc
ieee80211_get_channel_khz
ieee80211_get_fils_discovery_tmpl
ieee80211_get_hdrlen_from_skb
ieee80211_get_key_rx_seq
ieee80211_get_mesh_hdrlen
ieee80211_get_num_supported_channels
ieee80211_get_response_rate
ieee80211_get_tkip_p1k_iv
ieee80211_get_tkip_p2k
ieee80211_get_tkip_rx_p1k
ieee80211_get_tx_rates
ieee80211_get_unsol_bcast_probe_resp_tmpl
ieee80211_get_vht_max_nss
ieee80211_gtk_rekey_add
ieee80211_gtk_rekey_notify
ieee80211_hdrlen
ieee80211_hw_restart_disconnect
ieee80211_ie_split_ric
ieee80211_iter_chan_contexts_atomic
ieee80211_iter_keys
ieee80211_iter_keys_rcu
ieee80211_iterate_active_interfaces_atomic
ieee80211_iterate_active_interfaces_mtx
ieee80211_iterate_interfaces
ieee80211_iterate_stations
ieee80211_iterate_stations_atomic
ieee80211_key_mic_failure
ieee80211_key_replay
ieee80211_manage_rx_ba_offl
ieee80211_mandatory_rates
ieee80211_mark_rx_ba_filtered_frames
ieee80211_nan_func_match
ieee80211_nan_func_terminated
ieee80211_next_txq
ieee80211_nullfunc_get
ieee80211_operating_class_to_band
ieee80211_parse_p2p_noa
ieee80211_probereq_get
ieee80211_proberesp_get
ieee80211_pspoll_get
ieee80211_queue_delayed_work
ieee80211_queue_stopped
ieee80211_queue_work
ieee80211_radar_detected
ieee80211_radiotap_iterator_init
ieee80211_radiotap_iterator_next
ieee80211_rate_control_register
ieee80211_rate_control_unregister
ieee80211_ready_on_channel
ieee80211_register_hw
ieee80211_remain_on_channel_expired
ieee80211_remove_key
ieee80211_report_low_ack
ieee80211_report_wowlan_wakeup
ieee80211_request_smps
ieee80211_reserve_tid
ieee80211_restart_hw
ieee80211_resume_disconnect
ieee80211_rts_duration
ieee80211_rts_get
ieee80211_rx_ba_timer_expired
ieee80211_rx_irqsafe
ieee80211_rx_list
ieee80211_rx_napi
ieee80211_s1g_channel_width
ieee80211_scan_completed
ieee80211_sched_scan_results
ieee80211_sched_scan_stopped
ieee80211_send_bar
ieee80211_send_eosp_nullfunc
ieee80211_set_active_links
ieee80211_set_active_links_async
ieee80211_set_key_rx_seq
ieee80211_sta_block_awake
ieee80211_sta_eosp
ieee80211_sta_ps_transition
ieee80211_sta_pspoll
ieee80211_sta_recalc_aggregates
ieee80211_sta_register_airtime
ieee80211_sta_set_buffered
ieee80211_sta_uapsd_trigger
ieee80211_start_tx_ba_cb_irqsafe
ieee80211_start_tx_ba_session
ieee80211_stop_queue
ieee80211_stop_queues
ieee80211_stop_rx_ba_session
ieee80211_stop_tx_ba_cb_irqsafe
ieee80211_stop_tx_ba_session
ieee80211_tdls_oper_request
ieee80211_tkip_add_iv
ieee80211_tx_dequeue
ieee80211_tx_prepare_skb
ieee80211_tx_rate_update
ieee80211_tx_status
ieee80211_tx_status_8023
ieee80211_tx_status_ext
ieee80211_tx_status_irqsafe
ieee80211_txq_airtime_check
ieee80211_txq_get_depth
ieee80211_txq_may_transmit
ieee80211_txq_schedule_start
ieee80211_unregister_hw
ieee80211_unreserve_tid
ieee80211_update_mu_groups
ieee80211_update_p2p_noa
ieee80211_vif_to_wdev
ieee80211_wake_queue
ieee80211_wake_queues
ieee802154_alloc_hw
ieee802154_configure_durations
ieee802154_free_hw
ieee802154_hdr_peek
ieee802154_hdr_peek_addrs
ieee802154_hdr_pull
ieee802154_hdr_push
ieee802154_max_payload
ieee802154_register_hw
ieee802154_rx_irqsafe
ieee802154_stop_queue
ieee802154_unregister_hw
ieee802154_wake_queue
ieee802154_xmit_complete
ieee802154_xmit_error
ieee802154_xmit_hw_error
ieeee80211_obss_color_collision_notify
l2cap_add_psm
l2cap_chan_close
l2cap_chan_connect
l2cap_chan_create
l2cap_chan_del
l2cap_chan_list
l2cap_chan_put
l2cap_chan_send
l2cap_chan_set_defaults
l2cap_conn_get
l2cap_conn_put
l2cap_is_socket
l2cap_register_user
l2cap_unregister_user
l2tp_recv_common
l2tp_session_create
l2tp_session_dec_refcount
l2tp_session_delete
l2tp_session_get
l2tp_session_get_by_ifname
l2tp_session_get_nth
l2tp_session_inc_refcount
l2tp_session_register
l2tp_session_set_header_len
l2tp_sk_to_tunnel
l2tp_tunnel_create
l2tp_tunnel_dec_refcount
l2tp_tunnel_delete
l2tp_tunnel_get
l2tp_tunnel_get_nth
l2tp_tunnel_get_session
l2tp_tunnel_inc_refcount
l2tp_tunnel_register
l2tp_udp_encap_recv
l2tp_xmit_skb
lowpan_header_compress
lowpan_header_decompress
lowpan_nhc_add
lowpan_nhc_del
lowpan_register_netdev
lowpan_register_netdevice
lowpan_unregister_netdev
lowpan_unregister_netdevice
nfc_add_se
nfc_alloc_recv_skb
nfc_allocate_device
nfc_class
nfc_dep_link_is_up
nfc_driver_failure
nfc_find_se
nfc_fw_download_done
nfc_get_local_general_bytes
nfc_proto_register
nfc_proto_unregister
nfc_register_device
nfc_remove_se
nfc_se_connectivity
nfc_se_transaction
nfc_send_to_raw_sock
nfc_set_remote_general_bytes
nfc_target_lost
nfc_targets_found
nfc_tm_activated
nfc_tm_data_received
nfc_tm_deactivated
nfc_unregister_device
nfc_vendor_cmd_reply
of_can_transceiver
open_candev
ppp_channel_index
ppp_dev_name
ppp_input
ppp_input_error
ppp_output_wakeup
ppp_register_channel
ppp_register_compressor
ppp_register_net_channel
ppp_unit_number
ppp_unregister_channel
ppp_unregister_compressor
pppox_compat_ioctl
pppox_ioctl
pppox_unbind_sock
qca_read_soc_version
qca_send_pre_shutdown_cmd
qca_set_bdaddr
qca_set_bdaddr_rome
qca_uart_setup
rate_control_set_rates
reg_initiator_name
reg_query_regdb_wmm
register_candev
register_pppox_proto
regulatory_hint
regulatory_pre_cac_allowed
regulatory_set_wiphy_regd
regulatory_set_wiphy_regd_sync
rfc1042_header
rfkill_alloc
rfkill_blocked
rfkill_destroy
rfkill_find_type
rfkill_get_led_trigger_name
rfkill_init_sw_state
rfkill_pause_polling
rfkill_register
rfkill_resume_polling
rfkill_set_hw_state_reason
rfkill_set_led_trigger_name
rfkill_set_states
rfkill_set_sw_state
rfkill_soft_blocked
rfkill_unregister
safe_candev_priv
slhc_compress
slhc_free
slhc_init
slhc_remember
slhc_toss
slhc_uncompress
tipc_dump_done
tipc_dump_start
tipc_nl_sk_walk
tipc_sk_fill_sock_diag
unregister_candev
unregister_pppox_proto
usb_serial_claim_interface
usb_serial_deregister_drivers
usb_serial_generic_chars_in_buffer
usb_serial_generic_close
usb_serial_generic_get_icount
usb_serial_generic_open
usb_serial_generic_process_read_urb
usb_serial_generic_read_bulk_callback
usb_serial_generic_resume
usb_serial_generic_submit_read_urbs
usb_serial_generic_throttle
usb_serial_generic_tiocmiwait
usb_serial_generic_unthrottle
usb_serial_generic_wait_until_sent
usb_serial_generic_write
usb_serial_generic_write_bulk_callback
usb_serial_generic_write_start
usb_serial_handle_dcd_change
usb_serial_port_softint
usb_serial_register_drivers
usb_serial_resume
usb_serial_suspend
wdev_chandef
wdev_to_ieee80211_vif
wiphy_apply_custom_regulatory
wiphy_free
wiphy_new_nm
wiphy_read_of_freq_limits
wiphy_register
wiphy_rfkill_set_hw_state_reason
wiphy_rfkill_start_polling
wiphy_to_ieee80211_hw
wiphy_unregister
wpan_phy_find
wpan_phy_for_each
wpan_phy_free
wpan_phy_new
wpan_phy_register
wpan_phy_unregister

View File

@ -0,0 +1,47 @@
drivers/bluetooth/btbcm.ko
drivers/bluetooth/btqca.ko
drivers/bluetooth/btsdio.ko
drivers/bluetooth/hci_uart.ko
drivers/net/can/dev/can-dev.ko
drivers/net/can/slcan/slcan.ko
drivers/net/can/vcan.ko
drivers/net/ppp/bsd_comp.ko
drivers/net/ppp/ppp_deflate.ko
drivers/net/ppp/ppp_generic.ko
drivers/net/ppp/ppp_mppe.ko
drivers/net/ppp/pppox.ko
drivers/net/ppp/pptp.ko
drivers/net/slip/slhc.ko
drivers/usb/class/cdc-acm.ko
drivers/usb/serial/ftdi_sio.ko
drivers/usb/serial/usbserial.ko
lib/crypto/libarc4.ko
net/6lowpan/6lowpan.ko
net/6lowpan/nhc_dest.ko
net/6lowpan/nhc_fragment.ko
net/6lowpan/nhc_hop.ko
net/6lowpan/nhc_ipv6.ko
net/6lowpan/nhc_mobility.ko
net/6lowpan/nhc_routing.ko
net/6lowpan/nhc_udp.ko
net/8021q/8021q.ko
net/bluetooth/bluetooth.ko
net/bluetooth/hidp/hidp.ko
net/bluetooth/rfcomm/rfcomm.ko
net/can/can.ko
net/can/can-bcm.ko
net/can/can-gw.ko
net/can/can-raw.ko
net/ieee802154/6lowpan/ieee802154_6lowpan.ko
net/ieee802154/ieee802154.ko
net/ieee802154/ieee802154_socket.ko
net/l2tp/l2tp_core.ko
net/l2tp/l2tp_ppp.ko
net/mac80211/mac80211.ko
net/mac802154/mac802154.ko
net/nfc/nfc.ko
net/rfkill/rfkill.ko
net/tipc/diag.ko
net/tipc/tipc.ko
net/wireless/cfg80211.ko

View File

@ -1316,6 +1316,9 @@ config RELR
config ARCH_HAS_MEM_ENCRYPT
bool
config ARCH_HAS_MEM_RELINQUISH
bool
config ARCH_HAS_CC_PLATFORM
bool

View File

@ -6,5 +6,6 @@
void kvm_init_hyp_services(void);
bool kvm_arm_hyp_service_available(u32 func_id);
void kvm_arm_init_hyp_services(void);
#endif

View File

@ -28,9 +28,12 @@ config ARM64
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_GIGANTIC_PAGE
select ARCH_HAS_IOREMAP_PHYS_HOOKS
select ARCH_HAS_KCOV
select ARCH_HAS_KEEPINITRD
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_MEM_ENCRYPT
select ARCH_HAS_MEM_RELINQUISH
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PTE_DEVMAP
select ARCH_HAS_PTE_SPECIAL
@ -141,6 +144,7 @@ config ARM64
select GENERIC_GETTIMEOFDAY
select GENERIC_VDSO_TIME_NS
select HARDIRQS_SW_RESEND
select HAVE_MOD_ARCH_SPECIFIC if (ARM64_MODULE_PLTS || KVM)
select HAVE_MOVE_PMD
select HAVE_MOVE_PUD
select HAVE_PCI
@ -2058,7 +2062,6 @@ config ARM64_SME
config ARM64_MODULE_PLTS
bool "Use PLTs to allow module memory to spill over into vmalloc area"
depends on MODULES
select HAVE_MOD_ARCH_SPECIFIC
help
Allocate PLTs when loading modules so that jumps and calls whose
targets are too far away for their relative offsets to be encoded
@ -2204,6 +2207,12 @@ config CMDLINE_FROM_BOOTLOADER
the boot loader doesn't provide any, the default kernel command
string provided in CMDLINE will be used.
config CMDLINE_EXTEND
bool "Extend bootloader kernel arguments"
help
The command-line arguments provided by the boot loader will be
appended to the default kernel command string.
config CMDLINE_FORCE
bool "Always use the default kernel command string"
help

View File

@ -190,6 +190,11 @@ ifeq ($(CONFIG_ARM64_USE_LSE_ATOMICS),y)
endif
endif
ifeq ($(CONFIG_KVM),y)
archscripts:
$(Q)$(MAKE) $(build)=arch/arm64/tools gen-hyprel
endif
ifeq ($(KBUILD_EXTMOD),)
# We need to generate vdso-offsets.h before compiling certain files in kernel/.
# In order to do that, we should use the archprepare target, but we can't since

View File

@ -0,0 +1,49 @@
# SPDX-License-Identifier: GPL-2.0
#
# This file is included by the generic Kbuild makefile to permit the
# architecture to perform postlink actions on vmlinux and any .ko module file.
# In this case, we only need it for fips140.ko, which needs some postprocessing
# for the integrity check mandated by FIPS. This involves making copies of the
# relocation sections so that the module will have access to them at
# initialization time, and calculating and injecting a HMAC digest into the
# module. All other targets are NOPs.
#
PHONY := __archpost
__archpost:
-include include/config/auto.conf
include scripts/Kbuild.include
CMD_FIPS140_GEN_HMAC = crypto/fips140_gen_hmac
quiet_cmd_gen_hmac = HMAC $@
cmd_gen_hmac = $(OBJCOPY) $@ \
--dump-section=$(shell $(READELF) -SW $@|grep -Eo '\.rela\.text\S*')=$@.rela.text \
--dump-section=$(shell $(READELF) -SW $@|grep -Eo '\.rela\.rodata\S*')=$@.rela.rodata && \
$(OBJCOPY) $@ \
--add-section=.init.rela.text=$@.rela.text \
--add-section=.init.rela.rodata=$@.rela.rodata \
--set-section-flags=.init.rela.text=alloc,readonly \
--set-section-flags=.init.rela.rodata=alloc,readonly && \
$(CMD_FIPS140_GEN_HMAC) $@
# `@true` prevents complaints when there is nothing to be done
vmlinux: FORCE
@true
$(objtree)/crypto/fips140.ko: FORCE
$(call cmd,gen_hmac)
%.ko: FORCE
@true
clean:
rm -f $(objtree)/crypto/fips140.ko.rela.*
PHONY += FORCE clean
FORCE:
.PHONY: $(PHONY)

View File

@ -0,0 +1,2 @@
CONFIG_CRYPTO_FIPS140_MOD=m
# CONFIG_MODULE_SIG_ALL is not set

View File

@ -1,5 +1,4 @@
CONFIG_UAPI_HEADER_TEST=y
CONFIG_LOCALVERSION="-mainline"
CONFIG_AUDIT=y
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
@ -31,6 +30,7 @@ CONFIG_CGROUP_CPUACCT=y
CONFIG_CGROUP_BPF=y
CONFIG_NAMESPACES=y
# CONFIG_PID_NS is not set
CONFIG_RT_SOFTIRQ_AWARE_SCHED=y
# CONFIG_RD_BZIP2 is not set
# CONFIG_RD_LZMA is not set
# CONFIG_RD_XZ is not set
@ -56,7 +56,8 @@ CONFIG_CP15_BARRIER_EMULATION=y
CONFIG_SETEND_EMULATION=y
CONFIG_RANDOMIZE_BASE=y
# CONFIG_RANDOMIZE_MODULE_REGION_FULL is not set
CONFIG_CMDLINE="console=ttynull stack_depot_disable=on cgroup_disable=pressure kasan.stacktrace=off kvm-arm.mode=protected bootconfig ioremap_guard"
CONFIG_CMDLINE="console=ttynull stack_depot_disable=on cgroup_disable=pressure kasan.page_alloc.sample=10 kasan.stacktrace=off kvm-arm.mode=protected bootconfig ioremap_guard"
CONFIG_CMDLINE_EXTEND=y
# CONFIG_DMI is not set
CONFIG_PM_WAKELOCKS=y
CONFIG_PM_WAKELOCKS_LIMIT=0
@ -75,9 +76,11 @@ CONFIG_ARM_SCPI_CPUFREQ=y
CONFIG_ARM_SCMI_CPUFREQ=y
CONFIG_VIRTUALIZATION=y
CONFIG_KVM=y
CONFIG_KVM_S2MPU=y
CONFIG_KPROBES=y
CONFIG_JUMP_LABEL=y
CONFIG_SHADOW_CALL_STACK=y
CONFIG_CFI_CLANG=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODVERSIONS=y
@ -140,6 +143,7 @@ CONFIG_IPV6_MROUTE=y
CONFIG_NETFILTER=y
CONFIG_NF_CONNTRACK=y
CONFIG_NF_CONNTRACK_SECMARK=y
CONFIG_NF_CONNTRACK_PROCFS=y
CONFIG_NF_CONNTRACK_EVENTS=y
CONFIG_NF_CONNTRACK_AMANDA=y
CONFIG_NF_CONNTRACK_FTP=y
@ -566,6 +570,7 @@ CONFIG_QUOTA=y
CONFIG_QFMT_V2=y
CONFIG_FUSE_FS=y
CONFIG_VIRTIO_FS=y
CONFIG_FUSE_BPF=y
CONFIG_OVERLAY_FS=y
CONFIG_INCREMENTAL_FS=y
CONFIG_MSDOS_FS=y

View File

@ -0,0 +1,52 @@
# SPDX-License-Identifier: GPL-2.0-only
#
# Create a separate FIPS archive that duplicates the modules that are relevant
# for FIPS 140 certification as builtin objects
#
sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o
sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o
sha512-ce-y := sha512-ce-glue.o sha512-ce-core.o
ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
aes-ce-cipher-y := aes-ce-core.o aes-ce-glue.o
aes-ce-blk-y := aes-glue-ce.o aes-ce.o
aes-neon-blk-y := aes-glue-neon.o aes-neon.o
sha256-arm64-y := sha256-glue.o sha256-core.o
sha512-arm64-y := sha512-glue.o sha512-core.o
aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o
aes-neon-bs-y := aes-neonbs-core.o aes-neonbs-glue.o
crypto-arm64-fips-src := $(srctree)/arch/arm64/crypto/
crypto-arm64-fips-modules := sha1-ce.o sha2-ce.o sha512-ce.o ghash-ce.o \
aes-ce-cipher.o aes-ce-blk.o aes-neon-blk.o \
sha256-arm64.o sha512-arm64.o aes-arm64.o \
aes-neon-bs.o
crypto-fips-objs += $(foreach o,$(crypto-arm64-fips-modules),$($(o:.o=-y):.o=-fips-arch.o))
CFLAGS_aes-glue-ce-fips-arch.o := -DUSE_V8_CRYPTO_EXTENSIONS
$(obj)/aes-glue-%-fips-arch.o: KBUILD_CFLAGS += $(FIPS140_CFLAGS)
$(obj)/aes-glue-%-fips-arch.o: $(crypto-arm64-fips-src)/aes-glue.c FORCE
$(call if_changed_rule,cc_o_c)
$(obj)/%-fips-arch.o: KBUILD_CFLAGS += $(FIPS140_CFLAGS)
$(obj)/%-fips-arch.o: $(crypto-arm64-fips-src)/%.c FORCE
$(call if_changed_rule,cc_o_c)
$(obj)/%-fips-arch.o: $(crypto-arm64-fips-src)/%.S FORCE
$(call if_changed_rule,as_o_S)
quiet_cmd_perlasm = PERLASM $@
cmd_perlasm = $(PERL) $(<) void $(@)
$(obj)/%-core.S: $(crypto-arm64-fips-src)/%-armv8.pl
$(call cmd,perlasm)
$(obj)/sha256-core.S: $(crypto-arm64-fips-src)/sha512-armv8.pl
$(call cmd,perlasm)
clean-files += sha256-core.S sha512-core.S
$(obj)/%-fips-arch.o: $(obj)/%.S FORCE
$(call if_changed_rule,as_o_S)

View File

@ -19,6 +19,7 @@
#error "cpucaps have overflown ARM64_CB_BIT"
#endif
#ifndef BUILD_FIPS140_KO
#ifndef __ASSEMBLY__
#include <linux/stringify.h>
@ -261,4 +262,50 @@ alternative_has_feature_unlikely(unsigned long feature)
#endif /* __ASSEMBLY__ */
#else
/*
* The FIPS140 module does not support alternatives patching, as this
* invalidates the HMAC digest of the .text section. However, some alternatives
* are known to be irrelevant so we can tolerate them in the FIPS140 module, as
* they will never be applied in the first place in the use cases that the
* FIPS140 module targets (Android running on a production phone). Any other
* uses of alternatives should be avoided, as it is not safe in the general
* case to simply use the default sequence in one place (the fips module) and
* the alternative sequence everywhere else.
*
* Below is an allowlist of features that we can ignore, by simply taking the
* safe default instruction sequence. Note that this implies that the FIPS140
* module is not compatible with VHE, or with pseudo-NMI support.
*/
#define __ALT_ARM64_HAS_LDAPR 0,
#define __ALT_ARM64_HAS_VIRT_HOST_EXTN 0,
#define __ALT_ARM64_HAS_IRQ_PRIO_MASKING 0,
#define ALTERNATIVE(oldinstr, newinstr, feature, ...) \
_ALTERNATIVE(oldinstr, __ALT_ ## feature, #feature)
#define _ALTERNATIVE(oldinstr, feature, feature_str) \
__take_second_arg(feature oldinstr, \
".err Feature " feature_str " not supported in fips140 module")
#ifndef __ASSEMBLY__
#include <linux/types.h>
static __always_inline bool
alternative_has_feature_likely(unsigned long feature)
{
return feature == ARM64_HAS_LDAPR ||
feature == ARM64_HAS_VIRT_HOST_EXTN ||
feature == ARM64_HAS_IRQ_PRIO_MASKING;
}
#define alternative_has_feature_unlikely alternative_has_feature_likely
#endif /* !__ASSEMBLY__ */
#endif /* BUILD_FIPS140_KO */
#endif /* __ASM_ALTERNATIVE_MACROS_H */

View File

@ -109,6 +109,8 @@ void __init early_fixmap_init(void);
extern void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot);
extern pte_t *__get_fixmap_pte(enum fixed_addresses idx);
#include <asm-generic/fixmap.h>
#endif /* !__ASSEMBLY__ */

View File

@ -6,5 +6,14 @@
void kvm_init_hyp_services(void);
bool kvm_arm_hyp_service_available(u32 func_id);
void kvm_arm_init_hyp_services(void);
void kvm_init_memshare_services(void);
void kvm_init_ioremap_services(void);
#ifdef CONFIG_MEMORY_RELINQUISH
void kvm_init_memrelinquish_services(void);
#else
static inline void kvm_init_memrelinquish_services(void) {}
#endif
#endif

View File

@ -0,0 +1,29 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2022 - Google LLC
*/
#ifndef __IO_MPT_S2MPU_H__
#define __IO_MPT_S2MPU_H__
#include <linux/bitfield.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_s2mpu.h>
struct s2mpu_mpt_cfg {
enum s2mpu_version version;
};
struct s2mpu_mpt_ops {
u32 (*smpt_size)(void);
void (*init_with_prot)(void *dev_va, enum mpt_prot prot);
void (*init_with_mpt)(void *dev_va, struct mpt *mpt);
void (*apply_range)(void *dev_va, struct mpt *mpt, u32 first_gb, u32 last_gb);
void (*prepare_range)(struct mpt *mpt, phys_addr_t first_byte,
phys_addr_t last_byte, enum mpt_prot prot);
int (*pte_from_addr_smpt)(u32 *smpt, u64 addr);
};
const struct s2mpu_mpt_ops *s2mpu_get_mpt_ops(struct s2mpu_mpt_cfg cfg);
#endif /* __IO_MPT_S2MPU_H__ */

View File

@ -135,7 +135,7 @@
* 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are
* not known to exist and will break with this configuration.
*
* The VTCR_EL2 is configured per VM and is initialised in kvm_arm_setup_stage2().
* The VTCR_EL2 is configured per VM and is initialised in kvm_init_stage2_mmu.
*
* Note that when using 4K pages, we concatenate two first level page tables
* together. With 16K pages, we concatenate 16 first level page tables.
@ -344,6 +344,8 @@
#define PAR_TO_HPFAR(par) \
(((par) & GENMASK_ULL(PHYS_MASK_SHIFT - 1, 12)) >> 8)
#define FAR_MASK GENMASK_ULL(11, 0)
#define ECN(x) { ESR_ELx_EC_##x, #x }
#define kvm_arm_exception_class \
@ -361,4 +363,13 @@
#define CPACR_EL1_DEFAULT (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |\
CPACR_EL1_ZEN_EL1EN)
/*
* ARMv8 Reset Values
*/
#define VCPU_RESET_PSTATE_EL1 (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | \
PSR_F_BIT | PSR_D_BIT)
#define VCPU_RESET_PSTATE_SVC (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \
PSR_AA32_I_BIT | PSR_AA32_F_BIT)
#endif /* __ARM64_KVM_ARM_H__ */

View File

@ -59,23 +59,44 @@ enum __kvm_host_smccc_func {
__KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs,
__KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs,
__KVM_HOST_SMCCC_FUNC___vgic_v3_get_gic_config,
__KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context,
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa,
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid,
__KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
__KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize,
/* Hypercalls available after pKVM finalisation */
__KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp,
__KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_hyp,
__KVM_HOST_SMCCC_FUNC___pkvm_host_reclaim_page,
__KVM_HOST_SMCCC_FUNC___pkvm_host_map_guest,
__KVM_HOST_SMCCC_FUNC___kvm_adjust_pc,
__KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,
__KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context,
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa,
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid,
__KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
__KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
__KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr,
__KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr,
__KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs,
__KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs,
__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_init_traps,
__KVM_HOST_SMCCC_FUNC___vgic_v3_save_vmcr_aprs,
__KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs,
__KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
__KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu,
__KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm,
__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load,
__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put,
__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_sync_state,
__KVM_HOST_SMCCC_FUNC___pkvm_iommu_driver_init,
__KVM_HOST_SMCCC_FUNC___pkvm_iommu_register,
__KVM_HOST_SMCCC_FUNC___pkvm_iommu_pm_notify,
__KVM_HOST_SMCCC_FUNC___pkvm_iommu_finalize,
__KVM_HOST_SMCCC_FUNC___pkvm_register_hcall,
__KVM_HOST_SMCCC_FUNC___pkvm_alloc_module_va,
__KVM_HOST_SMCCC_FUNC___pkvm_map_module_page,
__KVM_HOST_SMCCC_FUNC___pkvm_unmap_module_page,
__KVM_HOST_SMCCC_FUNC___pkvm_init_module,
__KVM_HOST_SMCCC_FUNC___pkvm_close_module_registration,
/*
* Start of the dynamically registered hypercalls. Start a bit
* further, just in case some modules...
*/
__KVM_HOST_SMCCC_FUNC___dynamic_hcalls = 128,
};
#define DECLARE_KVM_VHE_SYM(sym) extern char sym[]
@ -106,7 +127,7 @@ enum __kvm_host_smccc_func {
#define per_cpu_ptr_nvhe_sym(sym, cpu) \
({ \
unsigned long base, off; \
base = kvm_arm_hyp_percpu_base[cpu]; \
base = kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu]; \
off = (unsigned long)&CHOOSE_NVHE_SYM(sym) - \
(unsigned long)&CHOOSE_NVHE_SYM(__per_cpu_start); \
base ? (typeof(CHOOSE_NVHE_SYM(sym))*)(base + off) : NULL; \
@ -211,7 +232,7 @@ DECLARE_KVM_HYP_SYM(__kvm_hyp_vector);
#define __kvm_hyp_init CHOOSE_NVHE_SYM(__kvm_hyp_init)
#define __kvm_hyp_vector CHOOSE_HYP_SYM(__kvm_hyp_vector)
extern unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
extern unsigned long kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[];
DECLARE_KVM_NVHE_SYM(__per_cpu_start);
DECLARE_KVM_NVHE_SYM(__per_cpu_end);
@ -231,8 +252,6 @@ extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
extern void __kvm_adjust_pc(struct kvm_vcpu *vcpu);
extern u64 __vgic_v3_get_gic_config(void);
extern u64 __vgic_v3_read_vmcr(void);
extern void __vgic_v3_write_vmcr(u32 vmcr);
extern void __vgic_v3_init_lrs(void);
extern u64 __kvm_get_mdcr_el2(void);

View File

@ -42,6 +42,11 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
void kvm_inject_size_fault(struct kvm_vcpu *vcpu);
unsigned long get_except64_offset(unsigned long psr, unsigned long target_mode,
enum exception_type type);
unsigned long get_except64_cpsr(unsigned long old, bool has_mte,
unsigned long sctlr, unsigned long mode);
void kvm_vcpu_wfi(struct kvm_vcpu *vcpu);
#if defined(__KVM_VHE_HYPERVISOR__) || defined(__KVM_NVHE_HYPERVISOR__)
@ -490,4 +495,61 @@ static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature)
return test_bit(feature, vcpu->arch.features);
}
static inline int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
{
/*
* For now make sure that both address/generic pointer authentication
* features are requested by the userspace together and the system
* supports these capabilities.
*/
if (!vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_ADDRESS) ||
!vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_GENERIC) ||
!system_has_full_ptr_auth())
return -EINVAL;
vcpu_set_flag(vcpu, GUEST_HAS_PTRAUTH);
return 0;
}
/* Reset a vcpu's core registers. */
static inline void kvm_reset_vcpu_core(struct kvm_vcpu *vcpu)
{
u32 pstate;
if (vcpu_el1_is_32bit(vcpu)) {
pstate = VCPU_RESET_PSTATE_SVC;
} else {
pstate = VCPU_RESET_PSTATE_EL1;
}
/* Reset core registers */
memset(vcpu_gp_regs(vcpu), 0, sizeof(*vcpu_gp_regs(vcpu)));
memset(&vcpu->arch.ctxt.fp_regs, 0, sizeof(vcpu->arch.ctxt.fp_regs));
vcpu->arch.ctxt.spsr_abt = 0;
vcpu->arch.ctxt.spsr_und = 0;
vcpu->arch.ctxt.spsr_irq = 0;
vcpu->arch.ctxt.spsr_fiq = 0;
vcpu_gp_regs(vcpu)->pstate = pstate;
}
/* PSCI reset handling for a vcpu. */
static inline void kvm_reset_vcpu_psci(struct kvm_vcpu *vcpu,
struct vcpu_reset_state *reset_state)
{
unsigned long target_pc = reset_state->pc;
/* Gracefully handle Thumb2 entry point */
if (vcpu_mode_is_32bit(vcpu) && (target_pc & 1)) {
target_pc &= ~1UL;
vcpu_set_thumb(vcpu);
}
/* Propagate caller endianness */
if (reset_state->be)
kvm_vcpu_set_be(vcpu);
*vcpu_pc(vcpu) = target_pc;
vcpu_set_reg(vcpu, 0, reset_state->r0);
}
#endif /* __ARM64_KVM_EMULATE_H__ */

View File

@ -73,6 +73,64 @@ u32 __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);
struct kvm_hyp_memcache {
phys_addr_t head;
unsigned long nr_pages;
};
static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc,
phys_addr_t *p,
phys_addr_t (*to_pa)(void *virt))
{
*p = mc->head;
mc->head = to_pa(p);
mc->nr_pages++;
}
static inline void *pop_hyp_memcache(struct kvm_hyp_memcache *mc,
void *(*to_va)(phys_addr_t phys))
{
phys_addr_t *p = to_va(mc->head);
if (!mc->nr_pages)
return NULL;
mc->head = *p;
mc->nr_pages--;
return p;
}
static inline int __topup_hyp_memcache(struct kvm_hyp_memcache *mc,
unsigned long min_pages,
void *(*alloc_fn)(void *arg),
phys_addr_t (*to_pa)(void *virt),
void *arg)
{
while (mc->nr_pages < min_pages) {
phys_addr_t *p = alloc_fn(arg);
if (!p)
return -ENOMEM;
push_hyp_memcache(mc, p, to_pa);
}
return 0;
}
static inline void __free_hyp_memcache(struct kvm_hyp_memcache *mc,
void (*free_fn)(void *virt, void *arg),
void *(*to_va)(phys_addr_t phys),
void *arg)
{
while (mc->nr_pages)
free_fn(pop_hyp_memcache(mc, to_va), arg);
}
void free_hyp_memcache(struct kvm_hyp_memcache *mc, struct kvm *kvm);
void free_hyp_stage2_memcache(struct kvm_hyp_memcache *mc, struct kvm *kvm);
int topup_hyp_memcache(struct kvm_vcpu *vcpu);
struct kvm_vmid {
atomic64_t id;
};
@ -115,6 +173,23 @@ struct kvm_smccc_features {
unsigned long vendor_hyp_bmap;
};
struct kvm_pinned_page {
struct rb_node node;
struct page *page;
u64 ipa;
};
typedef unsigned int pkvm_handle_t;
struct kvm_protected_vm {
pkvm_handle_t handle;
struct kvm_hyp_memcache teardown_mc;
struct kvm_hyp_memcache teardown_stage2_mc;
struct rb_root pinned_pages;
gpa_t pvmfw_load_addr;
bool enabled;
};
struct kvm_arch {
struct kvm_s2_mmu mmu;
@ -149,7 +224,8 @@ struct kvm_arch {
#define KVM_ARCH_FLAG_EL1_32BIT 4
/* PSCI SYSTEM_SUSPEND enabled for the guest */
#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 5
/* Guest has bought into the MMIO guard extension */
#define KVM_ARCH_FLAG_MMIO_GUARD 6
unsigned long flags;
/*
@ -166,6 +242,12 @@ struct kvm_arch {
/* Hypercall features firmware registers' descriptor */
struct kvm_smccc_features smccc_feat;
/*
* For an untrusted host VM, 'pkvm.handle' is used to lookup
* the associated pKVM instance in the hypervisor.
*/
struct kvm_protected_vm pkvm;
};
struct kvm_vcpu_fault_info {
@ -277,6 +359,7 @@ struct kvm_host_data {
struct kvm_host_psci_config {
/* PSCI version used by host. */
u32 version;
u32 smccc_version;
/* Function IDs used by host if version is v0.1. */
struct psci_0_1_function_ids function_ids_0_1;
@ -296,6 +379,35 @@ extern s64 kvm_nvhe_sym(hyp_physvirt_offset);
extern u64 kvm_nvhe_sym(hyp_cpu_logical_map)[NR_CPUS];
#define hyp_cpu_logical_map CHOOSE_NVHE_SYM(hyp_cpu_logical_map)
enum pkvm_iommu_pm_event {
PKVM_IOMMU_PM_SUSPEND,
PKVM_IOMMU_PM_RESUME,
};
struct pkvm_iommu_ops;
struct pkvm_iommu_driver {
const struct pkvm_iommu_ops *ops;
struct list_head list;
atomic_t state;
};
extern struct pkvm_iommu_driver kvm_nvhe_sym(pkvm_s2mpu_driver);
extern struct pkvm_iommu_driver kvm_nvhe_sym(pkvm_sysmmu_sync_driver);
int pkvm_iommu_driver_init(struct pkvm_iommu_driver *drv, void *data, size_t size);
int pkvm_iommu_register(struct device *dev, struct pkvm_iommu_driver *drv,
phys_addr_t pa, size_t size, struct device *parent);
int pkvm_iommu_suspend(struct device *dev);
int pkvm_iommu_resume(struct device *dev);
int pkvm_iommu_s2mpu_init(u32 version);
int pkvm_iommu_s2mpu_register(struct device *dev, phys_addr_t pa);
int pkvm_iommu_sysmmu_sync_register(struct device *dev, phys_addr_t pa,
struct device *parent);
/* Reject future calls to pkvm_iommu_driver_init() and pkvm_iommu_register(). */
int pkvm_iommu_finalize(void);
struct vcpu_reset_state {
unsigned long pc;
unsigned long r0;
@ -399,8 +511,12 @@ struct kvm_vcpu_arch {
/* vcpu power state */
struct kvm_mp_state mp_state;
/* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache;
union {
/* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache;
/* Pages to be donated to pkvm/EL2 if it runs out */
struct kvm_hyp_memcache pkvm_memcache;
};
/* Target CPU and feature flags */
int target;
@ -474,9 +590,25 @@ struct kvm_vcpu_arch {
*fset &= ~(m); \
} while (0)
#define __vcpu_copy_flag(vt, vs, flagset, f, m) \
do { \
typeof(vs->arch.flagset) tmp, val; \
\
__build_check_flag(vs, flagset, f, m); \
\
val = READ_ONCE(vs->arch.flagset); \
val &= (m); \
tmp = READ_ONCE(vt->arch.flagset); \
tmp &= ~(m); \
tmp |= val; \
WRITE_ONCE(vt->arch.flagset, tmp); \
} while (0)
#define vcpu_get_flag(v, ...) __vcpu_get_flag((v), __VA_ARGS__)
#define vcpu_set_flag(v, ...) __vcpu_set_flag((v), __VA_ARGS__)
#define vcpu_clear_flag(v, ...) __vcpu_clear_flag((v), __VA_ARGS__)
#define vcpu_copy_flag(vt, vs,...) __vcpu_copy_flag((vt), (vs), __VA_ARGS__)
/* SVE exposed to guest */
#define GUEST_HAS_SVE __vcpu_single_flag(cflags, BIT(0))
@ -494,6 +626,8 @@ struct kvm_vcpu_arch {
#define INCREMENT_PC __vcpu_single_flag(iflags, BIT(1))
/* Target EL/MODE (not a single flag, but let's abuse the macro) */
#define EXCEPT_MASK __vcpu_single_flag(iflags, GENMASK(3, 1))
/* Cover both PENDING_EXCEPTION and EXCEPT_MASK for global operations */
#define PC_UPDATE_REQ __vcpu_single_flag(iflags, GENMASK(3, 0))
/* Helpers to encode exceptions with minimum fuss */
#define __EXCEPT_MASK_VAL unpack_vcpu_flag(EXCEPT_MASK)
@ -525,6 +659,8 @@ struct kvm_vcpu_arch {
#define DEBUG_STATE_SAVE_SPE __vcpu_single_flag(iflags, BIT(5))
/* Save TRBE context if active */
#define DEBUG_STATE_SAVE_TRBE __vcpu_single_flag(iflags, BIT(6))
/* pKVM host vcpu state is dirty, needs resync */
#define PKVM_HOST_STATE_DIRTY __vcpu_single_flag(iflags, BIT(7))
/* SVE enabled for host EL0 */
#define HOST_SVE_ENABLED __vcpu_single_flag(sflags, BIT(0))
@ -601,9 +737,6 @@ struct kvm_vcpu_arch {
#define __vcpu_sys_reg(v,r) (ctxt_sys_reg(&(v)->arch.ctxt, (r)))
u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg);
void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg);
static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
{
/*
@ -695,8 +828,32 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
return true;
}
#define vcpu_read_sys_reg(__vcpu, reg) \
({ \
u64 __val = 0x8badf00d8badf00d; \
\
/* SYSREGS_ON_CPU is only used in VHE */ \
((!is_nvhe_hyp_code() && \
vcpu_get_flag(__vcpu, SYSREGS_ON_CPU) && \
__vcpu_read_sys_reg_from_cpu(reg, &__val))) ? \
__val \
: \
ctxt_sys_reg(&__vcpu->arch.ctxt, reg); \
})
#define vcpu_write_sys_reg(__vcpu, __val, reg) \
do { \
/* SYSREGS_ON_CPU is only used in VHE */ \
if (is_nvhe_hyp_code() || \
!vcpu_get_flag(__vcpu, SYSREGS_ON_CPU) || \
!__vcpu_write_sys_reg_to_cpu(__val, reg)) \
ctxt_sys_reg(&__vcpu->arch.ctxt, reg) = __val; \
} while (0)
struct kvm_vm_stat {
struct kvm_vm_stat_generic generic;
atomic64_t protected_hyp_mem;
atomic64_t protected_shared_mem;
};
struct kvm_vcpu_stat {
@ -869,9 +1026,26 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
#define __vcpu_save_guest_debug_regs(vcpu) \
do { \
u64 val = vcpu_read_sys_reg(vcpu, MDSCR_EL1); \
\
(vcpu)->arch.guest_debug_preserved.mdscr_el1 = val; \
} while(0)
#define __vcpu_restore_guest_debug_regs(vcpu) \
do { \
u64 val = (vcpu)->arch.guest_debug_preserved.mdscr_el1; \
\
vcpu_write_sys_reg(vcpu, val, MDSCR_EL1); \
} while (0)
#define kvm_vcpu_os_lock_enabled(vcpu) \
(!!(__vcpu_sys_reg(vcpu, OSLSR_EL1) & SYS_OSLSR_OSLK))
#define kvm_vcpu_needs_debug_regs(vcpu) \
((vcpu)->guest_debug || kvm_vcpu_os_lock_enabled(vcpu))
int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
@ -915,12 +1089,7 @@ int kvm_set_ipa_limit(void);
#define __KVM_HAVE_ARCH_VM_ALLOC
struct kvm *kvm_arch_alloc_vm(void);
int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type);
static inline bool kvm_vm_is_protected(struct kvm *kvm)
{
return false;
}
#define kvm_vm_is_protected(kvm) ((kvm)->arch.pkvm.enabled)
void kvm_init_protected_traps(struct kvm_vcpu *vcpu);

View File

@ -15,6 +15,9 @@
DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
DECLARE_PER_CPU(unsigned long, kvm_hyp_vector);
DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
DECLARE_PER_CPU(int, hyp_cpu_number);
#define hyp_smp_processor_id() (__this_cpu_read(hyp_cpu_number))
#define read_sysreg_elx(r,nvh,vh) \
({ \
@ -61,8 +64,8 @@ void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if);
int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);
#ifdef __KVM_NVHE_HYPERVISOR__
@ -90,6 +93,7 @@ void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu);
void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
void __sve_save_state(void *sve_pffr, u32 *fpsr);
void __sve_restore_state(void *sve_pffr, u32 *fpsr);
#ifndef __KVM_NVHE_HYPERVISOR__
@ -123,4 +127,10 @@ extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val);
extern unsigned long kvm_nvhe_sym(__icache_flags);
extern unsigned int kvm_nvhe_sym(kvm_arm_vmid_bits);
extern bool kvm_nvhe_sym(smccc_trng_available);
extern bool kvm_nvhe_sym(__pkvm_modules_enabled);
#endif /* __ARM64_KVM_HYP_H__ */

View File

@ -116,6 +116,7 @@ alternative_cb_end
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>
#include <asm/kvm_host.h>
#include <asm/kvm_pkvm_module.h>
void kvm_update_va_mask(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr, int nr_inst);
@ -166,7 +167,7 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
void free_hyp_pgds(void);
void stage2_unmap_vm(struct kvm *kvm);
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu);
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type);
void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu);
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
phys_addr_t pa, unsigned long size, bool writable);
@ -187,8 +188,13 @@ static inline void *__kvm_vector_slot2addr(void *base,
struct kvm;
#define kvm_flush_dcache_to_poc(a,l) \
dcache_clean_inval_poc((unsigned long)(a), (unsigned long)(a)+(l))
#define kvm_flush_dcache_to_poc(a, l) do { \
unsigned long __a = (unsigned long)(a); \
unsigned long __l = (unsigned long)(l); \
\
if (__l) \
dcache_clean_inval_poc(__a, __a + __l); \
} while (0)
static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
{

View File

@ -42,6 +42,38 @@ typedef u64 kvm_pte_t;
#define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT)
#define KVM_PTE_ADDR_51_48 GENMASK(15, 12)
#define KVM_PHYS_INVALID (-1ULL)
#define KVM_PTE_TYPE BIT(1)
#define KVM_PTE_TYPE_BLOCK 0
#define KVM_PTE_TYPE_PAGE 1
#define KVM_PTE_TYPE_TABLE 1
#define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2)
#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP GENMASK(7, 6)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO 3
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW 1
#define KVM_PTE_LEAF_ATTR_LO_S1_SH GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS 3
#define KVM_PTE_LEAF_ATTR_LO_S1_AF BIT(10)
#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR GENMASK(5, 2)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R BIT(6)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W BIT(7)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS 3
#define KVM_PTE_LEAF_ATTR_LO_S2_AF BIT(10)
#define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 51)
#define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55)
#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54)
#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54)
static inline bool kvm_pte_valid(kvm_pte_t pte)
{
return pte & KVM_PTE_VALID;
@ -57,6 +89,18 @@ static inline u64 kvm_pte_to_phys(kvm_pte_t pte)
return pa;
}
static inline kvm_pte_t kvm_phys_to_pte(u64 pa)
{
kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK;
if (PAGE_SHIFT == 16) {
pa &= GENMASK(51, 48);
pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48);
}
return pte;
}
static inline u64 kvm_granule_shift(u32 level)
{
/* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */
@ -73,6 +117,17 @@ static inline bool kvm_level_supports_block_mapping(u32 level)
return level >= KVM_PGTABLE_MIN_BLOCK_LEVEL;
}
static inline bool kvm_pte_table(kvm_pte_t pte, u32 level)
{
if (level == KVM_PGTABLE_MAX_LEVELS - 1)
return false;
if (!kvm_pte_valid(pte))
return false;
return FIELD_GET(KVM_PTE_TYPE, pte) == KVM_PTE_TYPE_TABLE;
}
/**
* struct kvm_pgtable_mm_ops - Memory management callbacks.
* @zalloc_page: Allocate a single zeroed memory page.
@ -129,6 +184,7 @@ enum kvm_pgtable_stage2_flags {
* @KVM_PGTABLE_PROT_W: Write permission.
* @KVM_PGTABLE_PROT_R: Read permission.
* @KVM_PGTABLE_PROT_DEVICE: Device attributes.
* @KVM_PGTABLE_PROT_NC: Normal non-cacheable attributes.
* @KVM_PGTABLE_PROT_SW0: Software bit 0.
* @KVM_PGTABLE_PROT_SW1: Software bit 1.
* @KVM_PGTABLE_PROT_SW2: Software bit 2.
@ -140,6 +196,7 @@ enum kvm_pgtable_prot {
KVM_PGTABLE_PROT_R = BIT(2),
KVM_PGTABLE_PROT_DEVICE = BIT(3),
KVM_PGTABLE_PROT_NC = BIT(4),
KVM_PGTABLE_PROT_SW0 = BIT(55),
KVM_PGTABLE_PROT_SW1 = BIT(56),
@ -161,6 +218,22 @@ enum kvm_pgtable_prot {
typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
enum kvm_pgtable_prot prot);
typedef bool (*kvm_pgtable_pte_is_counted_cb_t)(kvm_pte_t pte, u32 level);
/**
* struct kvm_pgtable_pte_ops - PTE callbacks.
* @force_pte_cb:		Return true if mappings in the given range must be
*				forced to page granularity instead of block
*				mappings.
* @pte_is_counted_cb:		Verify the attributes of the @pte argument
*				and return true if the descriptor needs to be
*				refcounted, otherwise return false.
*/
struct kvm_pgtable_pte_ops {
kvm_pgtable_force_pte_cb_t force_pte_cb;
kvm_pgtable_pte_is_counted_cb_t pte_is_counted_cb;
};
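For illustration, here is a minimal sketch of how a caller might populate these callbacks; the my_* names are hypothetical and not part of this patch:

static bool my_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot)
{
	/* Never force page granularity: block mappings remain allowed. */
	return false;
}

static bool my_pte_is_counted_cb(kvm_pte_t pte, u32 level)
{
	/* Refcount only valid descriptors. */
	return kvm_pte_valid(pte);
}

static struct kvm_pgtable_pte_ops my_pte_ops = {
	.force_pte_cb		= my_force_pte_cb,
	.pte_is_counted_cb	= my_pte_is_counted_cb,
};

A stage-2 table would then be initialised with kvm_pgtable_stage2_init(pgt, mmu, mm_ops, &my_pte_ops), matching the updated prototype further down.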
/**
* struct kvm_pgtable - KVM page-table.
* @ia_bits: Maximum input address size, in bits.
@ -169,8 +242,7 @@ typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
* @mm_ops: Memory management callbacks.
* @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
* @flags: Stage-2 page-table flags.
* @force_pte_cb: Function that returns true if page level mappings must
* be used instead of block mappings.
* @pte_ops: PTE callbacks.
*/
struct kvm_pgtable {
u32 ia_bits;
@ -181,7 +253,7 @@ struct kvm_pgtable {
/* Stage-2 only */
struct kvm_s2_mmu *mmu;
enum kvm_pgtable_stage2_flags flags;
kvm_pgtable_force_pte_cb_t force_pte_cb;
struct kvm_pgtable_pte_ops *pte_ops;
};
/**
@ -296,24 +368,31 @@ u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
*/
u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);
/**
* kvm_pgtable_stage2_pgd_size() - Helper to compute size of a stage-2 PGD
* @vtcr: Content of the VTCR register.
*
* Return: the size (in bytes) of the stage-2 PGD
*/
size_t kvm_pgtable_stage2_pgd_size(u64 vtcr);
/**
* __kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table.
* @pgt: Uninitialised page-table structure to initialise.
* @mmu: S2 MMU context for this S2 translation
* @mm_ops: Memory management callbacks.
* @flags: Stage-2 configuration flags.
* @force_pte_cb: Function that returns true if page level mappings must
* be used instead of block mappings.
* @pte_ops: PTE callbacks.
*
* Return: 0 on success, negative error code on failure.
*/
int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
struct kvm_pgtable_mm_ops *mm_ops,
enum kvm_pgtable_stage2_flags flags,
kvm_pgtable_force_pte_cb_t force_pte_cb);
struct kvm_pgtable_pte_ops *pte_ops);
#define kvm_pgtable_stage2_init(pgt, mmu, mm_ops) \
__kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, NULL)
#define kvm_pgtable_stage2_init(pgt, mmu, mm_ops, pte_ops) \
__kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, pte_ops)
/**
* kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.
@ -357,14 +436,16 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
void *mc);
/**
* kvm_pgtable_stage2_set_owner() - Unmap and annotate pages in the IPA space to
* track ownership.
* kvm_pgtable_stage2_annotate() - Unmap and annotate pages in the IPA space
* to track ownership (and more).
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
* @addr: Base intermediate physical address to annotate.
* @size: Size of the annotated range.
* @mc: Cache of pre-allocated and zeroed memory from which to allocate
* page-table pages.
* @owner_id: Unique identifier for the owner of the page.
* @annotation: A 63-bit value that will be stored in the page tables.
* @annotation[0] must be 0, and @annotation[63:1] is stored
* in the page tables.
*
* By default, all page-tables are owned by identifier 0. This function can be
* used to mark portions of the IPA space as owned by other entities. When a
@ -373,8 +454,8 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
*
* Return: 0 on success, negative error code on failure.
*/
int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
void *mc, u8 owner_id);
int kvm_pgtable_stage2_annotate(struct kvm_pgtable *pgt, u64 addr, u64 size,
void *mc, kvm_pte_t annotation);
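As a hedged sketch of how the old owner-id behaviour maps onto this interface (the mask and helper below are illustrative, not necessarily the hypervisor's actual definitions), the owner can be packed into bits above bit 0 so that bit 0 of the annotation stays clear:

/* Illustrative only: pack an 8-bit owner id into annotation bits [9:2]. */
#define MY_PTE_OWNER_MASK	GENMASK(9, 2)

static kvm_pte_t my_owner_annotation(u8 owner_id)
{
	/* Bit 0 is left clear, as kvm_pgtable_stage2_annotate() requires. */
	return FIELD_PREP(MY_PTE_OWNER_MASK, owner_id);
}

Calling kvm_pgtable_stage2_annotate(pgt, addr, size, mc, my_owner_annotation(id)) then behaves like the old set_owner call.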
/**
* kvm_pgtable_stage2_unmap() - Remove a mapping from a guest stage-2 page-table.


@ -2,18 +2,298 @@
/*
* Copyright (C) 2020 - Google LLC
* Author: Quentin Perret <qperret@google.com>
* Author: Fuad Tabba <tabba@google.com>
*/
#ifndef __ARM64_KVM_PKVM_H__
#define __ARM64_KVM_PKVM_H__
#include <linux/arm_ffa.h>
#include <linux/memblock.h>
#include <linux/scatterlist.h>
#include <asm/kvm_pgtable.h>
#include <asm/sysreg.h>
/* Maximum number of VMs that can co-exist under pKVM. */
#define KVM_MAX_PVMS 255
#define HYP_MEMBLOCK_REGIONS 128
#define PVMFW_INVALID_LOAD_ADDR (-1)
int pkvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap);
int pkvm_init_host_vm(struct kvm *kvm, unsigned long type);
int pkvm_create_hyp_vm(struct kvm *kvm);
void pkvm_destroy_hyp_vm(struct kvm *kvm);
void pkvm_host_reclaim_page(struct kvm *host_kvm, phys_addr_t ipa);
/*
* Definitions for features to be allowed or restricted for guest virtual
* machines, depending on the mode KVM is running in and on the type of guest
* that is running.
*
* The ALLOW masks represent a bitmask of feature fields that are allowed
* without any restrictions as long as they are supported by the system.
*
* The RESTRICT_UNSIGNED masks, if present, represent unsigned fields for
* features that are restricted to at most the specified level of support.
*
* If a feature field is not present in either, then it is not supported.
*
* The approach taken for protected VMs is to allow features that are:
* - Needed by common Linux distributions (e.g., floating point)
* - Trivial to support, e.g., supporting the feature does not introduce or
* require tracking of additional state in KVM
* - Impossible to trap, so the guest cannot be prevented from using them anyway
*/
/*
* Allow for protected VMs:
* - Floating-point and Advanced SIMD
* - GICv3(+) system register interface
* - Data Independent Timing
*/
#define PVM_ID_AA64PFR0_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP) | \
ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD) | \
ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC) | \
ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_DIT) \
)
/*
* Restrict to the following *unsigned* features for protected VMs:
* - AArch64 guests only (no support for AArch32 guests):
* AArch32 adds complexity in trap handling, emulation, condition codes,
* etc...
* - RAS (v1)
* Supported by KVM
*/
#define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL2), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL3), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), ID_AA64PFR0_EL1_RAS_IMP) \
)
/*
* Allow for protected VMs:
* - Branch Target Identification
* - Speculative Store Bypassing
*/
#define PVM_ID_AA64PFR1_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_BT) | \
ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SSBS) \
)
/*
* Allow for protected VMs:
* - Mixed-endian
* - Distinction between Secure and Non-secure Memory
* - Mixed-endian at EL0 only
* - Non-context synchronizing exception entry and exit
*/
#define PVM_ID_AA64MMFR0_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGEND) | \
ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_SNSMEM) | \
ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGENDEL0) | \
ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_EXS) \
)
/*
* Restrict to the following *unsigned* features for protected VMs:
* - 40-bit IPA
* - 16-bit ASID
*/
#define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED (\
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_PARANGE), ID_AA64MMFR0_EL1_PARANGE_40) | \
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_ASIDBITS), ID_AA64MMFR0_EL1_ASIDBITS_16) \
)
/*
* Allow for protected VMs:
* - Hardware translation table updates to Access flag and Dirty state
* - Number of VMID bits from CPU
* - Hierarchical Permission Disables
* - Privileged Access Never
* - SError interrupt exceptions from speculative reads
* - Enhanced Translation Synchronization
*/
#define PVM_ID_AA64MMFR1_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_VMIDBits) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HPDS) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_PAN) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_SpecSEI) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_ETS) \
)
/*
* Allow for protected VMs:
* - Common not Private translations
* - User Access Override
* - IESB bit in the SCTLR_ELx registers
* - Unaligned single-copy atomicity and atomic functions
* - ESR_ELx.EC value on an exception by read access to feature ID space
* - TTL field in address operations.
* - Break-before-make sequences when changing translation block size
* - E0PDx mechanism
*/
#define PVM_ID_AA64MMFR2_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_CnP) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_UAO) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IESB) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_AT) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IDS) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_TTL) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_BBM) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_E0PD) \
)
/*
* No support for Scalable Vectors for protected VMs:
* Requires additional support from KVM, e.g., context-switching and
* trapping at EL2
*/
#define PVM_ID_AA64ZFR0_ALLOW (0ULL)
/*
* No support for debug, including breakpoints and watchpoints, for
* protected VMs:
* The Arm architecture mandates support for at least the Armv8 debug
* architecture, which would include at least 2 hardware breakpoints and
* watchpoints. Providing that support to protected guests adds
* considerable state and complexity. Therefore, the reserved value of 0 is
* used for debug-related fields.
*/
#define PVM_ID_AA64DFR0_ALLOW (0ULL)
#define PVM_ID_AA64DFR1_ALLOW (0ULL)
/*
* No support for implementation defined features.
*/
#define PVM_ID_AA64AFR0_ALLOW (0ULL)
#define PVM_ID_AA64AFR1_ALLOW (0ULL)
/*
* No restrictions on instructions implemented in AArch64.
*/
#define PVM_ID_AA64ISAR0_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_AES) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA1) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA2) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_CRC32) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RDM) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA3) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM3) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM4) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_DP) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_FHM) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TS) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TLB) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RNDR) \
)
#define PVM_ID_AA64ISAR1_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DPB) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_JSCVT) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FCMA) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_LRCPC) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FRINTTS) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SB) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SPECRES) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_BF16) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DGH) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_I8MM) \
)
#define PVM_ID_AA64ISAR2_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3) | \
ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) \
)
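To show how the ALLOW and RESTRICT_UNSIGNED masks are meant to be combined, here is a minimal sketch; the helper name and the single-field handling are assumptions for illustration, since the hypervisor walks every restricted field rather than just one:

static u64 pvm_sanitise_id_aa64pfr0(void)
{
	u64 host = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
	/* Keep only the fields that are allowed without restriction. */
	u64 val = host & PVM_ID_AA64PFR0_ALLOW;
	/* For a restricted unsigned field, expose min(host, restriction). */
	u64 cap = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS),
			    PVM_ID_AA64PFR0_RESTRICT_UNSIGNED);
	u64 ras = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), host);

	val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS),
			  min(ras, cap));
	return val;
}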
/*
* Returns the maximum number of breakpoints supported for protected VMs.
*/
static inline int pkvm_get_max_brps(void)
{
int num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs),
PVM_ID_AA64DFR0_ALLOW);
/*
* If breakpoints are supported, the maximum number is 1 + the field.
* Otherwise, return 0, which is not compliant with the architecture,
* but is reserved and is used here to indicate no debug support.
*/
return num ? num + 1 : 0;
}
/*
* Returns the maximum number of watchpoints supported for protected VMs.
*/
static inline int pkvm_get_max_wrps(void)
{
int num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs),
PVM_ID_AA64DFR0_ALLOW);
return num ? num + 1 : 0;
}
enum pkvm_moveable_reg_type {
PKVM_MREG_MEMORY,
PKVM_MREG_PROTECTED_RANGE,
};
struct pkvm_moveable_reg {
phys_addr_t start;
u64 size;
enum pkvm_moveable_reg_type type;
};
#define PKVM_NR_MOVEABLE_REGS 512
extern struct pkvm_moveable_reg kvm_nvhe_sym(pkvm_moveable_regs)[];
extern unsigned int kvm_nvhe_sym(pkvm_moveable_regs_nr);
extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
extern unsigned int kvm_nvhe_sym(hyp_memblock_nr);
extern phys_addr_t kvm_nvhe_sym(pvmfw_base);
extern phys_addr_t kvm_nvhe_sym(pvmfw_size);
static inline unsigned long
hyp_vmemmap_memblock_size(struct memblock_region *reg, size_t vmemmap_entry_size)
{
unsigned long nr_pages = reg->size >> PAGE_SHIFT;
unsigned long start, end;
start = (reg->base >> PAGE_SHIFT) * vmemmap_entry_size;
end = start + nr_pages * vmemmap_entry_size;
start = ALIGN_DOWN(start, PAGE_SIZE);
end = ALIGN(end, PAGE_SIZE);
return end - start;
}
static inline unsigned long hyp_vmemmap_pages(size_t vmemmap_entry_size)
{
unsigned long res = 0, i;
for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) {
res += hyp_vmemmap_memblock_size(&kvm_nvhe_sym(hyp_memory)[i],
vmemmap_entry_size);
}
return res >> PAGE_SHIFT;
}
static inline unsigned long hyp_vm_table_pages(void)
{
return PAGE_ALIGN(KVM_MAX_PVMS * sizeof(void *)) >> PAGE_SHIFT;
}
static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
{
unsigned long total = 0, i;
@ -27,27 +307,28 @@ static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
return total;
}
static inline unsigned long __hyp_pgtable_total_pages(void)
static inline unsigned long __hyp_pgtable_moveable_regs_pages(void)
{
unsigned long res = 0, i;
/* Cover all of memory with page-granularity */
for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) {
struct memblock_region *reg = &kvm_nvhe_sym(hyp_memory)[i];
/* Cover all of moveable regions with page-granularity */
for (i = 0; i < kvm_nvhe_sym(pkvm_moveable_regs_nr); i++) {
struct pkvm_moveable_reg *reg = &kvm_nvhe_sym(pkvm_moveable_regs)[i];
res += __hyp_pgtable_max_pages(reg->size >> PAGE_SHIFT);
}
return res;
}
#define __PKVM_PRIVATE_SZ SZ_1G
static inline unsigned long hyp_s1_pgtable_pages(void)
{
unsigned long res;
res = __hyp_pgtable_total_pages();
res = __hyp_pgtable_moveable_regs_pages();
/* Allow 1 GiB for private mappings */
res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
res += __hyp_pgtable_max_pages(__PKVM_PRIVATE_SZ >> PAGE_SHIFT);
return res;
}
@ -60,12 +341,48 @@ static inline unsigned long host_s2_pgtable_pages(void)
* Include an extra 16 pages to safely upper-bound the worst case of
* concatenated pgds.
*/
res = __hyp_pgtable_total_pages() + 16;
res = __hyp_pgtable_moveable_regs_pages() + 16;
/* Allow 1 GiB for MMIO mappings */
/* Allow 1 GiB for non-moveable regions */
res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
return res;
}
#define KVM_FFA_MBOX_NR_PAGES 1
/*
* Maximum number of constituents allowed in a descriptor. This number is
* arbitrary, see comment below on SG_MAX_SEGMENTS in hyp_ffa_proxy_pages().
*/
#define KVM_FFA_MAX_NR_CONSTITUENTS 4096
static inline unsigned long hyp_ffa_proxy_pages(void)
{
size_t desc_max;
/*
* SG_MAX_SEGMENTS is supposed to bound the number of elements in an
* sglist, which should match the number of constituents in the
* corresponding FFA descriptor. As such, the EL2 buffer needs to be
* large enough to hold a descriptor with at least SG_MAX_SEGMENTS
* constituents. But the kernel's DMA code doesn't enforce the limit, and
* it is sometimes abused, so let's allow larger descriptors and hope
* for the best.
*/
BUILD_BUG_ON(KVM_FFA_MAX_NR_CONSTITUENTS < SG_MAX_SEGMENTS);
/*
* The hypervisor FFA proxy needs enough memory to buffer a fragmented
* descriptor returned from EL3 in response to a RETRIEVE_REQ call.
*/
desc_max = sizeof(struct ffa_mem_region) +
sizeof(struct ffa_mem_region_attributes) +
sizeof(struct ffa_composite_mem_region) +
KVM_FFA_MAX_NR_CONSTITUENTS * sizeof(struct ffa_mem_region_addr_range);
/* Plus a page each for the hypervisor's RX and TX mailboxes. */
return (2 * KVM_FFA_MBOX_NR_PAGES) + DIV_ROUND_UP(desc_max, PAGE_SIZE);
}
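For a rough sense of scale, assuming 4 KiB pages and a 16-byte struct ffa_mem_region_addr_range (both assumptions for illustration): the constituent array alone is 4096 * 16 B = 64 KiB, so desc_max rounds up to 17 pages once the region headers are included, and the two mailbox pages bring the total to roughly 19 pages of EL2 buffer.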
#endif /* __ARM64_KVM_PKVM_H__ */


@ -0,0 +1,91 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ARM64_KVM_PKVM_MODULE_H__
#define __ARM64_KVM_PKVM_MODULE_H__
#include <asm/kvm_pgtable.h>
#include <linux/export.h>
typedef void (*dyn_hcall_t)(struct kvm_cpu_context *);
enum pkvm_psci_notification {
PKVM_PSCI_CPU_SUSPEND,
PKVM_PSCI_SYSTEM_SUSPEND,
PKVM_PSCI_CPU_ENTRY,
};
#ifdef CONFIG_MODULES
struct pkvm_module_ops {
int (*create_private_mapping)(phys_addr_t phys, size_t size,
enum kvm_pgtable_prot prot,
unsigned long *haddr);
int (*register_serial_driver)(void (*hyp_putc_cb)(char));
void (*puts)(const char *str);
void (*putx64)(u64 num);
void *(*fixmap_map)(phys_addr_t phys);
void (*fixmap_unmap)(void);
void *(*linear_map_early)(phys_addr_t phys, size_t size, enum kvm_pgtable_prot prot);
void (*linear_unmap_early)(void *addr, size_t size);
void (*flush_dcache_to_poc)(void *addr, size_t size);
int (*register_host_perm_fault_handler)(int (*cb)(struct kvm_cpu_context *ctxt, u64 esr, u64 addr));
int (*protect_host_page)(u64 pfn, enum kvm_pgtable_prot prot);
int (*register_host_smc_handler)(bool (*cb)(struct kvm_cpu_context *));
int (*register_default_trap_handler)(bool (*cb)(struct kvm_cpu_context *));
int (*register_illegal_abt_notifier)(void (*cb)(struct kvm_cpu_context *));
int (*register_psci_notifier)(void (*cb)(enum pkvm_psci_notification, struct kvm_cpu_context *));
int (*register_hyp_panic_notifier)(void (*cb)(struct kvm_cpu_context *host_ctxt));
};
int __pkvm_load_el2_module(struct module *this, unsigned long *token);
int __pkvm_register_el2_call(unsigned long hfn_hyp_va);
#else
static inline int __pkvm_load_el2_module(struct module *this,
unsigned long *token)
{
return -ENOSYS;
}
static inline int __pkvm_register_el2_call(unsigned long hfn_hyp_va)
{
return -ENOSYS;
}
#endif /* CONFIG_MODULES */
#ifdef MODULE
/*
* Convert an EL2 module address from its kernel VA to the corresponding hyp VA.
*/
#define pkvm_el2_mod_va(kern_va, token) \
({ \
unsigned long hyp_text_kern_va = \
(unsigned long)THIS_MODULE->arch.hyp.text.start; \
unsigned long offset; \
\
offset = (unsigned long)kern_va - hyp_text_kern_va; \
token + offset; \
})
#define pkvm_load_el2_module(init_fn, token) \
({ \
THIS_MODULE->arch.hyp.init = init_fn; \
__pkvm_load_el2_module(THIS_MODULE, token); \
})
#define pkvm_register_el2_mod_call(hfn, token) \
({ \
__pkvm_register_el2_call(pkvm_el2_mod_va(hfn, token)); \
})
#define pkvm_el2_mod_call(id, ...) \
({ \
struct arm_smccc_res res; \
\
arm_smccc_1_1_hvc(KVM_HOST_SMCCC_ID(id), \
##__VA_ARGS__, &res); \
WARN_ON(res.a0 != SMCCC_RET_SUCCESS); \
\
res.a1; \
})
#endif
#endif
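A minimal sketch of how a hypothetical module might use this API; hyp_hello_init and the banner string are made up, and the build-time placement of the EL2 code into the .hyp.* sections is omitted here:

/* Runs at EL2 once the module sections have been donated to the hypervisor. */
static int hyp_hello_init(const struct pkvm_module_ops *ops)
{
	ops->puts("pKVM module loaded");
	return 0;
}

static unsigned long hyp_token;

static int __init hello_init(void)
{
	/* Donate this module's .hyp.* sections and run hyp_hello_init at EL2. */
	return pkvm_load_el2_module(hyp_hello_init, &hyp_token);
}
module_init(hello_init);

Host-callable EL2 entry points would then typically be registered with pkvm_register_el2_mod_call() and invoked through pkvm_el2_mod_call() using the identifier it returns.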


@ -0,0 +1,436 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2021 - Google LLC
* Author: David Brazdil <dbrazdil@google.com>
*/
#ifndef __ARM64_KVM_S2MPU_H__
#define __ARM64_KVM_S2MPU_H__
#include <linux/bitfield.h>
#include <asm/kvm_mmu.h>
#define S2MPU_MMIO_SIZE SZ_64K
#define SYSMMU_SYNC_MMIO_SIZE SZ_64K
#define SYSMMU_SYNC_S2_OFFSET SZ_32K
#define SYSMMU_SYNC_S2_MMIO_SIZE (SYSMMU_SYNC_MMIO_SIZE - \
SYSMMU_SYNC_S2_OFFSET)
#define NR_VIDS 8
#define NR_CTX_IDS 8
#define ALL_VIDS_BITMAP GENMASK(NR_VIDS - 1, 0)
/*
* S2MPU V9-specific values (some new, some different from older versions).
* To avoid any confusion, all names are prefixed with V9.
*/
#define REG_NS_V9_CTRL_PROT_EN_PER_VID_SET 0x50
#define REG_NS_V9_CTRL_ERR_RESP_T_PER_VID_SET 0x70
#define REG_NS_V9_CFG_MPTW_ATTRIBUTE 0x10
#define REG_NS_V9_READ_MPTC 0x3014
#define REG_NS_V9_READ_MPTC_TAG_PPN 0x3018
#define REG_NS_V9_READ_MPTC_TAG_OTHERS 0x301C
#define REG_NS_V9_READ_MPTC_DATA 0x3020
#define REG_NS_V9_READ_PTLB 0x3030
#define REG_NS_V9_READ_PTLB_TAG 0x3034
#define REG_NS_V9_READ_PTLB_DATA_S1_EN_PPN_AP 0x3040
#define REG_NS_V9_READ_PTLB_DATA_S1_DIS_AP_LIST 0x3044
#define REG_NS_V9_PMMU_INDICATOR 0x3050
#define REG_NS_V9_PMMU_INFO 0x3100
#define REG_NS_V9_PMMU_PTLB_INFO(n) (0x3400 + (n)*0x4)
#define REG_NS_V9_SWALKER_INFO 0x3104
#define REG_NS_V9_MPTC_INFO 0x3C00
/* V9 Masks */
#define V9_READ_MPTC_TAG_PPN_VALID_MASK BIT(28)
#define V9_READ_MPTC_TAG_PPN_TPN_PPN_MASK GENMASK(23, 0)
#define V9_READ_MPTC_TAG_PPN_MASK (V9_READ_MPTC_TAG_PPN_VALID_MASK | \
V9_READ_MPTC_TAG_PPN_TPN_PPN_MASK)
#define V9_READ_MPTC_TAG_OTHERS_VID_MASK GENMASK(10, 8)
#define V9_READ_MPTC_TAG_OTHERS_PAGE_GRAN_MASK GENMASK(5, 4)
#define V9_READ_MPTC_TAG_OTHERS_MASK (V9_READ_MPTC_TAG_OTHERS_VID_MASK | \
V9_READ_MPTC_TAG_OTHERS_PAGE_GRAN_MASK)
#define V9_READ_PTLB_WAY_MASK GENMASK(31, 24)
#define V9_READ_PTLB_SET_MASK GENMASK(23, 16)
#define V9_READ_PTLB_PTLB_MASK GENMASK(15, 4)
#define V9_READ_PTLB_PMMU_MASK GENMASK(3, 0)
#define V9_READ_PTLB_MASK (V9_READ_PTLB_WAY_MASK | V9_READ_PTLB_SET_MASK | \
V9_READ_PTLB_PTLB_MASK | V9_READ_PTLB_PMMU_MASK)
#define V9_READ_PTLB_TAG_VALID_MASK BIT(31)
#define V9_READ_PTLB_TAG_PAGE_SIZE_MASK GENMASK(30, 28)
#define V9_READ_PTLB_TAG_STAGE1_ENABLED_MASK BIT(27)
#define V9_READ_PTLB_TAG_VID_MASK GENMASK(26, 24)
#define V9_READ_PTLB_TAG_TPN_MASK GENMASK(23, 0)
#define V9_READ_PTLB_TAG_MASK (V9_READ_PTLB_TAG_VALID_MASK | \
V9_READ_PTLB_TAG_TPN_MASK | \
V9_READ_PTLB_TAG_VID_MASK | \
V9_READ_PTLB_TAG_PAGE_SIZE_MASK | \
V9_READ_PTLB_TAG_STAGE1_ENABLED_MASK)
#define V9_READ_PTLB_DTA_S1_EN_PPN_AP_S2AP_MASK GENMASK(25, 24)
#define V9_READ_PTLB_DTA_S1_EN_PPN_AP_PPN_MASK GENMASK(23, 0)
#define V9_READ_PTLB_DATA_S1_ENABLE_PPN_AP_MASK (V9_READ_PTLB_DTA_S1_EN_PPN_AP_S2AP_MASK | \
V9_READ_PTLB_DTA_S1_EN_PPN_AP_PPN_MASK)
#define V9_READ_MPTC_INFO_NUM_MPTC_SET GENMASK(31, 16)
#define V9_READ_MPTC_INFO_NUM_MPTC_WAY GENMASK(15, 12)
#define V9_READ_MPTC_INFO_MASK (V9_READ_MPTC_INFO_NUM_MPTC_SET | \
V9_READ_MPTC_INFO_NUM_MPTC_SET)
#define V9_READ_PMMU_INFO_NUM_PTLB GENMASK(15, 1)
#define V9_READ_PMMU_INFO_VA_WIDTH BIT(0)
#define V9_READ_PMMU_INFO_NUM_STREAM_TABLE GENMASK(31, 16)
#define V9_READ_PMMU_INFO_MASK (V9_READ_PMMU_INFO_NUM_PTLB | \
V9_READ_PMMU_INFO_VA_WIDTH | \
V9_READ_PMMU_INFO_NUM_STREAM_TABLE)
#define V9_READ_PMMU_PTLB_INFO_NUM_WAY GENMASK(31, 16)
#define V9_READ_PMMU_PTLB_INFO_NUM_SET GENMASK(15, 0)
#define V9_READ_PMMU_PTLB_INFO_MASK (V9_READ_PMMU_PTLB_INFO_NUM_WAY | \
V9_READ_PMMU_PTLB_INFO_NUM_SET)
#define V9_READ_PMMU_INDICATOR_PMMU_NUM GENMASK(3, 0)
#define V9_READ_PMMU_INDICATOR_MASK V9_READ_PMMU_INDICATOR_PMMU_NUM
#define V9_READ_MPTC_WAY_MASK GENMASK(17, 16)
#define V9_READ_MPTC_SET_MASK GENMASK(15, 0)
#define V9_READ_MPTC_MASK (V9_READ_MPTC_WAY_MASK | \
V9_READ_MPTC_SET_MASK)
#define V9_READ_MPTC_WAY(way) FIELD_PREP(V9_READ_MPTC_WAY_MASK, (way))
#define V9_READ_MPTC_SET(set) FIELD_PREP(V9_READ_MPTC_SET_MASK, (set))
#define V9_READ_MPTC(set, way) (V9_READ_MPTC_SET(set) | V9_READ_MPTC_WAY(way))
#define V9_READ_PTLB_WAY(x) FIELD_PREP(V9_READ_PTLB_WAY_MASK, (x))
#define V9_READ_PTLB_SET(x) FIELD_PREP(V9_READ_PTLB_SET_MASK, (x))
#define V9_READ_PTLB_PTLB(x) FIELD_PREP(V9_READ_PTLB_PTLB_MASK, (x))
#define V9_READ_PTLB_PMMU(x) FIELD_PREP(V9_READ_PTLB_PMMU_MASK, (x))
#define V9_READ_PTLB(pu_i, pb_i, s, w) (V9_READ_PTLB_WAY(w) | V9_READ_PTLB_SET(s) | \
V9_READ_PTLB_PTLB(pb_i) | V9_READ_PTLB_PMMU(pu_i))
#define V9_READ_SLTB_INFO_SET_MASK GENMASK(15, 0)
#define V9_READ_SLTB_INFO_WAY_MASK GENMASK(31, 16)
#define V9_READ_SLTB_INFO_MASK (V9_READ_SLTB_INFO_SET_MASK | \
V9_READ_SLTB_INFO_WAY_MASK)
#define V9_SWALKER_INFO_NUM_STLB_MASK GENMASK(31, 16)
#define V9_SWALKER_INFO_NUM_PMMU_MASK GENMASK(15, 0)
#define V9_SWALKER_INFO_MASK (V9_SWALKER_INFO_NUM_STLB_MASK | \
V9_SWALKER_INFO_NUM_PMMU_MASK)
/*
* STLB has 2 types: A,B based on how S2MPU is connected
* registers or masks that vary based on type are suffixed with
* either TYPEA or TYPEB.
*/
#define REG_NS_V9_READ_STLB 0x3000
#define REG_NS_V9_READ_STLB_TPN 0x3004
#define REG_NS_V9_READ_STLB_TAG_PPN 0x3008
#define REG_NS_V9_READ_STLB_TAG_OTHERS 0x300C
#define REG_NS_V9_READ_STLB_DATA 0x3010
#define REG_NS_V9_STLB_INFO(n) (0x3800 + (n)*0x4)
#define V9_READ_STLB_SET_MASK_TYPEA GENMASK(7, 0)
#define V9_READ_STLB_WAY_MASK_TYPEA GENMASK(15, 8)
#define V9_READ_STLB_SUBLINE_MASK_TYPEA GENMASK(31, 20)
#define V9_READ_STLB_STLBID_MASK_TYPEA GENMASK(17, 16)
#define V9_READ_STLB_MASK_TYPEA (V9_READ_STLB_SET_MASK_TYPEA | \
V9_READ_STLB_WAY_MASK_TYPEA | \
V9_READ_STLB_SUBLINE_MASK_TYPEA | \
V9_READ_STLB_STLBID_MASK_TYPEA)
#define V9_READ_STLB_SET_MASK_TYPEB GENMASK(15, 0)
#define V9_READ_STLB_WAY_MASK_TYPEB GENMASK(17, 16)
#define V9_READ_STLB_STLBID_MASK_TYPEB GENMASK(31, 20)
#define V9_READ_STLB_MASK_TYPEB (V9_READ_STLB_SET_MASK_TYPEB | \
V9_READ_STLB_WAY_MASK_TYPEB | \
V9_READ_STLB_STLBID_MASK_TYPEB)
#define V9_READ_STLB_TPN_TPN_MASK GENMASK(23, 0)
#define V9_READ_STLB_TPN_S2VALID_MASK BIT(24)
#define V9_READ_STLB_TPN_STAGE1_ENABLED_MASK BIT(27)
#define V9_READ_STLB_TPN_VALID_MASK BIT(28)
#define V9_READ_STLB_TPN_MASK (V9_READ_STLB_TPN_TPN_MASK | \
V9_READ_STLB_TPN_S2VALID_MASK | \
V9_READ_STLB_TPN_STAGE1_ENABLED_MASK | \
V9_READ_STLB_TPN_VALID_MASK)
#define V9_READ_STLB_TAG_PPN_VALID_MASK_TYPEB BIT(28)
#define V9_READ_STLB_TAG_PPN_PPN_MASK GENMASK(23, 0)
#define V9_READ_STLB_TAG_PPN_MASK (V9_READ_STLB_TAG_PPN_PPN_MASK | \
V9_READ_STLB_TAG_PPN_VALID_MASK_TYPEB)
#define V9_READ_STLB_TAG_OTHERS_S2AP_MASK_TYPEA GENMASK(1, 0)
#define V9_READ_STLB_TAG_OTHERS_PS_MASK GENMASK(10, 8)
#define V9_READ_STLB_TAG_OTHERS_BPS_MASK BIT(12)
#define V9_READ_STLB_TAG_OTHERS_VID_MASK GENMASK(23, 20)
#define V9_READ_STLB_TAG_OTHERS_MASK (V9_READ_STLB_TAG_OTHERS_S2AP_MASK_TYPEA | \
V9_READ_STLB_TAG_OTHERS_PS_MASK | \
V9_READ_STLB_TAG_OTHERS_BPS_MASK | \
V9_READ_STLB_TAG_OTHERS_VID_MASK)
#define V9_READ_STLB_WAY_TYPEA(x) FIELD_PREP(V9_READ_STLB_WAY_MASK_TYPEA, (x))
#define V9_READ_STLB_SET_TYPEA(x) FIELD_PREP(V9_READ_STLB_SET_MASK_TYPEA, (x))
#define V9_READ_STLB_STLBID_TYPEA(x) FIELD_PREP(V9_READ_STLB_STLBID_MASK_TYPEA, (x))
#define V9_READ_STLB_SUBLINE_TYPEA(x) FIELD_PREP(V9_READ_STLB_SUBLINE_MASK_TYPEA, (x))
#define V9_READ_STLB_TYPEA(s_i, sub, s, w) (V9_READ_STLB_WAY_TYPEA(w) | \
V9_READ_STLB_SET_TYPEA(s) | \
V9_READ_STLB_STLBID_TYPEA(s_i) | \
V9_READ_STLB_SUBLINE_TYPEA(sub))
#define V9_READ_STLB_WAY_TYPEB(x) FIELD_PREP(V9_READ_STLB_WAY_MASK_TYPEB, (x))
#define V9_READ_STLB_SET_TYPEB(x) FIELD_PREP(V9_READ_STLB_SET_MASK_TYPEB, (x))
#define V9_READ_STLB_STLBID_TYPEB(x) FIELD_PREP(V9_READ_STLB_STLBID_MASK_TYPEB, (x))
#define V9_READ_STLB_TYPEB(s_i, s, w) (V9_READ_STLB_WAY_TYPEB(w) | \
V9_READ_STLB_SET_TYPEB(s) | \
V9_READ_STLB_STLBID_TYPEB(s_i))
#define V9_MAX_PTLB_NUM 0x100
#define V9_MAX_STLB_NUM 0x100
#define V9_CTRL0_DIS_CHK_S1L1PTW_MASK BIT(0)
#define V9_CTRL0_DIS_CHK_S1L2PTW_MASK BIT(1)
#define V9_CTRL0_DIS_CHK_USR_MARCHED_REQ_MASK BIT(3)
#define V9_CTRL0_FAULT_MODE_MASK BIT(4)
#define V9_CTRL0_ENF_FLT_MODE_S1_NONSEC_MASK BIT(5)
#define V9_CTRL0_DESTRUCTIVE_AP_CHK_MODE_MASK BIT(6)
#define V9_CTRL0_MASK (V9_CTRL0_DIS_CHK_S1L1PTW_MASK | \
V9_CTRL0_DESTRUCTIVE_AP_CHK_MODE_MASK | \
V9_CTRL0_DIS_CHK_USR_MARCHED_REQ_MASK | \
V9_CTRL0_DIS_CHK_S1L2PTW_MASK | \
V9_CTRL0_ENF_FLT_MODE_S1_NONSEC_MASK | \
V9_CTRL0_FAULT_MODE_MASK)
/*
* S2MPU V9-specific values (some new, some different from older versions).
* To avoid any confusion, all names are prefixed with V9.
*/
#define V9_L1ENTRY_ATTR_GRAN_MASK BIT(3)
#define V9_MPT_PROT_BITS 4
#define V9_MPT_ACCESS_SHIFT 2
/* V1,V2 variants. */
#define MPT_ACCESS_SHIFT 0
#define L1ENTRY_ATTR_GRAN_MASK GENMASK(5, 4)
#define MPT_PROT_BITS 2
#define REG_NS_CTRL0 0x0
#define REG_NS_CTRL1 0x4
#define REG_NS_CFG 0x10
#define REG_NS_INTERRUPT_ENABLE_PER_VID_SET 0x20
#define REG_NS_INTERRUPT_CLEAR 0x2c
#define REG_NS_VERSION 0x60
#define REG_NS_INFO 0x64
#define REG_NS_STATUS 0x68
#define REG_NS_NUM_CONTEXT 0x100
#define REG_NS_CONTEXT_CFG_VALID_VID 0x104
#define REG_NS_ALL_INVALIDATION 0x1000
#define REG_NS_RANGE_INVALIDATION 0x1020
#define REG_NS_RANGE_INVALIDATION_START_PPN 0x1024
#define REG_NS_RANGE_INVALIDATION_END_PPN 0x1028
#define REG_NS_FAULT_STATUS 0x2000
#define REG_NS_FAULT_PA_LOW(vid) (0x2004 + ((vid) * 0x20))
#define REG_NS_FAULT_PA_HIGH(vid) (0x2008 + ((vid) * 0x20))
#define REG_NS_FAULT_INFO(vid) (0x2010 + ((vid) * 0x20))
#define REG_NS_READ_MPTC 0x3000
#define REG_NS_READ_MPTC_TAG_PPN 0x3004
#define REG_NS_READ_MPTC_TAG_OTHERS 0x3008
#define REG_NS_READ_MPTC_DATA 0x3010
#define REG_NS_L1ENTRY_L2TABLE_ADDR(vid, gb) (0x4000 + ((vid) * 0x200) + ((gb) * 0x8))
#define REG_NS_L1ENTRY_ATTR(vid, gb) (0x4004 + ((vid) * 0x200) + ((gb) * 0x8))
#define CTRL0_ENABLE BIT(0)
#define CTRL0_INTERRUPT_ENABLE BIT(1)
#define CTRL0_FAULT_RESP_TYPE_SLVERR BIT(2) /* for v1 */
#define CTRL0_FAULT_RESP_TYPE_DECERR BIT(2) /* for v2 */
#define CTRL0_MASK (CTRL0_ENABLE | \
CTRL0_INTERRUPT_ENABLE | \
CTRL0_FAULT_RESP_TYPE_SLVERR | \
CTRL0_FAULT_RESP_TYPE_DECERR)
#define CTRL1_DISABLE_CHK_S1L1PTW BIT(0)
#define CTRL1_DISABLE_CHK_S1L2PTW BIT(1)
#define CTRL1_ENABLE_PAGE_SIZE_AWARENESS BIT(2)
#define CTRL1_DISABLE_CHK_USER_MATCHED_REQ BIT(3)
#define CTRL1_MASK (CTRL1_DISABLE_CHK_S1L1PTW | \
CTRL1_DISABLE_CHK_S1L2PTW | \
CTRL1_ENABLE_PAGE_SIZE_AWARENESS | \
CTRL1_DISABLE_CHK_USER_MATCHED_REQ)
#define CFG_MPTW_CACHE_OVERRIDE BIT(0)
#define CFG_MPTW_CACHE_VALUE GENMASK(7, 4)
#define CFG_MPTW_QOS_OVERRIDE BIT(8)
#define CFG_MPTW_QOS_VALUE GENMASK(15, 12)
#define CFG_MPTW_SHAREABLE BIT(16)
#define CFG_MASK (CFG_MPTW_CACHE_OVERRIDE | \
CFG_MPTW_CACHE_VALUE | \
CFG_MPTW_QOS_OVERRIDE | \
CFG_MPTW_QOS_VALUE | \
CFG_MPTW_SHAREABLE)
/* For use with hi_lo_readq_relaxed(). */
#define REG_NS_FAULT_PA_HIGH_LOW(vid) REG_NS_FAULT_PA_LOW(vid)
/* Mask used for extracting VID from FAULT_* register offset. */
#define REG_NS_FAULT_VID_MASK GENMASK(7, 5)
#define VERSION_MAJOR_ARCH_VER_MASK GENMASK(31, 28)
#define VERSION_MINOR_ARCH_VER_MASK GENMASK(27, 24)
#define VERSION_REV_ARCH_VER_MASK GENMASK(23, 16)
#define VERSION_RTL_VER_MASK GENMASK(7, 0)
/* Ignore RTL version in driver version check. */
#define VERSION_CHECK_MASK (VERSION_MAJOR_ARCH_VER_MASK | \
VERSION_MINOR_ARCH_VER_MASK | \
VERSION_REV_ARCH_VER_MASK)
#define INFO_NUM_SET_MASK GENMASK(15, 0)
#define STATUS_BUSY BIT(0)
#define STATUS_ON_INVALIDATING BIT(1)
#define NUM_CONTEXT_MASK GENMASK(3, 0)
#define CONTEXT_CFG_VALID_VID_CTX_VALID(ctx) BIT((4 * (ctx)) + 3)
#define CONTEXT_CFG_VALID_VID_CTX_VID(ctx, vid) \
FIELD_PREP(GENMASK((4 * (ctx) + 2), 4 * (ctx)), (vid))
#define INVALIDATION_INVALIDATE BIT(0)
#define RANGE_INVALIDATION_PPN_SHIFT 12
#define NR_FAULT_INFO_REGS 8
#define FAULT_INFO_VID_MASK GENMASK(26, 24)
#define FAULT_INFO_TYPE_MASK GENMASK(23, 21)
#define FAULT_INFO_TYPE_CONTEXT 0x4 /* v2 only */
#define FAULT_INFO_TYPE_AP 0x2
#define FAULT_INFO_TYPE_MPTW 0x1
#define FAULT_INFO_RW_BIT BIT(20)
#define FAULT_INFO_LEN_MASK GENMASK(19, 16)
#define FAULT_INFO_ID_MASK GENMASK(15, 0)
#define L1ENTRY_L2TABLE_ADDR_SHIFT 4
#define L1ENTRY_L2TABLE_ADDR(pa) ((pa) >> L1ENTRY_L2TABLE_ADDR_SHIFT)
#define READ_MPTC_WAY_MASK GENMASK(18, 16)
#define READ_MPTC_SET_MASK GENMASK(15, 0)
#define READ_MPTC_MASK (READ_MPTC_WAY_MASK | READ_MPTC_SET_MASK)
#define READ_MPTC_WAY(way) FIELD_PREP(READ_MPTC_WAY_MASK, (way))
#define READ_MPTC_SET(set) FIELD_PREP(READ_MPTC_SET_MASK, (set))
#define READ_MPTC(set, way) (READ_MPTC_SET(set) | READ_MPTC_WAY(way))
#define READ_MPTC_TAG_PPN_MASK GENMASK(23, 0)
#define READ_MPTC_TAG_OTHERS_VID_MASK GENMASK(10, 8)
#define READ_MPTC_TAG_OTHERS_GRAN_MASK GENMASK(5, 4)
#define READ_MPTC_TAG_OTHERS_VALID_BIT BIT(0)
#define READ_MPTC_TAG_OTHERS_MASK (READ_MPTC_TAG_OTHERS_VID_MASK | \
READ_MPTC_TAG_OTHERS_GRAN_MASK | \
READ_MPTC_TAG_OTHERS_VALID_BIT)
#define L1ENTRY_ATTR_L2TABLE_EN BIT(0)
#define L1ENTRY_ATTR_GRAN_4K 0x0
#define L1ENTRY_ATTR_GRAN_64K 0x1
#define L1ENTRY_ATTR_GRAN_2M 0x2
#define L1ENTRY_ATTR_GRAN(gran, msk) FIELD_PREP(msk, gran)
#define L1ENTRY_ATTR_PROT_MASK GENMASK(2, 1)
#define L1ENTRY_ATTR_PROT(prot) FIELD_PREP(L1ENTRY_ATTR_PROT_MASK, prot)
#define L1ENTRY_ATTR_1G(prot) L1ENTRY_ATTR_PROT(prot)
#define L1ENTRY_ATTR_L2(gran, msk) (L1ENTRY_ATTR_GRAN(gran, msk) | \
L1ENTRY_ATTR_L2TABLE_EN)
#define NR_GIGABYTES 64
#define RO_GIGABYTES_FIRST 4
#define RO_GIGABYTES_LAST 33
#define NR_RO_GIGABYTES (RO_GIGABYTES_LAST - RO_GIGABYTES_FIRST + 1)
#define NR_RW_GIGABYTES (NR_GIGABYTES - NR_RO_GIGABYTES)
#ifdef CONFIG_ARM64_64K_PAGES
#define SMPT_GRAN SZ_64K
#define SMPT_GRAN_ATTR L1ENTRY_ATTR_GRAN_64K
#else
#define SMPT_GRAN SZ_4K
#define SMPT_GRAN_ATTR L1ENTRY_ATTR_GRAN_4K
#endif
static_assert(SMPT_GRAN <= PAGE_SIZE);
#define SMPT_WORD_SIZE sizeof(u32)
#define SMPT_ELEMS_PER_BYTE(prot_bits) (BITS_PER_BYTE / (prot_bits))
#define SMPT_ELEMS_PER_WORD(prot_bits) (SMPT_WORD_SIZE * SMPT_ELEMS_PER_BYTE(prot_bits))
#define SMPT_WORD_BYTE_RANGE(prot_bits) (SMPT_GRAN * SMPT_ELEMS_PER_WORD(prot_bits))
#define SMPT_NUM_ELEMS (SZ_1G / SMPT_GRAN)
#define SMPT_SIZE(prot_bits) (SMPT_NUM_ELEMS / SMPT_ELEMS_PER_BYTE(prot_bits))
#define SMPT_NUM_WORDS(prot_bits) (SMPT_SIZE(prot_bits) / SMPT_WORD_SIZE)
#define SMPT_NUM_PAGES(prot_bits) (SMPT_SIZE(prot_bits) / PAGE_SIZE)
#define SMPT_ORDER(prot_bits) get_order(SMPT_SIZE(prot_bits))
#define SMPT_GRAN_MASK GENMASK(1, 0)
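As a worked example with a 4 KiB granule: SMPT_NUM_ELEMS = 1 GiB / 4 KiB = 262144 entries per gigabyte. With the 2-bit v1/v2 encoding that is 4 entries per byte, so SMPT_SIZE comes to 64 KiB (16 pages) per gigabyte; with the 4-bit v9 encoding it doubles to 128 KiB (32 pages).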
/* SysMMU_SYNC registers, relative to SYSMMU_SYNC_S2_OFFSET. */
#define REG_NS_SYNC_CMD 0x0
#define REG_NS_SYNC_COMP 0x4
#define SYNC_CMD_SYNC BIT(0)
#define SYNC_COMP_COMPLETE BIT(0)
/*
* Iterate over S2MPU gigabyte regions. Skip those that cannot be modified
* (the MMIO registers are read-only, with reset value MPT_PROT_NONE).
*/
#define for_each_gb_in_range(i, first, last) \
for ((i) = (first); (i) <= (last) && (i) < NR_GIGABYTES; \
(i) = (((i) + 1 == RO_GIGABYTES_FIRST) ? RO_GIGABYTES_LAST : (i)) + 1)
#define for_each_gb(i) for_each_gb_in_range(i, 0, NR_GIGABYTES - 1)
#define for_each_vid(i) for ((i) = 0; (i) < NR_VIDS; (i)++)
#define for_each_gb_and_vid(gb, vid) for_each_vid((vid)) for_each_gb((gb))
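Concretely, with RO_GIGABYTES_FIRST = 4 and RO_GIGABYTES_LAST = 33, for_each_gb(i) visits i = 0, 1, 2, 3 and then jumps straight to 34..63, skipping the read-only gigabyte regions.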
enum s2mpu_version {
S2MPU_VERSION_1 = 0x11000000,
S2MPU_VERSION_2 = 0x20000000,
S2MPU_VERSION_9 = 0x90000000,
};
static inline int smpt_order_from_version(enum s2mpu_version version)
{
if (version == S2MPU_VERSION_9)
return SMPT_ORDER(V9_MPT_PROT_BITS);
else if ((version == S2MPU_VERSION_1) || (version == S2MPU_VERSION_2))
return SMPT_ORDER(MPT_PROT_BITS);
BUG();
}
enum mpt_prot {
MPT_PROT_NONE = 0,
MPT_PROT_R = BIT(0),
MPT_PROT_W = BIT(1),
MPT_PROT_RW = MPT_PROT_R | MPT_PROT_W,
MPT_PROT_MASK = MPT_PROT_RW,
};
enum mpt_update_flags {
MPT_UPDATE_L1 = BIT(0),
MPT_UPDATE_L2 = BIT(1),
};
struct fmpt {
u32 *smpt;
bool gran_1g;
enum mpt_prot prot;
enum mpt_update_flags flags;
};
struct mpt {
struct fmpt fmpt[NR_GIGABYTES];
enum s2mpu_version version;
};
#endif /* __ARM64_KVM_S2MPU_H__ */


@ -4,7 +4,7 @@
#include <asm/atomic_ll_sc.h>
#ifdef CONFIG_ARM64_LSE_ATOMICS
#if defined(CONFIG_ARM64_LSE_ATOMICS) && !defined(BUILD_FIPS140_KO)
#define __LSE_PREAMBLE ".arch_extension lse\n"


@ -0,0 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __ASM_MEM_ENCRYPT_H
#define __ASM_MEM_ENCRYPT_H
bool mem_encrypt_active(void);
int set_memory_encrypted(unsigned long addr, int numpages);
int set_memory_decrypted(unsigned long addr, int numpages);
#endif /* __ASM_MEM_ENCRYPT_H */


@ -0,0 +1,15 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2022 Google LLC
* Author: Keir Fraser <keirf@google.com>
*/
#ifndef __ASM_MEM_RELINQUISH_H
#define __ASM_MEM_RELINQUISH_H
struct page;
bool kvm_has_memrelinquish_services(void);
void page_relinquish(struct page *page);
#endif /* __ASM_MEM_RELINQUISH_H */


@ -147,6 +147,7 @@
* Memory types for Stage-2 translation
*/
#define MT_S2_NORMAL 0xf
#define MT_S2_NORMAL_NC 0x5
#define MT_S2_DEVICE_nGnRE 0x1
/*
@ -154,6 +155,7 @@
* Stage-2 enforces Normal-WB and Device-nGnRE
*/
#define MT_S2_FWB_NORMAL 6
#define MT_S2_FWB_NORMAL_NC 5
#define MT_S2_FWB_DEVICE_nGnRE 1
#ifdef CONFIG_ARM64_4K_PAGES


@ -14,12 +14,50 @@ struct mod_plt_sec {
int plt_max_entries;
};
struct mod_arch_specific {
struct mod_plt_sec core;
struct mod_plt_sec init;
/* for CONFIG_DYNAMIC_FTRACE */
#define ARM64_MODULE_PLTS_ARCHDATA \
struct mod_plt_sec core; \
struct mod_plt_sec init; \
\
/* for CONFIG_DYNAMIC_FTRACE */ \
struct plt_entry *ftrace_trampolines;
#else
#define ARM64_MODULE_PLTS_ARCHDATA
#endif
#ifdef CONFIG_KVM
struct pkvm_module_section {
void *start;
void *end;
};
typedef s32 kvm_nvhe_reloc_t;
struct pkvm_module_ops;
struct pkvm_el2_module {
struct pkvm_module_section text;
struct pkvm_module_section bss;
struct pkvm_module_section rodata;
struct pkvm_module_section data;
kvm_nvhe_reloc_t *relocs;
unsigned int nr_relocs;
int (*init)(const struct pkvm_module_ops *ops);
};
void kvm_apply_hyp_module_relocations(void *mod_start, void *hyp_va,
kvm_nvhe_reloc_t *begin,
kvm_nvhe_reloc_t *end);
#define ARM64_MODULE_KVM_ARCHDATA \
/* For pKVM hypervisor modules */ \
struct pkvm_el2_module hyp;
#else
#define ARM64_MODULE_KVM_ARCHDATA
#endif
#ifdef CONFIG_HAVE_MOD_ARCH_SPECIFIC
struct mod_arch_specific {
ARM64_MODULE_PLTS_ARCHDATA
ARM64_MODULE_KVM_ARCHDATA
};
#endif


@ -1,3 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#include <asm/page-def.h>
SECTIONS {
#ifdef CONFIG_ARM64_MODULE_PLTS
.plt 0 : { BYTE(0) }
@ -17,4 +20,24 @@ SECTIONS {
*/
.text.hot : { *(.text.hot) }
#endif
#ifdef CONFIG_KVM
.hyp.text : ALIGN(PAGE_SIZE) {
*(.hyp.text)
. = ALIGN(PAGE_SIZE);
}
.hyp.bss : ALIGN(PAGE_SIZE) {
*(.hyp.bss)
. = ALIGN(PAGE_SIZE);
}
.hyp.rodata : ALIGN(PAGE_SIZE) {
*(.hyp.rodata)
. = ALIGN(PAGE_SIZE);
}
.hyp.data : ALIGN(PAGE_SIZE) {
*(.hyp.data)
. = ALIGN(PAGE_SIZE);
}
.hyp.reloc : ALIGN(4) { *(.hyp.reloc) }
#endif
}


@ -6,6 +6,7 @@
int aarch64_insn_read(void *addr, u32 *insnp);
int aarch64_insn_write(void *addr, u32 insn);
int aarch64_addr_write(void *addr, u64 dst);
int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);


@ -11,6 +11,7 @@ extern char __alt_instructions[], __alt_instructions_end[];
extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
extern char __hyp_text_start[], __hyp_text_end[];
extern char __hyp_data_start[], __hyp_data_end[];
extern char __hyp_rodata_start[], __hyp_rodata_end[];
extern char __hyp_reloc_begin[], __hyp_reloc_end[];
extern char __hyp_bss_start[], __hyp_bss_end[];


@ -10,6 +10,7 @@ bool can_set_direct_map(void);
int set_memory_valid(unsigned long addr, int numpages, int enable);
int arch_set_direct_map_range_uncached(unsigned long addr, unsigned long numpages);
int set_direct_map_invalid_noflush(struct page *page);
int set_direct_map_default_noflush(struct page *page);
bool kernel_page_present(struct page *page);


@ -35,9 +35,7 @@ static __must_check inline bool may_use_simd(void)
* migrated, and if it's clear we cannot be migrated to a CPU
* where it is set.
*/
return !WARN_ON(!system_capabilities_finalized()) &&
system_supports_fpsimd() &&
!in_hardirq() && !irqs_disabled() && !in_nmi() &&
return !in_hardirq() && !irqs_disabled() && !in_nmi() &&
!this_cpu_read(fpsimd_context_busy);
}


@ -81,6 +81,12 @@ void __hyp_reset_vectors(void);
DECLARE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
static inline bool is_pkvm_initialized(void)
{
return IS_ENABLED(CONFIG_KVM) &&
static_branch_likely(&kvm_protected_mode_initialized);
}
/* Reports the availability of HYP mode */
static inline bool is_hyp_mode_available(void)
{
@ -88,8 +94,7 @@ static inline bool is_hyp_mode_available(void)
* If KVM protected mode is initialized, all CPUs must have been booted
* in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1.
*/
if (IS_ENABLED(CONFIG_KVM) &&
static_branch_likely(&kvm_protected_mode_initialized))
if (is_pkvm_initialized())
return true;
return (__boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 &&
@ -103,8 +108,7 @@ static inline bool is_hyp_mode_mismatched(void)
* If KVM protected mode is initialized, all CPUs must have been booted
* in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1.
*/
if (IS_ENABLED(CONFIG_KVM) &&
static_branch_likely(&kvm_protected_mode_initialized))
if (is_pkvm_initialized())
return false;
return __boot_cpu_mode[0] != __boot_cpu_mode[1];


@ -457,6 +457,15 @@ enum {
#define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS
#define KVM_PSCI_RET_DENIED PSCI_RET_DENIED
/* Protected KVM */
#define KVM_CAP_ARM_PROTECTED_VM_FLAGS_SET_FW_IPA 0
#define KVM_CAP_ARM_PROTECTED_VM_FLAGS_INFO 1
struct kvm_protected_vm_info {
__u64 firmware_size;
__u64 __reserved[7];
};
/* arm64-specific kvm_run::system_event flags */
/*
* Reset caused by a PSCI v1.1 SYSTEM_RESET2 call.


@ -287,8 +287,11 @@ static __init void parse_cmdline(void)
{
const u8 *prop = get_bootargs_cmdline();
if (IS_ENABLED(CONFIG_CMDLINE_FORCE) || !prop)
if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) ||
IS_ENABLED(CONFIG_CMDLINE_FORCE) ||
!prop) {
__parse_cmdline(CONFIG_CMDLINE, true);
}
if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && prop)
__parse_cmdline(prop, true);


@ -71,12 +71,6 @@ KVM_NVHE_ALIAS(nvhe_hyp_panic_handler);
/* Vectors installed by hyp-init on reset HVC. */
KVM_NVHE_ALIAS(__hyp_stub_vectors);
/* Kernel symbol used by icache_is_vpipt(). */
KVM_NVHE_ALIAS(__icache_flags);
/* VMID bits set by the KVM VMID allocator */
KVM_NVHE_ALIAS(kvm_arm_vmid_bits);
/* Static keys which are set if a vGIC trap should be handled in hyp. */
KVM_NVHE_ALIAS(vgic_v2_cpuif_trap);
KVM_NVHE_ALIAS(vgic_v3_cpuif_trap);
@ -92,9 +86,6 @@ KVM_NVHE_ALIAS(gic_nonsecure_priorities);
KVM_NVHE_ALIAS(__start___kvm_ex_table);
KVM_NVHE_ALIAS(__stop___kvm_ex_table);
/* Array containing bases of nVHE per-CPU memory regions. */
KVM_NVHE_ALIAS(kvm_arm_hyp_percpu_base);
/* PMU available static key */
#ifdef CONFIG_HW_PERF_EVENTS
KVM_NVHE_ALIAS(kvm_arm_pmu_available);
@ -111,12 +102,6 @@ KVM_NVHE_ALIAS_HYP(__memcpy, __pi_memcpy);
KVM_NVHE_ALIAS_HYP(__memset, __pi_memset);
#endif
/* Kernel memory sections */
KVM_NVHE_ALIAS(__start_rodata);
KVM_NVHE_ALIAS(__end_rodata);
KVM_NVHE_ALIAS(__bss_start);
KVM_NVHE_ALIAS(__bss_stop);
/* Hyp memory sections */
KVM_NVHE_ALIAS(__hyp_idmap_text_start);
KVM_NVHE_ALIAS(__hyp_idmap_text_end);
@ -124,6 +109,8 @@ KVM_NVHE_ALIAS(__hyp_text_start);
KVM_NVHE_ALIAS(__hyp_text_end);
KVM_NVHE_ALIAS(__hyp_bss_start);
KVM_NVHE_ALIAS(__hyp_bss_end);
KVM_NVHE_ALIAS(__hyp_data_start);
KVM_NVHE_ALIAS(__hyp_data_end);
KVM_NVHE_ALIAS(__hyp_rodata_start);
KVM_NVHE_ALIAS(__hyp_rodata_end);


@ -505,14 +505,76 @@ static int module_init_ftrace_plt(const Elf_Ehdr *hdr,
return 0;
}
static int module_init_hyp(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
struct module *mod)
{
#ifdef CONFIG_KVM
const Elf_Shdr *s;
/*
* If the .hyp.text is missing or empty, this is not a hypervisor
* module so ignore the rest of it.
*/
s = find_section(hdr, sechdrs, ".hyp.text");
if (!s || !s->sh_size)
return 0;
mod->arch.hyp.text = (struct pkvm_module_section) {
.start = (void *)s->sh_addr,
.end = (void *)s->sh_addr + s->sh_size,
};
s = find_section(hdr, sechdrs, ".hyp.bss");
if (!s)
return -ENOEXEC;
mod->arch.hyp.bss = (struct pkvm_module_section) {
.start = (void *)s->sh_addr,
.end = (void *)s->sh_addr + s->sh_size,
};
s = find_section(hdr, sechdrs, ".hyp.rodata");
if (!s)
return -ENOEXEC;
mod->arch.hyp.rodata = (struct pkvm_module_section) {
.start = (void *)s->sh_addr,
.end = (void *)s->sh_addr + s->sh_size,
};
s = find_section(hdr, sechdrs, ".hyp.data");
if (!s)
return -ENOEXEC;
mod->arch.hyp.data = (struct pkvm_module_section) {
.start = (void *)s->sh_addr,
.end = (void *)s->sh_addr + s->sh_size,
};
s = find_section(hdr, sechdrs, ".hyp.reloc");
if (!s)
return -ENOEXEC;
mod->arch.hyp.relocs = (void *)s->sh_addr;
mod->arch.hyp.nr_relocs = s->sh_size / sizeof(*mod->arch.hyp.relocs);
#endif
return 0;
}
int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *me)
{
int err;
const Elf_Shdr *s;
s = find_section(hdr, sechdrs, ".altinstructions");
if (s)
apply_alternatives_module((void *)s->sh_addr, s->sh_size);
return module_init_ftrace_plt(hdr, sechdrs, me);
err = module_init_ftrace_plt(hdr, sechdrs, me);
if (err)
return err;
return module_init_hyp(hdr, sechdrs, me);
}


@ -66,16 +66,16 @@ int __kprobes aarch64_insn_read(void *addr, u32 *insnp)
return ret;
}
static int __kprobes __aarch64_insn_write(void *addr, __le32 insn)
static int __kprobes __aarch64_text_write(void *dst, void *src, size_t size)
{
void *waddr = addr;
unsigned long flags = 0;
unsigned long flags;
void *waddr;
int ret;
raw_spin_lock_irqsave(&patch_lock, flags);
waddr = patch_map(addr, FIX_TEXT_POKE0);
waddr = patch_map(dst, FIX_TEXT_POKE0);
ret = copy_to_kernel_nofault(waddr, &insn, AARCH64_INSN_SIZE);
ret = copy_to_kernel_nofault(waddr, src, size);
patch_unmap(FIX_TEXT_POKE0);
raw_spin_unlock_irqrestore(&patch_lock, flags);
@ -85,7 +85,14 @@ static int __kprobes __aarch64_insn_write(void *addr, __le32 insn)
int __kprobes aarch64_insn_write(void *addr, u32 insn)
{
return __aarch64_insn_write(addr, cpu_to_le32(insn));
__le32 __insn = cpu_to_le32(insn);
return __aarch64_text_write(addr, &__insn, AARCH64_INSN_SIZE);
}
int __kprobes aarch64_addr_write(void *addr, u64 dst)
{
return __aarch64_text_write(addr, &dst, sizeof(dst));
}
int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)


@ -40,6 +40,7 @@
#include <asm/elf.h>
#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
#include <asm/hypervisor.h>
#include <asm/kasan.h>
#include <asm/numa.h>
#include <asm/sections.h>
@ -49,6 +50,7 @@
#include <asm/tlbflush.h>
#include <asm/traps.h>
#include <asm/efi.h>
#include <asm/hypervisor.h>
#include <asm/xen/hypervisor.h>
#include <asm/mmu_context.h>
@ -438,3 +440,10 @@ static int __init register_arm64_panic_block(void)
return 0;
}
device_initcall(register_arm64_panic_block);
void kvm_arm_init_hyp_services(void)
{
kvm_init_ioremap_services();
kvm_init_memshare_services();
kvm_init_memrelinquish_services();
}


@ -13,7 +13,7 @@
*(__kvm_ex_table) \
__stop___kvm_ex_table = .;
#define HYPERVISOR_DATA_SECTIONS \
#define HYPERVISOR_RODATA_SECTIONS \
HYP_SECTION_NAME(.rodata) : { \
. = ALIGN(PAGE_SIZE); \
__hyp_rodata_start = .; \
@ -23,6 +23,15 @@
__hyp_rodata_end = .; \
}
#define HYPERVISOR_DATA_SECTION \
HYP_SECTION_NAME(.data) : { \
. = ALIGN(PAGE_SIZE); \
__hyp_data_start = .; \
*(HYP_SECTION_NAME(.data)) \
. = ALIGN(PAGE_SIZE); \
__hyp_data_end = .; \
}
#define HYPERVISOR_PERCPU_SECTION \
. = ALIGN(PAGE_SIZE); \
HYP_SECTION_NAME(.data..percpu) : { \
@ -51,7 +60,8 @@
#define SBSS_ALIGN PAGE_SIZE
#else /* CONFIG_KVM */
#define HYPERVISOR_EXTABLE
#define HYPERVISOR_DATA_SECTIONS
#define HYPERVISOR_RODATA_SECTIONS
#define HYPERVISOR_DATA_SECTION
#define HYPERVISOR_PERCPU_SECTION
#define HYPERVISOR_RELOC_SECTION
#define SBSS_ALIGN 0
@ -188,7 +198,7 @@ SECTIONS
/* everything from this point to __init_begin will be marked RO NX */
RO_DATA(PAGE_SIZE)
HYPERVISOR_DATA_SECTIONS
HYPERVISOR_RODATA_SECTIONS
/* code sections that are never executed via the kernel mapping */
.rodata.text : {
@ -276,6 +286,8 @@ SECTIONS
_sdata = .;
RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN)
HYPERVISOR_DATA_SECTION
/*
* Data written with the MMU off but read with the MMU on requires
* cache lines to be invalidated, discarding up to a Cache Writeback


@ -69,4 +69,13 @@ config PROTECTED_NVHE_STACKTRACE
If unsure, or not using protected nVHE (pKVM), say N.
config KVM_S2MPU
bool "Stage-2 Memory Protection Unit support"
depends on KVM
help
Support for the Stage-2 Memory Protection Unit (S2MPU) and Stream
Security Mapping Table (SSMT) devices in KVM. This allows the
hypervisor to restrict DMA access to its memory and the memory of
protected guests.
endif # VIRTUALIZATION


@ -8,13 +8,13 @@ ccflags-y += -I $(srctree)/$(src)
include $(srctree)/virt/kvm/Makefile.kvm
obj-$(CONFIG_KVM) += kvm.o
obj-$(CONFIG_KVM) += hyp/
obj-$(CONFIG_KVM) += hyp/ iommu/
kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
inject_fault.o va_layout.o handle_exit.o \
guest.o debug.o reset.o sys_regs.o stacktrace.o \
vgic-sys-reg-v3.o fpsimd.o pkvm.o \
arch_timer.o trng.o vmid.o \
arch_timer.o trng.o vmid.o iommu.o \
vgic/vgic.o vgic/vgic-init.o \
vgic/vgic-irqfd.o vgic/vgic-v2.o \
vgic/vgic-v3.o vgic/vgic-v4.o \


@ -88,7 +88,9 @@ static u64 timer_get_offset(struct arch_timer_context *ctxt)
switch(arch_timer_ctx_index(ctxt)) {
case TIMER_VTIMER:
return __vcpu_sys_reg(vcpu, CNTVOFF_EL2);
if (likely(!kvm_vm_is_protected(vcpu->kvm)))
return __vcpu_sys_reg(vcpu, CNTVOFF_EL2);
fallthrough;
default:
return 0;
}
@ -768,6 +770,9 @@ static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff)
struct kvm *kvm = vcpu->kvm;
struct kvm_vcpu *tmp;
if (unlikely(kvm_vm_is_protected(vcpu->kvm)))
cntvoff = 0;
mutex_lock(&kvm->lock);
kvm_for_each_vcpu(i, tmp, kvm)
timer_set_offset(vcpu_vtimer(tmp), cntvoff);


@ -37,6 +37,7 @@
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pkvm.h>
#include <asm/kvm_emulate.h>
#include <asm/sections.h>
@ -50,8 +51,8 @@ DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
DECLARE_KVM_NVHE_PER_CPU(int, hyp_cpu_number);
static bool vgic_present;
@ -78,18 +79,31 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
{
int r;
if (cap->flags)
return -EINVAL;
/* Capabilities with flags */
switch (cap->cap) {
case KVM_CAP_ARM_PROTECTED_VM:
return pkvm_vm_ioctl_enable_cap(kvm, cap);
default:
if (cap->flags)
return -EINVAL;
}
/* Capabilities without flags */
switch (cap->cap) {
case KVM_CAP_ARM_NISV_TO_USER:
r = 0;
set_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER,
&kvm->arch.flags);
if (kvm_vm_is_protected(kvm)) {
r = -EINVAL;
} else {
r = 0;
set_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER,
&kvm->arch.flags);
}
break;
case KVM_CAP_ARM_MTE:
mutex_lock(&kvm->lock);
if (!system_supports_mte() || kvm->created_vcpus) {
if (!system_supports_mte() ||
kvm_vm_is_protected(kvm) ||
kvm->created_vcpus) {
r = -EINVAL;
} else {
r = 0;
@ -138,24 +152,27 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
int ret;
ret = kvm_arm_setup_stage2(kvm, type);
if (ret)
return ret;
ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu);
if (ret)
return ret;
if (type & ~KVM_VM_TYPE_MASK)
return -EINVAL;
ret = kvm_share_hyp(kvm, kvm + 1);
if (ret)
goto out_free_stage2_pgd;
return ret;
ret = pkvm_init_host_vm(kvm, type);
if (ret)
goto err_unshare_kvm;
if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) {
ret = -ENOMEM;
goto out_free_stage2_pgd;
goto err_unshare_kvm;
}
cpumask_copy(kvm->arch.supported_cpus, cpu_possible_mask);
ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu, type);
if (ret)
goto err_free_cpumask;
kvm_vgic_early_init(kvm);
/* The maximum number of VCPUs is limited by the host's GIC model */
@ -164,9 +181,12 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
set_default_spectre(kvm);
kvm_arm_init_hypercalls(kvm);
return ret;
out_free_stage2_pgd:
kvm_free_stage2_pgd(&kvm->arch.mmu);
return 0;
err_free_cpumask:
free_cpumask_var(kvm->arch.supported_cpus);
err_unshare_kvm:
kvm_unshare_hyp(kvm, kvm + 1);
return ret;
}
@ -187,14 +207,22 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_vgic_destroy(kvm);
if (is_protected_kvm_enabled())
pkvm_destroy_hyp_vm(kvm);
kvm_destroy_vcpus(kvm);
if (atomic64_read(&kvm->stat.protected_hyp_mem))
pr_warn("%lluB of donations to the nVHE hyp are missing\n",
atomic64_read(&kvm->stat.protected_hyp_mem));
kvm_unshare_hyp(kvm, kvm + 1);
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
static int kvm_check_extension(struct kvm *kvm, long ext)
{
int r;
switch (ext) {
case KVM_CAP_IRQCHIP:
r = vgic_present;
@ -212,7 +240,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_IMMEDIATE_EXIT:
case KVM_CAP_VCPU_EVENTS:
case KVM_CAP_ARM_IRQ_LINE_LAYOUT_2:
case KVM_CAP_ARM_NISV_TO_USER:
case KVM_CAP_ARM_INJECT_EXT_DABT:
case KVM_CAP_SET_GUEST_DEBUG:
case KVM_CAP_VCPU_ATTRIBUTES:
@ -220,6 +247,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ARM_SYSTEM_SUSPEND:
r = 1;
break;
case KVM_CAP_ARM_NISV_TO_USER:
r = !kvm || !kvm_vm_is_protected(kvm);
break;
case KVM_CAP_SET_GUEST_DEBUG2:
return KVM_GUESTDBG_VALID_MASK;
case KVM_CAP_ARM_SET_DEVICE_ADDR:
@ -293,6 +323,75 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
return r;
}
/*
* Checks whether the extension specified in ext is supported in protected
* mode for the specified vm.
* The capabilities supported by kvm in general are passed in kvm_cap.
*/
static int pkvm_check_extension(struct kvm *kvm, long ext, int kvm_cap)
{
int r;
switch (ext) {
case KVM_CAP_IRQCHIP:
case KVM_CAP_ARM_PSCI:
case KVM_CAP_ARM_PSCI_0_2:
case KVM_CAP_NR_VCPUS:
case KVM_CAP_MAX_VCPUS:
case KVM_CAP_MAX_VCPU_ID:
case KVM_CAP_MSI_DEVID:
case KVM_CAP_ARM_VM_IPA_SIZE:
r = kvm_cap;
break;
case KVM_CAP_GUEST_DEBUG_HW_BPS:
r = min(kvm_cap, pkvm_get_max_brps());
break;
case KVM_CAP_GUEST_DEBUG_HW_WPS:
r = min(kvm_cap, pkvm_get_max_wrps());
break;
case KVM_CAP_ARM_PMU_V3:
r = kvm_cap && FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer),
PVM_ID_AA64DFR0_ALLOW);
break;
case KVM_CAP_ARM_SVE:
r = kvm_cap && FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE),
PVM_ID_AA64PFR0_RESTRICT_UNSIGNED);
break;
case KVM_CAP_ARM_PTRAUTH_ADDRESS:
r = kvm_cap &&
FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API),
PVM_ID_AA64ISAR1_ALLOW) &&
FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA),
PVM_ID_AA64ISAR1_ALLOW);
break;
case KVM_CAP_ARM_PTRAUTH_GENERIC:
r = kvm_cap &&
FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI),
PVM_ID_AA64ISAR1_ALLOW) &&
FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA),
PVM_ID_AA64ISAR1_ALLOW);
break;
case KVM_CAP_ARM_PROTECTED_VM:
r = 1;
break;
default:
r = 0;
break;
}
return r;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r = kvm_check_extension(kvm, ext);
if (kvm && kvm_vm_is_protected(kvm))
r = pkvm_check_extension(kvm, ext, r);
return r;
}
long kvm_arch_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -363,7 +462,11 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
if (vcpu_has_run_once(vcpu) && unlikely(!irqchip_in_kernel(vcpu->kvm)))
static_branch_dec(&userspace_irqchip_in_use);
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
if (is_protected_kvm_enabled())
free_hyp_stage2_memcache(&vcpu->arch.pkvm_memcache, vcpu->kvm);
else
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
kvm_timer_vcpu_terminate(vcpu);
kvm_pmu_vcpu_destroy(vcpu);
@@ -385,6 +488,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
struct kvm_s2_mmu *mmu;
int *last_ran;
if (is_protected_kvm_enabled())
goto nommu;
mmu = vcpu->arch.hw_mmu;
last_ran = this_cpu_ptr(mmu->last_vcpu_ran);
@@ -402,6 +508,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
*last_ran = vcpu->vcpu_id;
}
nommu:
vcpu->cpu = cpu;
kvm_vgic_load(vcpu);
@@ -422,18 +529,36 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
vcpu_ptrauth_disable(vcpu);
kvm_arch_vcpu_load_debug_state_flags(vcpu);
if (is_protected_kvm_enabled()) {
kvm_call_hyp_nvhe(__pkvm_vcpu_load,
vcpu->kvm->arch.pkvm.handle,
vcpu->vcpu_idx, vcpu->arch.hcr_el2);
kvm_call_hyp(__vgic_v3_restore_vmcr_aprs,
&vcpu->arch.vgic_cpu.vgic_v3);
}
if (!cpumask_test_cpu(smp_processor_id(), vcpu->kvm->arch.supported_cpus))
vcpu_set_on_unsupported_cpu(vcpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
if (is_protected_kvm_enabled()) {
kvm_call_hyp(__vgic_v3_save_vmcr_aprs,
&vcpu->arch.vgic_cpu.vgic_v3);
kvm_call_hyp_nvhe(__pkvm_vcpu_put);
/* __pkvm_vcpu_put implies a sync of the state */
if (!kvm_vm_is_protected(vcpu->kvm))
vcpu_set_flag(vcpu, PKVM_HOST_STATE_DIRTY);
}
kvm_arch_vcpu_put_debug_state_flags(vcpu);
kvm_arch_vcpu_put_fp(vcpu);
if (has_vhe())
kvm_vcpu_put_sysregs_vhe(vcpu);
kvm_timer_vcpu_put(vcpu);
kvm_vgic_put(vcpu);
kvm_vgic_put(vcpu, false);
kvm_vcpu_pmu_restore_host(vcpu);
kvm_arm_vmid_clear_active();
@@ -569,6 +694,15 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
if (ret)
return ret;
if (is_protected_kvm_enabled()) {
/* Start with the vcpu in a dirty state */
if (!kvm_vm_is_protected(vcpu->kvm))
vcpu_set_flag(vcpu, PKVM_HOST_STATE_DIRTY);
ret = pkvm_create_hyp_vm(kvm);
if (ret)
return ret;
}
if (!irqchip_in_kernel(kvm)) {
/*
* Tell the rest of the code that there are userspace irqchip
@@ -577,14 +711,6 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
static_branch_inc(&userspace_irqchip_in_use);
}
/*
* Initialize traps for protected VMs.
* NOTE: Move to run in EL2 directly, rather than via a hypercall, once
* the code is in place for first run initialization at EL2.
*/
if (kvm_vm_is_protected(kvm))
kvm_call_hyp_nvhe(__pkvm_vcpu_init_traps, vcpu);
mutex_lock(&kvm->lock);
set_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags);
mutex_unlock(&kvm->lock);
@@ -660,15 +786,14 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
* doorbells to be signalled, should an interrupt become pending.
*/
preempt_disable();
kvm_vgic_vmcr_sync(vcpu);
vgic_v4_put(vcpu, true);
kvm_vgic_put(vcpu, true);
preempt_enable();
kvm_vcpu_halt(vcpu);
vcpu_clear_flag(vcpu, IN_WFIT);
preempt_disable();
vgic_v4_load(vcpu);
kvm_vgic_load(vcpu);
preempt_enable();
}
@@ -1522,6 +1647,9 @@ static void cpu_prepare_hyp_mode(int cpu)
{
struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
unsigned long tcr;
int *hyp_cpu_number_ptr = per_cpu_ptr_nvhe_sym(hyp_cpu_number, cpu);
*hyp_cpu_number_ptr = cpu;
/*
* Calculate the raw per-cpu offset without a translation from the
@@ -1779,6 +1907,7 @@ static bool init_psci_relay(void)
}
kvm_host_psci_config.version = psci_ops.get_version();
kvm_host_psci_config.smccc_version = arm_smccc_get_version();
if (kvm_host_psci_config.version == PSCI_VERSION(0, 1)) {
kvm_host_psci_config.function_ids_0_1 = get_psci_0_1_function_ids();
@@ -1844,13 +1973,13 @@ static void teardown_hyp_mode(void)
free_hyp_pgds();
for_each_possible_cpu(cpu) {
free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
free_pages(kvm_arm_hyp_percpu_base[cpu], nvhe_percpu_order());
free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order());
}
}
static int do_pkvm_init(u32 hyp_va_bits)
{
void *per_cpu_base = kvm_ksym_ref(kvm_arm_hyp_percpu_base);
void *per_cpu_base = kvm_ksym_ref(kvm_nvhe_sym(kvm_arm_hyp_percpu_base));
int ret;
preempt_disable();
@@ -1870,11 +1999,8 @@ static int do_pkvm_init(u32 hyp_va_bits)
return ret;
}
static int kvm_hyp_init_protection(u32 hyp_va_bits)
static void kvm_hyp_init_symbols(void)
{
void *addr = phys_to_virt(hyp_mem_base);
int ret;
kvm_nvhe_sym(id_aa64pfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
kvm_nvhe_sym(id_aa64pfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
kvm_nvhe_sym(id_aa64isar0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR0_EL1);
@@ -1883,6 +2009,15 @@ static int kvm_hyp_init_protection(u32 hyp_va_bits)
kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1);
kvm_nvhe_sym(__icache_flags) = __icache_flags;
kvm_nvhe_sym(kvm_arm_vmid_bits) = kvm_arm_vmid_bits;
kvm_nvhe_sym(smccc_trng_available) = smccc_trng_available;
}
static int kvm_hyp_init_protection(u32 hyp_va_bits)
{
void *addr = phys_to_virt(hyp_mem_base);
int ret;
ret = create_hyp_mappings(addr, addr + hyp_mem_size, PAGE_HYP);
if (ret)
@@ -1950,7 +2085,7 @@ static int init_hyp_mode(void)
page_addr = page_address(page);
memcpy(page_addr, CHOOSE_NVHE_SYM(__per_cpu_start), nvhe_percpu_size());
kvm_arm_hyp_percpu_base[cpu] = (unsigned long)page_addr;
kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu] = (unsigned long)page_addr;
}
/*
@@ -1963,6 +2098,13 @@ static int init_hyp_mode(void)
goto out_err;
}
err = create_hyp_mappings(kvm_ksym_ref(__hyp_data_start),
kvm_ksym_ref(__hyp_data_end), PAGE_HYP);
if (err) {
kvm_err("Cannot map .hyp.data section\n");
goto out_err;
}
err = create_hyp_mappings(kvm_ksym_ref(__hyp_rodata_start),
kvm_ksym_ref(__hyp_rodata_end), PAGE_HYP_RO);
if (err) {
@@ -2043,7 +2185,7 @@ static int init_hyp_mode(void)
}
for_each_possible_cpu(cpu) {
char *percpu_begin = (char *)kvm_arm_hyp_percpu_base[cpu];
char *percpu_begin = (char *)kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu];
char *percpu_end = percpu_begin + nvhe_percpu_size();
/* Map Hyp percpu pages */
@@ -2057,6 +2199,8 @@ static int init_hyp_mode(void)
cpu_prepare_hyp_mode(cpu);
}
kvm_hyp_init_symbols();
if (is_protected_kvm_enabled()) {
init_cpu_logical_map();
@@ -2064,9 +2208,7 @@ static int init_hyp_mode(void)
err = -ENODEV;
goto out_err;
}
}
if (is_protected_kvm_enabled()) {
err = kvm_hyp_init_protection(hyp_va_bits);
if (err) {
kvm_err("Failed to init hyp memory protection\n");
@@ -2099,6 +2241,17 @@ static int pkvm_drop_host_privileges(void)
* once the host stage 2 is installed.
*/
static_branch_enable(&kvm_protected_mode_initialized);
/*
* Fixup the boot mode so that we don't take spurious round
* trips via EL2 on cpu_resume. Flush to the PoC for a good
* measure, so that it can be observed by a CPU coming out of
* suspend with the MMU off.
*/
__boot_cpu_mode[0] = __boot_cpu_mode[1] = BOOT_CPU_MODE_EL1;
dcache_clean_poc((unsigned long)__boot_cpu_mode,
(unsigned long)(__boot_cpu_mode + 2));
on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
return ret;
}


@@ -39,9 +39,7 @@ static DEFINE_PER_CPU(u64, mdcr_el2);
*/
static void save_guest_debug_regs(struct kvm_vcpu *vcpu)
{
u64 val = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
vcpu->arch.guest_debug_preserved.mdscr_el1 = val;
__vcpu_save_guest_debug_regs(vcpu);
trace_kvm_arm_set_dreg32("Saved MDSCR_EL1",
vcpu->arch.guest_debug_preserved.mdscr_el1);
@@ -52,9 +50,7 @@ static void save_guest_debug_regs(struct kvm_vcpu *vcpu)
static void restore_guest_debug_regs(struct kvm_vcpu *vcpu)
{
u64 val = vcpu->arch.guest_debug_preserved.mdscr_el1;
vcpu_write_sys_reg(vcpu, val, MDSCR_EL1);
__vcpu_restore_guest_debug_regs(vcpu);
trace_kvm_arm_set_dreg32("Restored MDSCR_EL1",
vcpu_read_sys_reg(vcpu, MDSCR_EL1));
@@ -175,7 +171,7 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
kvm_arm_setup_mdcr_el2(vcpu);
/* Check if we need to use the debug registers. */
if (vcpu->guest_debug || kvm_vcpu_os_lock_enabled(vcpu)) {
if (kvm_vcpu_needs_debug_regs(vcpu)) {
/* Save guest debug state */
save_guest_debug_regs(vcpu);
@@ -284,7 +280,7 @@ void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)
/*
* Restore the guest's debug registers if we were using them.
*/
if (vcpu->guest_debug || kvm_vcpu_os_lock_enabled(vcpu)) {
if (kvm_vcpu_needs_debug_regs(vcpu)) {
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
if (!(*vcpu_cpsr(vcpu) & DBG_SPSR_SS))
/*


@@ -29,7 +29,9 @@
#include "trace.h"
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
KVM_GENERIC_VM_STATS()
KVM_GENERIC_VM_STATS(),
STATS_DESC_ICOUNTER(VM, protected_hyp_mem),
STATS_DESC_ICOUNTER(VM, protected_shared_mem),
};
const struct kvm_stats_header kvm_vm_stats_header = {


@@ -240,6 +240,21 @@ static int handle_trap_exceptions(struct kvm_vcpu *vcpu)
{
int handled;
/*
* If we run a non-protected VM when protection is enabled
* system-wide, resync the state from the hypervisor and mark
* it as dirty on the host side if it wasn't dirty already
* (which could happen if preemption has taken place).
*/
if (is_protected_kvm_enabled() && !kvm_vm_is_protected(vcpu->kvm)) {
preempt_disable();
if (!(vcpu_get_flag(vcpu, PKVM_HOST_STATE_DIRTY))) {
kvm_call_hyp_nvhe(__pkvm_vcpu_sync_state);
vcpu_set_flag(vcpu, PKVM_HOST_STATE_DIRTY);
}
preempt_enable();
}
/*
* See ARM ARM B1.14.1: "Hyp traps on instructions
* that fail their condition code check"
@@ -307,6 +322,13 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
/* For exit types that need handling before we can be preempted */
void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index)
{
/*
* We just exited, so the state is clean from a hypervisor
* perspective.
*/
if (is_protected_kvm_enabled())
vcpu_clear_flag(vcpu, PKVM_HOST_STATE_DIRTY);
if (ARM_SERROR_PENDING(exception_index)) {
if (this_cpu_has_cap(ARM64_HAS_RAS_EXTN)) {
u64 disr = kvm_vcpu_get_disr(vcpu);


@@ -61,12 +61,25 @@ static void __vcpu_write_spsr_und(struct kvm_vcpu *vcpu, u64 val)
vcpu->arch.ctxt.spsr_und = val;
}
unsigned long get_except64_offset(unsigned long psr, unsigned long target_mode,
enum exception_type type)
{
u64 mode = psr & (PSR_MODE_MASK | PSR_MODE32_BIT);
u64 exc_offset;
if (mode == target_mode)
exc_offset = CURRENT_EL_SP_ELx_VECTOR;
else if ((mode | PSR_MODE_THREAD_BIT) == target_mode)
exc_offset = CURRENT_EL_SP_EL0_VECTOR;
else if (!(mode & PSR_MODE32_BIT))
exc_offset = LOWER_EL_AArch64_VECTOR;
else
exc_offset = LOWER_EL_AArch32_VECTOR;
return exc_offset + type;
}
/*
* This performs the exception entry at a given EL (@target_mode), stashing PC
* and PSTATE into ELR and SPSR respectively, and compute the new PC/PSTATE.
* The EL passed to this function *must* be a non-secure, privileged mode with
* bit 0 being set (PSTATE.SP == 1).
*
* When an exception is taken, most PSTATE fields are left unchanged in the
* handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all
* of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx
@@ -78,45 +91,17 @@ static void __vcpu_write_spsr_und(struct kvm_vcpu *vcpu, u64 val)
* Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from
* MSB to LSB.
*/
static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode,
enum exception_type type)
unsigned long get_except64_cpsr(unsigned long old, bool has_mte,
unsigned long sctlr, unsigned long target_mode)
{
unsigned long sctlr, vbar, old, new, mode;
u64 exc_offset;
mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT);
if (mode == target_mode)
exc_offset = CURRENT_EL_SP_ELx_VECTOR;
else if ((mode | PSR_MODE_THREAD_BIT) == target_mode)
exc_offset = CURRENT_EL_SP_EL0_VECTOR;
else if (!(mode & PSR_MODE32_BIT))
exc_offset = LOWER_EL_AArch64_VECTOR;
else
exc_offset = LOWER_EL_AArch32_VECTOR;
switch (target_mode) {
case PSR_MODE_EL1h:
vbar = __vcpu_read_sys_reg(vcpu, VBAR_EL1);
sctlr = __vcpu_read_sys_reg(vcpu, SCTLR_EL1);
__vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL1);
break;
default:
/* Don't do that */
BUG();
}
*vcpu_pc(vcpu) = vbar + exc_offset + type;
old = *vcpu_cpsr(vcpu);
new = 0;
u64 new = 0;
new |= (old & PSR_N_BIT);
new |= (old & PSR_Z_BIT);
new |= (old & PSR_C_BIT);
new |= (old & PSR_V_BIT);
if (kvm_has_mte(kern_hyp_va(vcpu->kvm)))
if (has_mte)
new |= PSR_TCO_BIT;
new |= (old & PSR_DIT_BIT);
@@ -152,6 +137,36 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode,
new |= target_mode;
return new;
}
/*
* This performs the exception entry at a given EL (@target_mode), stashing PC
* and PSTATE into ELR and SPSR respectively, and compute the new PC/PSTATE.
* The EL passed to this function *must* be a non-secure, privileged mode with
* bit 0 being set (PSTATE.SP == 1).
*/
static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode,
enum exception_type type)
{
u64 offset = get_except64_offset(*vcpu_cpsr(vcpu), target_mode, type);
unsigned long sctlr, vbar, old, new;
switch (target_mode) {
case PSR_MODE_EL1h:
vbar = __vcpu_read_sys_reg(vcpu, VBAR_EL1);
sctlr = __vcpu_read_sys_reg(vcpu, SCTLR_EL1);
__vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL1);
break;
default:
/* Don't do that */
BUG();
}
*vcpu_pc(vcpu) = vbar + offset;
old = *vcpu_cpsr(vcpu);
new = get_except64_cpsr(old, kvm_has_mte(kern_hyp_va(vcpu->kvm)), sctlr, target_mode);
*vcpu_cpsr(vcpu) = new;
__vcpu_write_spsr(vcpu, old);
}


@@ -25,3 +25,9 @@ SYM_FUNC_START(__sve_restore_state)
sve_load 0, x1, x2, 3
ret
SYM_FUNC_END(__sve_restore_state)
SYM_FUNC_START(__sve_save_state)
mov x2, #1
sve_save 0, x1, x2, 3
ret
SYM_FUNC_END(__sve_save_state)


@@ -2,9 +2,12 @@
#include <linux/kbuild.h>
#include <nvhe/memory.h>
#include <nvhe/pkvm.h>
int main(void)
{
DEFINE(STRUCT_HYP_PAGE_SIZE, sizeof(struct hyp_page));
DEFINE(PKVM_HYP_VM_SIZE, sizeof(struct pkvm_hyp_vm));
DEFINE(PKVM_HYP_VCPU_SIZE, sizeof(struct pkvm_hyp_vcpu));
return 0;
}


@@ -0,0 +1,17 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2022 - Google LLC
* Author: Andrew Walbran <qwandor@google.com>
*/
#ifndef __KVM_HYP_FFA_H
#define __KVM_HYP_FFA_H
#include <asm/kvm_host.h>
#define FFA_MIN_FUNC_NUM 0x60
#define FFA_MAX_FUNC_NUM 0x7F
int hyp_ffa_init(void *pages);
bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt);
#endif /* __KVM_HYP_FFA_H */


@@ -1,205 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2021 Google LLC
* Author: Fuad Tabba <tabba@google.com>
*/
#ifndef __ARM64_KVM_FIXED_CONFIG_H__
#define __ARM64_KVM_FIXED_CONFIG_H__
#include <asm/sysreg.h>
/*
* This file contains definitions for features to be allowed or restricted for
* guest virtual machines, depending on the mode KVM is running in and on the
* type of guest that is running.
*
* The ALLOW masks represent a bitmask of feature fields that are allowed
* without any restrictions as long as they are supported by the system.
*
* The RESTRICT_UNSIGNED masks, if present, represent unsigned fields for
* features that are restricted to support at most the specified feature.
*
* If a feature field is not present in either, than it is not supported.
*
* The approach taken for protected VMs is to allow features that are:
* - Needed by common Linux distributions (e.g., floating point)
* - Trivial to support, e.g., supporting the feature does not introduce or
* require tracking of additional state in KVM
* - Cannot be trapped or prevent the guest from using anyway
*/
/*
* Allow for protected VMs:
* - Floating-point and Advanced SIMD
* - Data Independent Timing
*/
#define PVM_ID_AA64PFR0_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP) | \
ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD) | \
ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_DIT) \
)
/*
* Restrict to the following *unsigned* features for protected VMs:
* - AArch64 guests only (no support for AArch32 guests):
* AArch32 adds complexity in trap handling, emulation, condition codes,
* etc...
* - RAS (v1)
* Supported by KVM
*/
#define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL2), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL3), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), ID_AA64PFR0_EL1_RAS_IMP) \
)
/*
* Allow for protected VMs:
* - Branch Target Identification
* - Speculative Store Bypassing
*/
#define PVM_ID_AA64PFR1_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_BT) | \
ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SSBS) \
)
/*
* Allow for protected VMs:
* - Mixed-endian
* - Distinction between Secure and Non-secure Memory
* - Mixed-endian at EL0 only
* - Non-context synchronizing exception entry and exit
*/
#define PVM_ID_AA64MMFR0_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGEND) | \
ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_SNSMEM) | \
ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGENDEL0) | \
ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_EXS) \
)
/*
* Restrict to the following *unsigned* features for protected VMs:
* - 40-bit IPA
* - 16-bit ASID
*/
#define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED (\
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_PARANGE), ID_AA64MMFR0_EL1_PARANGE_40) | \
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_ASIDBITS), ID_AA64MMFR0_EL1_ASIDBITS_16) \
)
/*
* Allow for protected VMs:
* - Hardware translation table updates to Access flag and Dirty state
* - Number of VMID bits from CPU
* - Hierarchical Permission Disables
* - Privileged Access Never
* - SError interrupt exceptions from speculative reads
* - Enhanced Translation Synchronization
*/
#define PVM_ID_AA64MMFR1_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_VMIDBits) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HPDS) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_PAN) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_SpecSEI) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_ETS) \
)
/*
* Allow for protected VMs:
* - Common not Private translations
* - User Access Override
* - IESB bit in the SCTLR_ELx registers
* - Unaligned single-copy atomicity and atomic functions
* - ESR_ELx.EC value on an exception by read access to feature ID space
* - TTL field in address operations.
* - Break-before-make sequences when changing translation block size
* - E0PDx mechanism
*/
#define PVM_ID_AA64MMFR2_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_CnP) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_UAO) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IESB) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_AT) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IDS) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_TTL) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_BBM) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_E0PD) \
)
/*
* No support for Scalable Vectors for protected VMs:
* Requires additional support from KVM, e.g., context-switching and
* trapping at EL2
*/
#define PVM_ID_AA64ZFR0_ALLOW (0ULL)
/*
* No support for debug, including breakpoints, and watchpoints for protected
* VMs:
* The Arm architecture mandates support for at least the Armv8 debug
* architecture, which would include at least 2 hardware breakpoints and
* watchpoints. Providing that support to protected guests adds
* considerable state and complexity. Therefore, the reserved value of 0 is
* used for debug-related fields.
*/
#define PVM_ID_AA64DFR0_ALLOW (0ULL)
#define PVM_ID_AA64DFR1_ALLOW (0ULL)
/*
* No support for implementation defined features.
*/
#define PVM_ID_AA64AFR0_ALLOW (0ULL)
#define PVM_ID_AA64AFR1_ALLOW (0ULL)
/*
* No restrictions on instructions implemented in AArch64.
*/
#define PVM_ID_AA64ISAR0_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_AES) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA1) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA2) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_CRC32) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RDM) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA3) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM3) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM4) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_DP) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_FHM) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TS) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TLB) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RNDR) \
)
#define PVM_ID_AA64ISAR1_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DPB) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_JSCVT) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FCMA) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_LRCPC) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FRINTTS) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SB) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SPECRES) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_BF16) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DGH) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_I8MM) \
)
#define PVM_ID_AA64ISAR2_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3) | \
ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) \
)
u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id);
bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code);
bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code);
int kvm_check_pvm_sysreg_table(void);
#endif /* __ARM64_KVM_FIXED_CONFIG_H__ */


@@ -7,7 +7,7 @@
#include <nvhe/memory.h>
#include <nvhe/spinlock.h>
#define HYP_NO_ORDER USHRT_MAX
#define HYP_NO_ORDER 0xff
struct hyp_pool {
/*
@@ -19,11 +19,11 @@ struct hyp_pool {
struct list_head free_area[MAX_ORDER];
phys_addr_t range_start;
phys_addr_t range_end;
unsigned short max_order;
u8 max_order;
};
/* Allocation */
void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order);
void *hyp_alloc_pages(struct hyp_pool *pool, u8 order);
void hyp_split_page(struct hyp_page *page);
void hyp_get_page(struct hyp_pool *pool, void *addr);
void hyp_put_page(struct hyp_pool *pool, void *addr);
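Shrinking the order to a u8 narrows HYP_NO_ORDER to match, but the allocator interface above is unchanged. A minimal usage sketch follows; the caller is hypothetical and it assumes the pool was already set up (e.g. via hyp_pool_init()):
/*
 * Hypothetical caller: allocate a single page (order 0) from an already
 * initialised pool and release it again. hyp_alloc_pages() returns NULL
 * on failure; hyp_put_page() frees the page once its refcount drops to 0.
 */
static int example_use_hyp_pool(struct hyp_pool *pool)
{
	void *page = hyp_alloc_pages(pool, 0);

	if (!page)
		return -ENOMEM;

	/* ... use the page ... */

	hyp_put_page(pool, page);
	return 0;
}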


@@ -0,0 +1,102 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __ARM64_KVM_NVHE_IOMMU_H__
#define __ARM64_KVM_NVHE_IOMMU_H__
#include <linux/types.h>
#include <asm/kvm_host.h>
#include <nvhe/mem_protect.h>
struct pkvm_iommu;
struct pkvm_iommu_ops {
/*
* Global driver initialization called before devices are registered.
* Driver-specific arguments are passed in a buffer shared by the host.
* The buffer memory has been pinned in EL2 but host retains R/W access.
* Extra care must be taken when reading from it to avoid TOCTOU bugs.
* If the driver maintains its own page tables, it is expected to
* initialize them to all memory owned by the host.
* Driver initialization lock held during callback.
*/
int (*init)(void *data, size_t size);
/*
* Driver-specific validation of a device that is being registered.
* All fields of the device struct have been populated.
* Called with the host lock held.
*/
int (*validate)(struct pkvm_iommu *dev);
/*
* Validation of a new child device that is being register by
* the parent device the child selected. Called with the host lock held.
*/
int (*validate_child)(struct pkvm_iommu *dev, struct pkvm_iommu *child);
/*
* Callback to apply a host stage-2 mapping change at driver level.
* Called before 'host_stage2_idmap_apply' with host lock held.
*/
void (*host_stage2_idmap_prepare)(phys_addr_t start, phys_addr_t end,
enum kvm_pgtable_prot prot);
/*
* Callback to apply a host stage-2 mapping change at device level.
* Called after 'host_stage2_idmap_prepare' with host lock held.
*/
void (*host_stage2_idmap_apply)(struct pkvm_iommu *dev,
phys_addr_t start, phys_addr_t end);
/*
* Callback to finish a host stage-2 mapping change at device level.
* Called after 'host_stage2_idmap_apply' with host lock held.
*/
void (*host_stage2_idmap_complete)(struct pkvm_iommu *dev);
/* Power management callbacks. Called with host lock held. */
int (*suspend)(struct pkvm_iommu *dev);
int (*resume)(struct pkvm_iommu *dev);
/*
* Host data abort handler callback. Called with host lock held.
* Returns true if the data abort has been handled.
*/
bool (*host_dabt_handler)(struct pkvm_iommu *dev,
struct kvm_cpu_context *host_ctxt,
u32 esr, size_t off);
/* Amount of memory allocated per-device for use by the driver. */
size_t data_size;
};
struct pkvm_iommu {
struct pkvm_iommu *parent;
struct list_head list;
struct list_head siblings;
struct list_head children;
unsigned long id;
const struct pkvm_iommu_ops *ops;
phys_addr_t pa;
void *va;
size_t size;
bool powered;
char data[];
};
int __pkvm_iommu_driver_init(struct pkvm_iommu_driver *drv, void *data, size_t size);
int __pkvm_iommu_register(unsigned long dev_id, unsigned long drv_id,
phys_addr_t dev_pa, size_t dev_size,
unsigned long parent_id,
void *kern_mem_va, size_t mem_size);
int __pkvm_iommu_pm_notify(unsigned long dev_id,
enum pkvm_iommu_pm_event event);
int __pkvm_iommu_finalize(void);
int pkvm_iommu_host_stage2_adjust_range(phys_addr_t addr, phys_addr_t *start,
phys_addr_t *end);
bool pkvm_iommu_host_dabt_handler(struct kvm_cpu_context *host_ctxt, u32 esr,
phys_addr_t fault_pa);
void pkvm_iommu_host_stage2_idmap(phys_addr_t start, phys_addr_t end,
enum kvm_pgtable_prot prot);
#endif /* __ARM64_KVM_NVHE_IOMMU_H__ */
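The ops above define the whole driver contract. A hedged sketch of a minimal driver providing them is shown below; the example_* names and function bodies are made up, and only the pkvm_iommu_ops fields and callback signatures come from this header:
/*
 * Hypothetical pKVM IOMMU driver skeleton; bodies are placeholders.
 */
static int example_iommu_init(void *data, size_t size)
{
	/* Parse driver-specific arguments passed from the host. */
	return 0;
}

static int example_iommu_validate(struct pkvm_iommu *dev)
{
	/* Reject devices whose MMIO window looks implausible. */
	return dev->size ? 0 : -EINVAL;
}

static void example_iommu_idmap_apply(struct pkvm_iommu *dev,
				      phys_addr_t start, phys_addr_t end)
{
	/* Mirror the host stage-2 change into the device's page tables. */
}

static const struct pkvm_iommu_ops example_iommu_ops = {
	.init			 = example_iommu_init,
	.validate		 = example_iommu_validate,
	.host_stage2_idmap_apply = example_iommu_idmap_apply,
	.data_size		 = 0,
};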


@@ -8,8 +8,10 @@
#define __KVM_NVHE_MEM_PROTECT__
#include <linux/kvm_host.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
#include <asm/virt.h>
#include <nvhe/pkvm.h>
#include <nvhe/spinlock.h>
/*
@@ -29,6 +31,7 @@ enum pkvm_page_state {
/* Meta-states which aren't encoded directly in the PTE's SW bits */
PKVM_NOPAGE,
PKVM_PAGE_RESTRICTED_PROT,
};
#define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1)
@@ -43,30 +46,69 @@ static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot)
return prot & PKVM_PAGE_STATE_PROT_MASK;
}
struct host_kvm {
struct host_mmu {
struct kvm_arch arch;
struct kvm_pgtable pgt;
struct kvm_pgtable_mm_ops mm_ops;
hyp_spinlock_t lock;
};
extern struct host_kvm host_kvm;
extern struct host_mmu host_mmu;
extern const u8 pkvm_hyp_id;
/* This corresponds to page-table locking order */
enum pkvm_component_id {
PKVM_ID_HOST,
PKVM_ID_HYP,
PKVM_ID_GUEST,
PKVM_ID_FFA,
PKVM_ID_PROTECTED,
PKVM_ID_MAX = PKVM_ID_PROTECTED,
};
extern unsigned long hyp_nr_cpus;
int __pkvm_prot_finalize(void);
int __pkvm_host_share_hyp(u64 pfn);
int __pkvm_host_unshare_hyp(u64 pfn);
int __pkvm_host_reclaim_page(u64 pfn);
int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages);
int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages);
int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu);
int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu);
int __pkvm_guest_share_host(struct pkvm_hyp_vcpu *hyp_vcpu, u64 ipa);
int __pkvm_guest_unshare_host(struct pkvm_hyp_vcpu *hyp_vcpu, u64 ipa);
int __pkvm_guest_relinquish_to_host(struct pkvm_hyp_vcpu *vcpu,
u64 ipa, u64 *ppa);
int __pkvm_install_ioguard_page(struct pkvm_hyp_vcpu *hyp_vcpu, u64 ipa);
int __pkvm_remove_ioguard_page(struct pkvm_hyp_vcpu *hyp_vcpu, u64 ipa);
bool __pkvm_check_ioguard_page(struct pkvm_hyp_vcpu *hyp_vcpu);
int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages);
int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages);
bool addr_is_memory(phys_addr_t phys);
int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id);
int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot,
bool update_iommu);
int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, enum pkvm_component_id owner_id);
int host_stage2_protect_pages_locked(phys_addr_t addr, u64 size);
int host_stage2_unmap_reg_locked(phys_addr_t start, u64 size);
int kvm_host_prepare_stage2(void *pgt_pool_base);
int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd);
void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt);
int hyp_register_host_perm_fault_handler(int (*cb)(struct kvm_cpu_context *ctxt, u64 esr, u64 addr));
int hyp_pin_shared_mem(void *from, void *to);
void hyp_unpin_shared_mem(void *from, void *to);
void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc);
int hyp_protect_host_page(u64 pfn, enum kvm_pgtable_prot prot);
int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages,
struct kvm_hyp_memcache *host_mc);
void psci_mem_protect_inc(u64 n);
void psci_mem_protect_dec(u64 n);
static __always_inline void __load_host_stage2(void)
{
if (static_branch_likely(&kvm_protected_mode_initialized))
__load_stage2(&host_kvm.arch.mmu, &host_kvm.arch);
__load_stage2(&host_mmu.arch.mmu, &host_mmu.arch);
else
write_sysreg(0, vttbr_el2);
}


@@ -7,9 +7,17 @@
#include <linux/types.h>
/*
* Accesses to struct hyp_page flags are serialized by the host stage-2
* page-table lock.
*/
#define HOST_PAGE_NEED_POISONING BIT(0)
#define HOST_PAGE_PENDING_RECLAIM BIT(1)
struct hyp_page {
unsigned short refcount;
unsigned short order;
u8 order;
u8 flags;
};
extern u64 __hyp_vmemmap;
@@ -38,6 +46,10 @@ static inline phys_addr_t hyp_virt_to_phys(void *addr)
#define hyp_page_to_virt(page) __hyp_va(hyp_page_to_phys(page))
#define hyp_page_to_pool(page) (((struct hyp_page *)page)->pool)
/*
* Refcounting for 'struct hyp_page'.
* hyp_pool::lock must be held if atomic access to the refcount is required.
*/
static inline int hyp_page_count(void *addr)
{
struct hyp_page *p = hyp_virt_to_page(addr);
@@ -45,4 +57,27 @@ static inline int hyp_page_count(void *addr)
return p->refcount;
}
static inline void hyp_page_ref_inc(struct hyp_page *p)
{
BUG_ON(p->refcount == USHRT_MAX);
p->refcount++;
}
static inline void hyp_page_ref_dec(struct hyp_page *p)
{
BUG_ON(!p->refcount);
p->refcount--;
}
static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
{
hyp_page_ref_dec(p);
return (p->refcount == 0);
}
static inline void hyp_set_page_refcounted(struct hyp_page *p)
{
BUG_ON(p->refcount);
p->refcount = 1;
}
#endif /* __KVM_HYP_MEMORY_H */
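As the comment above says, these helpers rely on the owning pool's lock for any concurrent access. A short hypothetical illustration: the caller below is made up, and it assumes struct hyp_pool exposes its hyp_spinlock_t lock as declared in nvhe/gfp.h.
/*
 * Hypothetical caller: pin a hyp page with an extra reference while
 * inspecting it, then drop the reference again, under the pool lock.
 */
static void example_hold_page(struct hyp_pool *pool, void *addr)
{
	struct hyp_page *p = hyp_virt_to_page(addr);

	hyp_spin_lock(&pool->lock);
	hyp_page_ref_inc(p);
	/* ... the page cannot reach refcount 0 while we hold this ref ... */
	if (hyp_page_ref_dec_and_test(p)) {
		/* Last reference dropped: a real caller would return the page. */
	}
	hyp_spin_unlock(&pool->lock);
}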


@@ -12,10 +12,16 @@
extern struct kvm_pgtable pkvm_pgtable;
extern hyp_spinlock_t pkvm_pgd_lock;
extern const struct pkvm_module_ops module_ops;
int hyp_create_pcpu_fixmap(void);
void *hyp_fixmap_map(phys_addr_t phys);
void hyp_fixmap_unmap(void);
void hyp_poison_page(phys_addr_t phys);
int hyp_create_idmap(u32 hyp_va_bits);
int hyp_map_vectors(void);
int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back);
int hyp_back_vmemmap(phys_addr_t back);
int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot);
int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot);
int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot);
@@ -23,17 +29,9 @@ int __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
enum kvm_pgtable_prot prot,
unsigned long *haddr);
int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr);
void pkvm_remove_mappings(void *from, void *to);
static inline void hyp_vmemmap_range(phys_addr_t phys, unsigned long size,
unsigned long *start, unsigned long *end)
{
unsigned long nr_pages = size >> PAGE_SHIFT;
struct hyp_page *p = hyp_phys_to_page(phys);
*start = (unsigned long)p;
*end = *start + nr_pages * sizeof(struct hyp_page);
*start = ALIGN_DOWN(*start, PAGE_SIZE);
*end = ALIGN(*end, PAGE_SIZE);
}
int __pkvm_map_module_page(u64 pfn, void *va, enum kvm_pgtable_prot prot);
void __pkvm_unmap_module_page(u64 pfn, void *va);
void *__pkvm_alloc_module_va(u64 nr_pages);
#endif /* __KVM_HYP_MM_H */


@@ -0,0 +1,34 @@
#include <asm/kvm_pgtable.h>
#define HCALL_HANDLED 0
#define HCALL_UNHANDLED -1
int __pkvm_register_host_smc_handler(bool (*cb)(struct kvm_cpu_context *));
int __pkvm_register_default_trap_handler(bool (*cb)(struct kvm_cpu_context *));
int __pkvm_register_illegal_abt_notifier(void (*cb)(struct kvm_cpu_context *));
int __pkvm_register_hyp_panic_notifier(void (*cb)(struct kvm_cpu_context *));
enum pkvm_psci_notification;
int __pkvm_register_psci_notifier(void (*cb)(enum pkvm_psci_notification, struct kvm_cpu_context *));
#ifdef CONFIG_MODULES
int __pkvm_init_module(void *module_init);
int __pkvm_register_hcall(unsigned long hfn_hyp_va);
int handle_host_dynamic_hcall(struct kvm_cpu_context *host_ctxt);
void pkvm_modules_lock(void);
void pkvm_modules_unlock(void);
bool pkvm_modules_enabled(void);
int __pkvm_close_module_registration(void);
#else
static inline int __pkvm_init_module(void *module_init) { return -EOPNOTSUPP; }
static inline int
__pkvm_register_hcall(unsigned long hfn_hyp_va) { return -EOPNOTSUPP; }
static inline int handle_host_dynamic_hcall(struct kvm_cpu_context *host_ctxt)
{
return HCALL_UNHANDLED;
}
static inline void pkvm_modules_lock(void) { }
static inline void pkvm_modules_unlock(void) { }
static inline bool pkvm_modules_enabled(void) { return false; }
static inline int __pkvm_close_module_registration(void) { return -EOPNOTSUPP; }
#endif
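A hedged sketch of how a pKVM module might use these hooks at load time: the handler ABI (a kvm_cpu_context argument with results returned in x0/x1) and the example_* names are assumptions; only __pkvm_register_hcall() and the HCALL_* return codes come from this header.
/*
 * Hypothetical module init: register one dynamic hypercall handler.
 * The handler signature mirrors the built-in host hcall handlers and is
 * an assumption, not taken from this header.
 */
static void example_module_hcall(struct kvm_cpu_context *host_ctxt)
{
	cpu_reg(host_ctxt, 0) = SMCCC_RET_SUCCESS;
	cpu_reg(host_ctxt, 1) = 0;
}

static int example_module_init(void)
{
	/* Fails with -EOPNOTSUPP once module registration has been closed. */
	return __pkvm_register_hcall((unsigned long)example_module_hcall);
}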


@@ -0,0 +1,140 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2021 Google LLC
* Author: Fuad Tabba <tabba@google.com>
*/
#ifndef __ARM64_KVM_NVHE_PKVM_H__
#define __ARM64_KVM_NVHE_PKVM_H__
#include <asm/kvm_pkvm.h>
#include <nvhe/gfp.h>
#include <nvhe/spinlock.h>
/*
* Holds the relevant data for maintaining the vcpu state completely at hyp.
*/
struct pkvm_hyp_vcpu {
struct kvm_vcpu vcpu;
/* Backpointer to the host's (untrusted) vCPU instance. */
struct kvm_vcpu *host_vcpu;
/*
* If this hyp vCPU is loaded, then this is a backpointer to the
* per-cpu pointer tracking us. Otherwise, NULL if not loaded.
*/
struct pkvm_hyp_vcpu **loaded_hyp_vcpu;
/* Tracks exit code for the protected guest. */
u32 exit_code;
/*
* Track the power state transition of a protected vcpu.
* Can be in one of three states:
* PSCI_0_2_AFFINITY_LEVEL_ON
* PSCI_0_2_AFFINITY_LEVEL_OFF
* PSCI_0_2_AFFINITY_LEVEL_PENDING
*/
int power_state;
};
/*
* Holds the relevant data for running a protected vm.
*/
struct pkvm_hyp_vm {
struct kvm kvm;
/* Backpointer to the host's (untrusted) KVM instance. */
struct kvm *host_kvm;
/* The guest's stage-2 page-table managed by the hypervisor. */
struct kvm_pgtable pgt;
struct kvm_pgtable_mm_ops mm_ops;
struct hyp_pool pool;
hyp_spinlock_t lock;
/* Primary vCPU pending entry to the pvmfw */
struct pkvm_hyp_vcpu *pvmfw_entry_vcpu;
/*
* The number of vcpus initialized and ready to run.
* Modifying this is protected by 'vm_table_lock'.
*/
unsigned int nr_vcpus;
/* Array of the hyp vCPU structures for this VM. */
struct pkvm_hyp_vcpu *vcpus[];
};
static inline struct pkvm_hyp_vm *
pkvm_hyp_vcpu_to_hyp_vm(struct pkvm_hyp_vcpu *hyp_vcpu)
{
return container_of(hyp_vcpu->vcpu.kvm, struct pkvm_hyp_vm, kvm);
}
static inline bool vcpu_is_protected(struct kvm_vcpu *vcpu)
{
if (!is_protected_kvm_enabled())
return false;
return vcpu->kvm->arch.pkvm.enabled;
}
static inline bool pkvm_hyp_vcpu_is_protected(struct pkvm_hyp_vcpu *hyp_vcpu)
{
return vcpu_is_protected(&hyp_vcpu->vcpu);
}
extern phys_addr_t pvmfw_base;
extern phys_addr_t pvmfw_size;
void pkvm_hyp_vm_table_init(void *tbl);
int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
unsigned long pgd_hva, unsigned long last_ran_hva);
int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
unsigned long vcpu_hva);
int __pkvm_teardown_vm(pkvm_handle_t handle);
struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle,
unsigned int vcpu_idx);
void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu);
struct pkvm_hyp_vcpu *pkvm_get_loaded_hyp_vcpu(void);
u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id);
bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code);
bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code);
void kvm_reset_pvm_sys_regs(struct kvm_vcpu *vcpu);
int kvm_check_pvm_sysreg_table(void);
void pkvm_reset_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu);
bool kvm_handle_pvm_hvc64(struct kvm_vcpu *vcpu, u64 *exit_code);
bool kvm_hyp_handle_hvc64(struct kvm_vcpu *vcpu, u64 *exit_code);
struct pkvm_hyp_vcpu *pkvm_mpidr_to_hyp_vcpu(struct pkvm_hyp_vm *vm, u64 mpidr);
static inline bool pkvm_hyp_vm_has_pvmfw(struct pkvm_hyp_vm *vm)
{
return vm->kvm.arch.pkvm.pvmfw_load_addr != PVMFW_INVALID_LOAD_ADDR;
}
static inline bool pkvm_ipa_range_has_pvmfw(struct pkvm_hyp_vm *vm,
u64 ipa_start, u64 ipa_end)
{
struct kvm_protected_vm *pkvm = &vm->kvm.arch.pkvm;
u64 pvmfw_load_end = pkvm->pvmfw_load_addr + pvmfw_size;
if (!pkvm_hyp_vm_has_pvmfw(vm))
return false;
return ipa_end > pkvm->pvmfw_load_addr && ipa_start < pvmfw_load_end;
}
int pkvm_load_pvmfw_pages(struct pkvm_hyp_vm *vm, u64 ipa, phys_addr_t phys,
u64 size);
void pkvm_poison_pvmfw_pages(void);
#endif /* __ARM64_KVM_NVHE_PKVM_H__ */
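A small hypothetical helper built on the load/put pair declared above, showing the expected pin-then-release pattern (the helper name is made up):
/*
 * Hypothetical helper: briefly pin the hyp view of a vCPU to query it.
 * pkvm_load_hyp_vcpu() returns NULL if the handle/index pair is invalid
 * or the vCPU is already loaded elsewhere.
 */
static bool example_vcpu_is_protected(pkvm_handle_t handle, unsigned int idx)
{
	struct pkvm_hyp_vcpu *hyp_vcpu = pkvm_load_hyp_vcpu(handle, idx);
	bool prot;

	if (!hyp_vcpu)
		return false;

	prot = pkvm_hyp_vcpu_is_protected(hyp_vcpu);
	pkvm_put_hyp_vcpu(hyp_vcpu);
	return prot;
}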


@@ -0,0 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __ARM64_KVM_NVHE_SERIAL_H__
#define __ARM64_KVM_NVHE_SERIAL_H__
void hyp_puts(const char *s);
void hyp_putx64(u64 x);
void hyp_putc(char c);
int __pkvm_register_serial_driver(void (*driver_cb)(char));
#endif
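A minimal, hypothetical example of plugging an early console into this interface; the UART address is a placeholder and the raw byte store stands in for a real driver's MMIO write:
/*
 * Hypothetical serial callback: write one character to a memory-mapped
 * UART TX register. The address is a placeholder; a real driver would
 * map its MMIO region first.
 */
static void example_hyp_putc(char c)
{
	volatile unsigned char *tx = (volatile unsigned char *)0xdead0000UL; /* placeholder */

	*tx = c;
}

static int example_serial_setup(void)
{
	int ret = __pkvm_register_serial_driver(example_hyp_putc);

	if (!ret)
		hyp_puts("pKVM early console registered");
	return ret;
}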


@@ -28,9 +28,17 @@ typedef union hyp_spinlock {
};
} hyp_spinlock_t;
#define __HYP_SPIN_LOCK_INITIALIZER \
{ .__val = 0 }
#define __HYP_SPIN_LOCK_UNLOCKED \
((hyp_spinlock_t) __HYP_SPIN_LOCK_INITIALIZER)
#define DEFINE_HYP_SPINLOCK(x) hyp_spinlock_t x = __HYP_SPIN_LOCK_UNLOCKED
#define hyp_spin_lock_init(l) \
do { \
*(l) = (hyp_spinlock_t){ .__val = 0 }; \
*(l) = __HYP_SPIN_LOCK_UNLOCKED; \
} while (0)
static inline void hyp_spin_lock(hyp_spinlock_t *lock)


@@ -15,6 +15,4 @@
#define DECLARE_REG(type, name, ctxt, reg) \
type name = (type)cpu_reg(ctxt, (reg))
void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu);
#endif /* __ARM64_KVM_NVHE_TRAP_HANDLER_H__ */


@@ -1,4 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
gen-hyprel
hyp.lds
hyp-reloc.S


@@ -1,111 +1,23 @@
# SPDX-License-Identifier: GPL-2.0
#
# Makefile for Kernel-based Virtual Machine module, HYP/nVHE part
#
asflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS
# Tracepoint and MMIO logging symbols should not be visible at nVHE KVM as
# there is no way to execute them and any such MMIO access from nVHE KVM
# will explode instantly (Words of Marc Zyngier). So introduce a generic flag
# __DISABLE_TRACE_MMIO__ to disable MMIO tracing for nVHE KVM.
ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS -D__DISABLE_TRACE_MMIO__
ccflags-y += -fno-stack-protector \
-DDISABLE_BRANCH_PROFILING \
$(DISABLE_STACKLEAK_PLUGIN)
hostprogs := gen-hyprel
HOST_EXTRACFLAGS += -I$(objtree)/include
lib-objs := clear_page.o copy_page.o memcpy.o memset.o
lib-objs := $(addprefix ../../../lib/, $(lib-objs))
hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \
cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o
cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o iommu.o \
serial.o
hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
hyp-obj-$(CONFIG_DEBUG_LIST) += list_debug.o
hyp-obj-$(CONFIG_MODULES) += modules.o
hyp-obj-y += $(lib-objs)
##
## Build rules for compiling nVHE hyp code
## Output of this folder is `kvm_nvhe.o`, a partially linked object
## file containing all nVHE hyp code and data.
##
hyp-obj-$(CONFIG_KVM_S2MPU) += iommu/s2mpu.o
hyp-obj-$(CONFIG_KVM_S2MPU) += iommu/io-mpt-s2mpu.o
hyp-obj := $(patsubst %.o,%.nvhe.o,$(hyp-obj-y))
obj-y := kvm_nvhe.o
targets += $(hyp-obj) kvm_nvhe.tmp.o kvm_nvhe.rel.o hyp.lds hyp-reloc.S hyp-reloc.o
# 1) Compile all source files to `.nvhe.o` object files. The file extension
# avoids file name clashes for files shared with VHE.
$(obj)/%.nvhe.o: $(src)/%.c FORCE
$(call if_changed_rule,cc_o_c)
$(obj)/%.nvhe.o: $(src)/%.S FORCE
$(call if_changed_rule,as_o_S)
# 2) Compile linker script.
$(obj)/hyp.lds: $(src)/hyp.lds.S FORCE
$(call if_changed_dep,cpp_lds_S)
# 3) Partially link all '.nvhe.o' files and apply the linker script.
# Prefixes names of ELF sections with '.hyp', eg. '.hyp.text'.
# Note: The following rule assumes that the 'ld' rule puts LDFLAGS before
# the list of dependencies to form '-T $(obj)/hyp.lds'. This is to
# keep the dependency on the target while avoiding an error from
# GNU ld if the linker script is passed to it twice.
LDFLAGS_kvm_nvhe.tmp.o := -r -T
$(obj)/kvm_nvhe.tmp.o: $(obj)/hyp.lds $(addprefix $(obj)/,$(hyp-obj)) FORCE
$(call if_changed,ld)
# 4) Generate list of hyp code/data positions that need to be relocated at
# runtime. Because the hypervisor is part of the kernel binary, relocations
# produce a kernel VA. We enumerate relocations targeting hyp at build time
# and convert the kernel VAs at those positions to hyp VAs.
$(obj)/hyp-reloc.S: $(obj)/kvm_nvhe.tmp.o $(obj)/gen-hyprel FORCE
$(call if_changed,hyprel)
# 5) Compile hyp-reloc.S and link it into the existing partially linked object.
# The object file now contains a section with pointers to hyp positions that
# will contain kernel VAs at runtime. These pointers have relocations on them
# so that they get updated as the hyp object is linked into `vmlinux`.
LDFLAGS_kvm_nvhe.rel.o := -r
$(obj)/kvm_nvhe.rel.o: $(obj)/kvm_nvhe.tmp.o $(obj)/hyp-reloc.o FORCE
$(call if_changed,ld)
# 6) Produce the final 'kvm_nvhe.o', ready to be linked into 'vmlinux'.
# Prefixes names of ELF symbols with '__kvm_nvhe_'.
$(obj)/kvm_nvhe.o: $(obj)/kvm_nvhe.rel.o FORCE
$(call if_changed,hypcopy)
# The HYPREL command calls `gen-hyprel` to generate an assembly file with
# a list of relocations targeting hyp code/data.
quiet_cmd_hyprel = HYPREL $@
cmd_hyprel = $(obj)/gen-hyprel $< > $@
# The HYPCOPY command uses `objcopy` to prefix all ELF symbol names
# to avoid clashes with VHE code/data.
quiet_cmd_hypcopy = HYPCOPY $@
cmd_hypcopy = $(OBJCOPY) --prefix-symbols=__kvm_nvhe_ $< $@
# Remove ftrace, Shadow Call Stack, and CFI CFLAGS.
# This is equivalent to the 'notrace', '__noscs', and '__nocfi' annotations.
KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) $(CC_FLAGS_CFI), $(KBUILD_CFLAGS))
# Starting from 13.0.0 llvm emits SHT_REL section '.llvm.call-graph-profile'
# when profile optimization is applied. gen-hyprel does not support SHT_REL and
# causes a build failure. Remove profile optimization flags.
KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%, $(KBUILD_CFLAGS))
# KVM nVHE code is run at a different exception code with a different map, so
# compiler instrumentation that inserts callbacks or checks into the code may
# cause crashes. Just disable it.
GCOV_PROFILE := n
KASAN_SANITIZE := n
KCSAN_SANITIZE := n
UBSAN_SANITIZE := n
KCOV_INSTRUMENT := n
# Skip objtool checking for this directory because nVHE code is compiled with
# non-standard build rules.
OBJECT_FILES_NON_STANDARD := y
include $(srctree)/arch/arm64/kvm/hyp/nvhe/Makefile.nvhe
obj-y := kvm_nvhe.o


@@ -0,0 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
$(obj)/hyp.lds: arch/arm64/kvm/hyp/nvhe/module.lds.S FORCE
$(call if_changed_dep,cpp_lds_S)
include $(srctree)/arch/arm64/kvm/hyp/nvhe/Makefile.nvhe


@@ -0,0 +1,94 @@
# SPDX-License-Identifier: GPL-2.0
#
# Makefile for Kernel-based Virtual Machine module, HYP/nVHE part
#
asflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS
# Tracepoint and MMIO logging symbols should not be visible at nVHE KVM as
# there is no way to execute them and any such MMIO access from nVHE KVM
# will explode instantly (Words of Marc Zyngier). So introduce a generic flag
# __DISABLE_TRACE_MMIO__ to disable MMIO tracing for nVHE KVM.
ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS -D__DISABLE_TRACE_MMIO__
ccflags-y += -fno-stack-protector \
-DDISABLE_BRANCH_PROFILING \
$(DISABLE_STACKLEAK_PLUGIN)
HYPREL := arch/arm64/tools/gen-hyprel
##
## Build rules for compiling nVHE hyp code
## Output of this folder is `kvm_nvhe.o`, a partially linked object
## file containing all nVHE hyp code and data.
##
hyp-obj := $(patsubst %.o,%.nvhe.o,$(hyp-obj-y))
targets += $(hyp-obj) kvm_nvhe.tmp.o kvm_nvhe.rel.o hyp.lds hyp-reloc.S hyp-reloc.o
# 1) Compile all source files to `.nvhe.o` object files. The file extension
# avoids file name clashes for files shared with VHE.
$(obj)/%.nvhe.o: $(src)/%.c FORCE
$(call if_changed_rule,cc_o_c)
$(obj)/%.nvhe.o: $(src)/%.S FORCE
$(call if_changed_rule,as_o_S)
# 2) Partially link all '.nvhe.o' files and apply the linker script.
# Prefixes names of ELF sections with '.hyp', eg. '.hyp.text'.
# Note: The following rule assumes that the 'ld' rule puts LDFLAGS before
# the list of dependencies to form '-T $(obj)/hyp.lds'. This is to
# keep the dependency on the target while avoiding an error from
# GNU ld if the linker script is passed to it twice.
LDFLAGS_kvm_nvhe.tmp.o := -r -T
$(obj)/kvm_nvhe.tmp.o: $(obj)/hyp.lds $(addprefix $(obj)/,$(hyp-obj)) FORCE
$(call if_changed,ld)
# 3) Generate list of hyp code/data positions that need to be relocated at
# runtime. Because the hypervisor is part of the kernel binary, relocations
# produce a kernel VA. We enumerate relocations targeting hyp at build time
# and convert the kernel VAs at those positions to hyp VAs.
$(obj)/hyp-reloc.S: $(obj)/kvm_nvhe.tmp.o FORCE
$(call if_changed,hyprel)
# 4) Compile hyp-reloc.S and link it into the existing partially linked object.
# The object file now contains a section with pointers to hyp positions that
# will contain kernel VAs at runtime. These pointers have relocations on them
# so that they get updated as the hyp object is linked into `vmlinux`.
LDFLAGS_kvm_nvhe.rel.o := -r
$(obj)/kvm_nvhe.rel.o: $(obj)/kvm_nvhe.tmp.o $(obj)/hyp-reloc.o FORCE
$(call if_changed,ld)
# 5) Produce the final 'kvm_nvhe.o', ready to be linked into 'vmlinux'.
# Prefixes names of ELF symbols with '__kvm_nvhe_'.
$(obj)/kvm_nvhe.o: $(obj)/kvm_nvhe.rel.o FORCE
$(call if_changed,hypcopy)
# The HYPREL command calls `gen-hyprel` to generate an assembly file with
# a list of relocations targeting hyp code/data.
quiet_cmd_hyprel = HYPREL $@
cmd_hyprel = $(HYPREL) $< > $@
# The HYPCOPY command uses `objcopy` to prefix all ELF symbol names
# to avoid clashes with VHE code/data.
quiet_cmd_hypcopy = HYPCOPY $@
cmd_hypcopy = $(OBJCOPY) --prefix-symbols=__kvm_nvhe_ $< $@
# Remove ftrace, Shadow Call Stack, and CFI CFLAGS.
# This is equivalent to the 'notrace', '__noscs', and '__nocfi' annotations.
KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) $(CC_FLAGS_CFI), $(KBUILD_CFLAGS))
# Starting from 13.0.0 llvm emits SHT_REL section '.llvm.call-graph-profile'
# when profile optimization is applied. gen-hyprel does not support SHT_REL and
# causes a build failure. Remove profile optimization flags.
KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%, $(KBUILD_CFLAGS))
# KVM nVHE code is run at a different exception code with a different map, so
# compiler instrumentation that inserts callbacks or checks into the code may
# cause crashes. Just disable it.
GCOV_PROFILE := n
KASAN_SANITIZE := n
KCSAN_SANITIZE := n
UBSAN_SANITIZE := n
KCOV_INSTRUMENT := n
# Skip objtool checking for this directory because nVHE code is compiled with
# non-standard build rules.
OBJECT_FILES_NON_STANDARD := y


@@ -12,3 +12,14 @@ SYM_FUNC_START(__pi_dcache_clean_inval_poc)
ret
SYM_FUNC_END(__pi_dcache_clean_inval_poc)
SYM_FUNC_ALIAS(dcache_clean_inval_poc, __pi_dcache_clean_inval_poc)
SYM_FUNC_START(__pi_icache_inval_pou)
alternative_if ARM64_HAS_CACHE_DIC
isb
ret
alternative_else_nop_endif
invalidate_icache_by_line x0, x1, x2, x3
ret
SYM_FUNC_END(__pi_icache_inval_pou)
SYM_FUNC_ALIAS(icache_inval_pou, __pi_icache_inval_pou)


@@ -0,0 +1,741 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* FF-A v1.0 proxy to filter out invalid memory-sharing SMC calls issued by
* the host. FF-A is a slightly more palatable abbreviation of "Arm Firmware
* Framework for Arm A-profile", which is specified by Arm in document
* number DEN0077.
*
* Copyright (C) 2022 - Google LLC
* Author: Andrew Walbran <qwandor@google.com>
*
* This driver hooks into the SMC trapping logic for the host and intercepts
* all calls falling within the FF-A range. Each call is either:
*
* - Forwarded on unmodified to the SPMD at EL3
* - Rejected as "unsupported"
* - Accompanied by a host stage-2 page-table check/update and reissued
*
* Consequently, any attempts by the host to make guest memory pages
* accessible to the secure world using FF-A will be detected either here
* (in the case that the memory is already owned by the guest) or during
* donation to the guest (in the case that the memory was previously shared
* with the secure world).
*
* To allow the rolling-back of page-table updates and FF-A calls in the
* event of failure, operations involving the RXTX buffers are locked for
* the duration and are therefore serialised.
*/
#include <linux/arm-smccc.h>
#include <linux/arm_ffa.h>
#include <asm/kvm_pkvm.h>
#include <nvhe/ffa.h>
#include <nvhe/mem_protect.h>
#include <nvhe/memory.h>
#include <nvhe/trap_handler.h>
#include <nvhe/spinlock.h>
/*
* "ID value 0 must be returned at the Non-secure physical FF-A instance"
* We share this ID with the host.
*/
#define HOST_FFA_ID 0
/*
* A buffer to hold the maximum descriptor size we can see from the host,
* which is required when the SPMD returns a fragmented FFA_MEM_RETRIEVE_RESP
* when resolving the handle on the reclaim path.
*/
struct kvm_ffa_descriptor_buffer {
void *buf;
size_t len;
};
static struct kvm_ffa_descriptor_buffer ffa_desc_buf;
struct kvm_ffa_buffers {
hyp_spinlock_t lock;
void *tx;
void *rx;
};
/*
* Note that we don't currently lock these buffers explicitly, instead
* relying on the locking of the host FFA buffers as we only have one
* client.
*/
static struct kvm_ffa_buffers hyp_buffers;
static struct kvm_ffa_buffers host_buffers;
static void ffa_to_smccc_error(struct arm_smccc_res *res, u64 ffa_errno)
{
*res = (struct arm_smccc_res) {
.a0 = FFA_ERROR,
.a2 = ffa_errno,
};
}
static void ffa_to_smccc_res_prop(struct arm_smccc_res *res, int ret, u64 prop)
{
if (ret == FFA_RET_SUCCESS) {
*res = (struct arm_smccc_res) { .a0 = FFA_SUCCESS,
.a2 = prop };
} else {
ffa_to_smccc_error(res, ret);
}
}
static void ffa_to_smccc_res(struct arm_smccc_res *res, int ret)
{
ffa_to_smccc_res_prop(res, ret, 0);
}
static void ffa_set_retval(struct kvm_cpu_context *ctxt,
struct arm_smccc_res *res)
{
cpu_reg(ctxt, 0) = res->a0;
cpu_reg(ctxt, 1) = res->a1;
cpu_reg(ctxt, 2) = res->a2;
cpu_reg(ctxt, 3) = res->a3;
}
static bool is_ffa_call(u64 func_id)
{
return ARM_SMCCC_IS_FAST_CALL(func_id) &&
ARM_SMCCC_OWNER_NUM(func_id) == ARM_SMCCC_OWNER_STANDARD &&
ARM_SMCCC_FUNC_NUM(func_id) >= FFA_MIN_FUNC_NUM &&
ARM_SMCCC_FUNC_NUM(func_id) <= FFA_MAX_FUNC_NUM;
}
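Concretely, the memory-interface SMCs proxied further down all land in this window. A hypothetical debug assertion (assuming FFA_FN64_MEM_SHARE from <linux/arm_ffa.h>) would be:
/*
 * Hypothetical sanity check: FFA_FN64_MEM_SHARE is a fast call owned by
 * ARM_SMCCC_OWNER_STANDARD whose function number lies inside
 * [FFA_MIN_FUNC_NUM, FFA_MAX_FUNC_NUM], so is_ffa_call() accepts it.
 */
static inline void example_check_ffa_window(void)
{
	WARN_ON(!is_ffa_call(FFA_FN64_MEM_SHARE));
}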
static int spmd_map_ffa_buffers(u64 ffa_page_count)
{
struct arm_smccc_res res;
arm_smccc_1_1_smc(FFA_FN64_RXTX_MAP,
hyp_virt_to_phys(hyp_buffers.tx),
hyp_virt_to_phys(hyp_buffers.rx),
ffa_page_count,
0, 0, 0, 0,
&res);
return res.a0 == FFA_SUCCESS ? FFA_RET_SUCCESS : res.a2;
}
static int spmd_unmap_ffa_buffers(void)
{
struct arm_smccc_res res;
arm_smccc_1_1_smc(FFA_RXTX_UNMAP,
HOST_FFA_ID,
0, 0, 0, 0, 0, 0,
&res);
return res.a0 == FFA_SUCCESS ? FFA_RET_SUCCESS : res.a2;
}
static void spmd_mem_frag_tx(struct arm_smccc_res *res, u32 handle_lo,
u32 handle_hi, u32 fraglen, u32 endpoint_id)
{
arm_smccc_1_1_smc(FFA_MEM_FRAG_TX,
handle_lo, handle_hi, fraglen, endpoint_id,
0, 0, 0,
res);
}
static void spmd_mem_frag_rx(struct arm_smccc_res *res, u32 handle_lo,
u32 handle_hi, u32 fragoff)
{
arm_smccc_1_1_smc(FFA_MEM_FRAG_RX,
handle_lo, handle_hi, fragoff, HOST_FFA_ID,
0, 0, 0,
res);
}
static void spmd_mem_xfer(struct arm_smccc_res *res, u64 func_id, u32 len,
u32 fraglen)
{
arm_smccc_1_1_smc(func_id, len, fraglen,
0, 0, 0, 0, 0,
res);
}
static void spmd_mem_reclaim(struct arm_smccc_res *res, u32 handle_lo,
u32 handle_hi, u32 flags)
{
arm_smccc_1_1_smc(FFA_MEM_RECLAIM,
handle_lo, handle_hi, flags,
0, 0, 0, 0,
res);
}
static void spmd_retrieve_req(struct arm_smccc_res *res, u32 len)
{
arm_smccc_1_1_smc(FFA_FN64_MEM_RETRIEVE_REQ,
len, len,
0, 0, 0, 0, 0,
res);
}
static void do_ffa_rxtx_map(struct arm_smccc_res *res,
struct kvm_cpu_context *ctxt)
{
DECLARE_REG(phys_addr_t, tx, ctxt, 1);
DECLARE_REG(phys_addr_t, rx, ctxt, 2);
DECLARE_REG(u32, npages, ctxt, 3);
int ret = 0;
void *rx_virt, *tx_virt;
if (npages != (KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE) / FFA_PAGE_SIZE) {
ret = FFA_RET_INVALID_PARAMETERS;
goto out;
}
if (!PAGE_ALIGNED(tx) || !PAGE_ALIGNED(rx)) {
ret = FFA_RET_INVALID_PARAMETERS;
goto out;
}
hyp_spin_lock(&host_buffers.lock);
if (host_buffers.tx) {
ret = FFA_RET_DENIED;
goto out_unlock;
}
ret = spmd_map_ffa_buffers(npages);
if (ret)
goto out_unlock;
ret = __pkvm_host_share_hyp(hyp_phys_to_pfn(tx));
if (ret) {
ret = FFA_RET_INVALID_PARAMETERS;
goto err_unmap;
}
ret = __pkvm_host_share_hyp(hyp_phys_to_pfn(rx));
if (ret) {
ret = FFA_RET_INVALID_PARAMETERS;
goto err_unshare_tx;
}
tx_virt = hyp_phys_to_virt(tx);
ret = hyp_pin_shared_mem(tx_virt, tx_virt + 1);
if (ret) {
ret = FFA_RET_INVALID_PARAMETERS;
goto err_unshare_rx;
}
rx_virt = hyp_phys_to_virt(rx);
ret = hyp_pin_shared_mem(rx_virt, rx_virt + 1);
if (ret) {
ret = FFA_RET_INVALID_PARAMETERS;
goto err_unpin_tx;
}
host_buffers.tx = tx_virt;
host_buffers.rx = rx_virt;
out_unlock:
hyp_spin_unlock(&host_buffers.lock);
out:
ffa_to_smccc_res(res, ret);
return;
err_unpin_tx:
hyp_unpin_shared_mem(tx_virt, tx_virt + 1);
err_unshare_rx:
__pkvm_host_unshare_hyp(hyp_phys_to_pfn(rx));
err_unshare_tx:
__pkvm_host_unshare_hyp(hyp_phys_to_pfn(tx));
err_unmap:
spmd_unmap_ffa_buffers();
goto out_unlock;
}
static void do_ffa_rxtx_unmap(struct arm_smccc_res *res,
struct kvm_cpu_context *ctxt)
{
DECLARE_REG(u32, id, ctxt, 1);
int ret = 0;
if (id != HOST_FFA_ID) {
ret = FFA_RET_INVALID_PARAMETERS;
goto out;
}
hyp_spin_lock(&host_buffers.lock);
if (!host_buffers.tx) {
ret = FFA_RET_INVALID_PARAMETERS;
goto out_unlock;
}
hyp_unpin_shared_mem(host_buffers.tx, host_buffers.tx + 1);
WARN_ON(__pkvm_host_unshare_hyp(hyp_virt_to_pfn(host_buffers.tx)));
host_buffers.tx = NULL;
hyp_unpin_shared_mem(host_buffers.rx, host_buffers.rx + 1);
WARN_ON(__pkvm_host_unshare_hyp(hyp_virt_to_pfn(host_buffers.rx)));
host_buffers.rx = NULL;
spmd_unmap_ffa_buffers();
out_unlock:
hyp_spin_unlock(&host_buffers.lock);
out:
ffa_to_smccc_res(res, ret);
}
static u32 __ffa_host_share_ranges(struct ffa_mem_region_addr_range *ranges,
u32 nranges)
{
u32 i;
for (i = 0; i < nranges; ++i) {
struct ffa_mem_region_addr_range *range = &ranges[i];
u64 sz = (u64)range->pg_cnt * FFA_PAGE_SIZE;
u64 pfn = hyp_phys_to_pfn(range->address);
if (!PAGE_ALIGNED(sz))
break;
if (__pkvm_host_share_ffa(pfn, sz / PAGE_SIZE))
break;
}
return i;
}
static u32 __ffa_host_unshare_ranges(struct ffa_mem_region_addr_range *ranges,
u32 nranges)
{
u32 i;
for (i = 0; i < nranges; ++i) {
struct ffa_mem_region_addr_range *range = &ranges[i];
u64 sz = (u64)range->pg_cnt * FFA_PAGE_SIZE;
u64 pfn = hyp_phys_to_pfn(range->address);
if (!PAGE_ALIGNED(sz))
break;
if (__pkvm_host_unshare_ffa(pfn, sz / PAGE_SIZE))
break;
}
return i;
}
static int ffa_host_share_ranges(struct ffa_mem_region_addr_range *ranges,
u32 nranges)
{
u32 nshared = __ffa_host_share_ranges(ranges, nranges);
int ret = 0;
if (nshared != nranges) {
WARN_ON(__ffa_host_unshare_ranges(ranges, nshared) != nshared);
ret = FFA_RET_DENIED;
}
return ret;
}
static int ffa_host_unshare_ranges(struct ffa_mem_region_addr_range *ranges,
u32 nranges)
{
u32 nunshared = __ffa_host_unshare_ranges(ranges, nranges);
int ret = 0;
if (nunshared != nranges) {
WARN_ON(__ffa_host_share_ranges(ranges, nunshared) != nunshared);
ret = FFA_RET_DENIED;
}
return ret;
}
static void do_ffa_mem_frag_tx(struct arm_smccc_res *res,
struct kvm_cpu_context *ctxt)
{
DECLARE_REG(u32, handle_lo, ctxt, 1);
DECLARE_REG(u32, handle_hi, ctxt, 2);
DECLARE_REG(u32, fraglen, ctxt, 3);
DECLARE_REG(u32, endpoint_id, ctxt, 4);
struct ffa_mem_region_addr_range *buf;
int ret = FFA_RET_INVALID_PARAMETERS;
u32 nr_ranges;
if (fraglen > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE)
goto out;
if (fraglen % sizeof(*buf))
goto out;
hyp_spin_lock(&host_buffers.lock);
if (!host_buffers.tx)
goto out_unlock;
buf = hyp_buffers.tx;
memcpy(buf, host_buffers.tx, fraglen);
nr_ranges = fraglen / sizeof(*buf);
ret = ffa_host_share_ranges(buf, nr_ranges);
if (ret) {
/*
* We're effectively aborting the transaction, so we need
* to restore the global state back to what it was prior to
* transmission of the first fragment.
*/
spmd_mem_reclaim(res, handle_lo, handle_hi, 0);
WARN_ON(res->a0 != FFA_SUCCESS);
goto out_unlock;
}
spmd_mem_frag_tx(res, handle_lo, handle_hi, fraglen, endpoint_id);
if (res->a0 != FFA_SUCCESS && res->a0 != FFA_MEM_FRAG_RX)
WARN_ON(ffa_host_unshare_ranges(buf, nr_ranges));
out_unlock:
hyp_spin_unlock(&host_buffers.lock);
out:
if (ret)
ffa_to_smccc_res(res, ret);
/*
* If for any reason this did not succeed, we're in trouble as we have
* now lost the content of the previous fragments and we can't rollback
* the host stage-2 changes. The pages previously marked as shared will
* remain stuck in that state forever, hence preventing the host from
* sharing/donating them again and may possibly lead to subsequent
* failures, but this will not compromise confidentiality.
*/
return;
}
static __always_inline void do_ffa_mem_xfer(const u64 func_id,
struct arm_smccc_res *res,
struct kvm_cpu_context *ctxt)
{
DECLARE_REG(u32, len, ctxt, 1);
DECLARE_REG(u32, fraglen, ctxt, 2);
DECLARE_REG(u64, addr_mbz, ctxt, 3);
DECLARE_REG(u32, npages_mbz, ctxt, 4);
struct ffa_composite_mem_region *reg;
struct ffa_mem_region *buf;
u32 offset, nr_ranges;
int ret = 0;
BUILD_BUG_ON(func_id != FFA_FN64_MEM_SHARE &&
func_id != FFA_FN64_MEM_LEND);
if (addr_mbz || npages_mbz || fraglen > len ||
fraglen > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE) {
ret = FFA_RET_INVALID_PARAMETERS;
goto out;
}
if (fraglen < sizeof(struct ffa_mem_region) +
sizeof(struct ffa_mem_region_attributes)) {
ret = FFA_RET_INVALID_PARAMETERS;
goto out;
}
hyp_spin_lock(&host_buffers.lock);
if (!host_buffers.tx) {
ret = FFA_RET_INVALID_PARAMETERS;
goto out_unlock;
}
buf = hyp_buffers.tx;
memcpy(buf, host_buffers.tx, fraglen);
offset = buf->ep_mem_access[0].composite_off;
if (!offset || buf->ep_count != 1 || buf->sender_id != HOST_FFA_ID) {
ret = FFA_RET_INVALID_PARAMETERS;
goto out_unlock;
}
if (fraglen < offset + sizeof(struct ffa_composite_mem_region)) {
ret = FFA_RET_INVALID_PARAMETERS;
goto out_unlock;
}
reg = (void *)buf + offset;
nr_ranges = ((void *)buf + fraglen) - (void *)reg->constituents;
if (nr_ranges % sizeof(reg->constituents[0])) {
ret = FFA_RET_INVALID_PARAMETERS;
goto out_unlock;
}
nr_ranges /= sizeof(reg->constituents[0]);
ret = ffa_host_share_ranges(reg->constituents, nr_ranges);
if (ret)
goto out_unlock;
spmd_mem_xfer(res, func_id, len, fraglen);
if (fraglen != len) {
if (res->a0 != FFA_MEM_FRAG_RX)
goto err_unshare;
if (res->a3 != fraglen)
goto err_unshare;
} else if (res->a0 != FFA_SUCCESS) {
goto err_unshare;
}
out_unlock:
hyp_spin_unlock(&host_buffers.lock);
out:
if (ret)
ffa_to_smccc_res(res, ret);
return;
err_unshare:
WARN_ON(ffa_host_unshare_ranges(reg->constituents, nr_ranges));
goto out_unlock;
}
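do_ffa_mem_xfer() derives the number of address ranges from the bytes left between the composite region's constituent array and the end of the fragment. The sketch below reproduces just that arithmetic with simplified stand-in structs; the field names and sizes are illustrative, not the real FF-A layout:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Toy stand-ins for the FF-A descriptor layout. */
struct toy_addr_range { uint64_t address; uint32_t pg_cnt; uint32_t reserved; };
struct toy_composite  { uint32_t total_pg_cnt; uint32_t addr_range_cnt;
			struct toy_addr_range constituents[]; };

/* The constituent array is whatever is left of the fragment after the
 * composite header, so the range count falls out of simple arithmetic. */
static int nr_ranges_in_fragment(uint32_t fraglen, uint32_t composite_off)
{
	uint32_t constituents_off = composite_off +
				    offsetof(struct toy_composite, constituents);
	uint32_t bytes;

	if (fraglen < constituents_off)
		return -1;                      /* malformed: header overruns fragment */
	bytes = fraglen - constituents_off;
	if (bytes % sizeof(struct toy_addr_range))
		return -1;                      /* malformed: partial range entry */
	return bytes / sizeof(struct toy_addr_range);
}

int main(void)
{
	/* a header region followed by three 16-byte constituent ranges */
	printf("%d ranges\n", nr_ranges_in_fragment(0x40 + 3 * 16, 0x40 - 8));
	return 0;
}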
static void do_ffa_mem_reclaim(struct arm_smccc_res *res,
struct kvm_cpu_context *ctxt)
{
DECLARE_REG(u32, handle_lo, ctxt, 1);
DECLARE_REG(u32, handle_hi, ctxt, 2);
DECLARE_REG(u32, flags, ctxt, 3);
struct ffa_composite_mem_region *reg;
u32 offset, len, fraglen, fragoff;
struct ffa_mem_region *buf;
int ret = 0;
u64 handle;
handle = PACK_HANDLE(handle_lo, handle_hi);
hyp_spin_lock(&host_buffers.lock);
buf = hyp_buffers.tx;
*buf = (struct ffa_mem_region) {
.sender_id = HOST_FFA_ID,
.handle = handle,
};
spmd_retrieve_req(res, sizeof(*buf));
buf = hyp_buffers.rx;
if (res->a0 != FFA_MEM_RETRIEVE_RESP)
goto out_unlock;
len = res->a1;
fraglen = res->a2;
offset = buf->ep_mem_access[0].composite_off;
/*
* We can trust the SPMD to get this right, but let's at least
* check that we end up with something that doesn't look _completely_
* bogus.
*/
if (WARN_ON(offset > len ||
fraglen > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE)) {
ret = FFA_RET_ABORTED;
goto out_unlock;
}
if (len > ffa_desc_buf.len) {
ret = FFA_RET_NO_MEMORY;
goto out_unlock;
}
buf = ffa_desc_buf.buf;
memcpy(buf, hyp_buffers.rx, fraglen);
for (fragoff = fraglen; fragoff < len; fragoff += fraglen) {
spmd_mem_frag_rx(res, handle_lo, handle_hi, fragoff);
if (res->a0 != FFA_MEM_FRAG_TX) {
ret = FFA_RET_INVALID_PARAMETERS;
goto out_unlock;
}
fraglen = res->a3;
memcpy((void *)buf + fragoff, hyp_buffers.rx, fraglen);
}
spmd_mem_reclaim(res, handle_lo, handle_hi, flags);
if (res->a0 != FFA_SUCCESS)
goto out_unlock;
reg = (void *)buf + offset;
/* If the SPMD was happy, then we should be too. */
WARN_ON(ffa_host_unshare_ranges(reg->constituents,
reg->addr_range_cnt));
out_unlock:
hyp_spin_unlock(&host_buffers.lock);
if (ret)
ffa_to_smccc_res(res, ret);
}
static bool ffa_call_unsupported(u64 func_id)
{
switch (func_id) {
/* Unsupported memory management calls */
case FFA_FN64_MEM_RETRIEVE_REQ:
case FFA_MEM_RETRIEVE_RESP:
case FFA_MEM_RELINQUISH:
case FFA_MEM_OP_PAUSE:
case FFA_MEM_OP_RESUME:
case FFA_MEM_FRAG_RX:
case FFA_FN64_MEM_DONATE:
/* Indirect message passing via RX/TX buffers */
case FFA_MSG_SEND:
case FFA_MSG_POLL:
case FFA_MSG_WAIT:
/* 32-bit variants of 64-bit calls */
case FFA_MSG_SEND_DIRECT_REQ:
case FFA_MSG_SEND_DIRECT_RESP:
case FFA_RXTX_MAP:
case FFA_MEM_DONATE:
case FFA_MEM_RETRIEVE_REQ:
return true;
}
return false;
}
static bool do_ffa_features(struct arm_smccc_res *res,
struct kvm_cpu_context *ctxt)
{
DECLARE_REG(u32, id, ctxt, 1);
u64 prop = 0;
int ret = 0;
if (ffa_call_unsupported(id)) {
ret = FFA_RET_NOT_SUPPORTED;
goto out_handled;
}
switch (id) {
case FFA_MEM_SHARE:
case FFA_FN64_MEM_SHARE:
case FFA_MEM_LEND:
case FFA_FN64_MEM_LEND:
ret = FFA_RET_SUCCESS;
prop = 0; /* No support for dynamic buffers */
goto out_handled;
default:
return false;
}
out_handled:
ffa_to_smccc_res_prop(res, ret, prop);
return true;
}
bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(u64, func_id, host_ctxt, 0);
struct arm_smccc_res res;
if (!is_ffa_call(func_id))
return false;
switch (func_id) {
case FFA_FEATURES:
if (!do_ffa_features(&res, host_ctxt))
return false;
goto out_handled;
/* Memory management */
case FFA_FN64_RXTX_MAP:
do_ffa_rxtx_map(&res, host_ctxt);
goto out_handled;
case FFA_RXTX_UNMAP:
do_ffa_rxtx_unmap(&res, host_ctxt);
goto out_handled;
case FFA_MEM_SHARE:
case FFA_FN64_MEM_SHARE:
do_ffa_mem_xfer(FFA_FN64_MEM_SHARE, &res, host_ctxt);
goto out_handled;
case FFA_MEM_RECLAIM:
do_ffa_mem_reclaim(&res, host_ctxt);
goto out_handled;
case FFA_MEM_LEND:
case FFA_FN64_MEM_LEND:
do_ffa_mem_xfer(FFA_FN64_MEM_LEND, &res, host_ctxt);
goto out_handled;
case FFA_MEM_FRAG_TX:
do_ffa_mem_frag_tx(&res, host_ctxt);
goto out_handled;
}
if (!ffa_call_unsupported(func_id))
return false; /* Pass through */
ffa_to_smccc_error(&res, FFA_RET_NOT_SUPPORTED);
out_handled:
ffa_set_retval(host_ctxt, &res);
return true;
}
int hyp_ffa_init(void *pages)
{
struct arm_smccc_res res;
size_t min_rxtx_sz;
void *tx, *rx;
if (kvm_host_psci_config.smccc_version < ARM_SMCCC_VERSION_1_1)
return 0;
arm_smccc_1_1_smc(FFA_VERSION, FFA_VERSION_1_0, 0, 0, 0, 0, 0, 0, &res);
if (res.a0 == FFA_RET_NOT_SUPPORTED)
return 0;
if (res.a0 != FFA_VERSION_1_0)
return -EOPNOTSUPP;
arm_smccc_1_1_smc(FFA_ID_GET, 0, 0, 0, 0, 0, 0, 0, &res);
if (res.a0 != FFA_SUCCESS)
return -EOPNOTSUPP;
if (res.a2 != HOST_FFA_ID)
return -EINVAL;
arm_smccc_1_1_smc(FFA_FEATURES, FFA_FN64_RXTX_MAP,
0, 0, 0, 0, 0, 0, &res);
if (res.a0 != FFA_SUCCESS)
return -EOPNOTSUPP;
switch (res.a2) {
case FFA_FEAT_RXTX_MIN_SZ_4K:
min_rxtx_sz = SZ_4K;
break;
case FFA_FEAT_RXTX_MIN_SZ_16K:
min_rxtx_sz = SZ_16K;
break;
case FFA_FEAT_RXTX_MIN_SZ_64K:
min_rxtx_sz = SZ_64K;
break;
default:
return -EINVAL;
}
if (min_rxtx_sz > PAGE_SIZE)
return -EOPNOTSUPP;
tx = pages;
pages += KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE;
rx = pages;
pages += KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE;
ffa_desc_buf = (struct kvm_ffa_descriptor_buffer) {
.buf = pages,
.len = PAGE_SIZE *
(hyp_ffa_proxy_pages() - (2 * KVM_FFA_MBOX_NR_PAGES)),
};
hyp_buffers = (struct kvm_ffa_buffers) {
.lock = __HYP_SPIN_LOCK_UNLOCKED,
.tx = tx,
.rx = rx,
};
host_buffers = (struct kvm_ffa_buffers) {
.lock = __HYP_SPIN_LOCK_UNLOCKED,
};
return 0;
}
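hyp_ffa_init() carves the donated page run into a TX mailbox, an RX mailbox and a descriptor buffer that takes whatever is left. A standalone sketch of the layout arithmetic, with made-up values for the mailbox and proxy page counts:

#include <stddef.h>
#include <stdio.h>

/* Illustrative numbers only; the real values come from the hypervisor config. */
#define PAGE_SZ        4096u
#define MBOX_NR_PAGES  1u      /* pages per mailbox */
#define PROXY_PAGES    32u     /* total pages handed to the FF-A proxy at init */

int main(void)
{
	size_t tx_off   = 0;
	size_t rx_off   = tx_off + MBOX_NR_PAGES * PAGE_SZ;
	size_t desc_off = rx_off + MBOX_NR_PAGES * PAGE_SZ;
	size_t desc_len = PAGE_SZ * (PROXY_PAGES - 2 * MBOX_NR_PAGES);

	printf("tx   @ +0x%zx (%u pages)\n", tx_off, MBOX_NR_PAGES);
	printf("rx   @ +0x%zx (%u pages)\n", rx_off, MBOX_NR_PAGES);
	printf("desc @ +0x%zx (%zu bytes)\n", desc_off, desc_len);
	return 0;
}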

File diff suppressed because it is too large


@@ -8,6 +8,8 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
DEFINE_PER_CPU(int, hyp_cpu_number);
/*
* nVHE copy of data structures tracking available CPU cores.
* Only entries for CPUs that were online at KVM init are populated.
@@ -23,6 +25,8 @@ u64 cpu_logical_map(unsigned int cpu)
return hyp_cpu_logical_map[cpu];
}
unsigned long __ro_after_init kvm_arm_hyp_percpu_base[NR_CPUS];
unsigned long __hyp_per_cpu_offset(unsigned int cpu)
{
unsigned long *cpu_base_array;


@@ -25,5 +25,7 @@ SECTIONS {
BEGIN_HYP_SECTION(.data..percpu)
PERCPU_INPUT(L1_CACHE_BYTES)
END_HYP_SECTION
HYP_SECTION(.bss)
HYP_SECTION(.data)
}


@@ -0,0 +1,570 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2022 Google LLC
* Author: David Brazdil <dbrazdil@google.com>
*/
#include <linux/kvm_host.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pkvm.h>
#include <hyp/adjust_pc.h>
#include <nvhe/iommu.h>
#include <nvhe/mm.h>
#define DRV_ID(drv_addr) ((unsigned long)drv_addr)
enum {
IOMMU_DRIVER_NOT_READY = 0,
IOMMU_DRIVER_INITIALIZING,
IOMMU_DRIVER_READY,
};
/* List of registered IOMMU drivers, protected with iommu_drv_lock. */
static LIST_HEAD(iommu_drivers);
/* IOMMU device list. Must only be accessed with host_mmu.lock held. */
static LIST_HEAD(iommu_list);
static bool iommu_finalized;
static DEFINE_HYP_SPINLOCK(iommu_registration_lock);
static DEFINE_HYP_SPINLOCK(iommu_drv_lock);
static void *iommu_mem_pool;
static size_t iommu_mem_remaining;
static void assert_host_component_locked(void)
{
hyp_assert_lock_held(&host_mmu.lock);
}
static void host_lock_component(void)
{
hyp_spin_lock(&host_mmu.lock);
}
static void host_unlock_component(void)
{
hyp_spin_unlock(&host_mmu.lock);
}
/*
* Find IOMMU driver by its ID. The input ID is treated as untrusted
* and is properly validated.
*/
static inline struct pkvm_iommu_driver *get_driver(unsigned long id)
{
struct pkvm_iommu_driver *drv, *ret = NULL;
hyp_spin_lock(&iommu_drv_lock);
list_for_each_entry(drv, &iommu_drivers, list) {
if (DRV_ID(drv) == id) {
ret = drv;
break;
}
}
hyp_spin_unlock(&iommu_drv_lock);
return ret;
}
static inline bool driver_acquire_init(struct pkvm_iommu_driver *drv)
{
return atomic_cmpxchg_acquire(&drv->state, IOMMU_DRIVER_NOT_READY,
IOMMU_DRIVER_INITIALIZING)
== IOMMU_DRIVER_NOT_READY;
}
static inline void driver_release_init(struct pkvm_iommu_driver *drv,
bool success)
{
atomic_set_release(&drv->state, success ? IOMMU_DRIVER_READY
: IOMMU_DRIVER_NOT_READY);
}
static inline bool is_driver_ready(struct pkvm_iommu_driver *drv)
{
return atomic_read(&drv->state) == IOMMU_DRIVER_READY;
}
static size_t __iommu_alloc_size(struct pkvm_iommu_driver *drv)
{
return ALIGN(sizeof(struct pkvm_iommu) + drv->ops->data_size,
sizeof(unsigned long));
}
static bool validate_driver_id_unique(struct pkvm_iommu_driver *drv)
{
struct pkvm_iommu_driver *cur;
hyp_assert_lock_held(&iommu_drv_lock);
list_for_each_entry(cur, &iommu_drivers, list) {
if (DRV_ID(drv) == DRV_ID(cur))
return false;
}
return true;
}
static int __pkvm_register_iommu_driver(struct pkvm_iommu_driver *drv)
{
int ret = 0;
if (!drv)
return -EINVAL;
hyp_assert_lock_held(&iommu_registration_lock);
hyp_spin_lock(&iommu_drv_lock);
if (validate_driver_id_unique(drv))
list_add_tail(&drv->list, &iommu_drivers);
else
ret = -EEXIST;
hyp_spin_unlock(&iommu_drv_lock);
return ret;
}
/* Global memory pool for allocating IOMMU list entry structs. */
static inline struct pkvm_iommu *alloc_iommu(struct pkvm_iommu_driver *drv,
void *mem, size_t mem_size)
{
size_t size = __iommu_alloc_size(drv);
void *ptr;
assert_host_component_locked();
/*
* If new memory is being provided, replace the existing pool with it.
* Any remaining memory in the pool is discarded.
*/
if (mem && mem_size) {
iommu_mem_pool = mem;
iommu_mem_remaining = mem_size;
}
if (size > iommu_mem_remaining)
return NULL;
ptr = iommu_mem_pool;
iommu_mem_pool += size;
iommu_mem_remaining -= size;
return ptr;
}
static inline void free_iommu(struct pkvm_iommu_driver *drv, struct pkvm_iommu *ptr)
{
size_t size = __iommu_alloc_size(drv);
assert_host_component_locked();
if (!ptr)
return;
/* Only allow freeing the last allocated buffer. */
if ((void *)ptr + size != iommu_mem_pool)
return;
iommu_mem_pool -= size;
iommu_mem_remaining += size;
}
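alloc_iommu() and free_iommu() implement a bump allocator over host-donated memory in which only the most recent allocation can be returned. A minimal standalone sketch of that discipline (pool size and names are illustrative):

#include <stddef.h>
#include <stdio.h>

static unsigned char pool_mem[256];
static unsigned char *pool_cur = pool_mem;
static size_t pool_left = sizeof(pool_mem);

/* Bump-allocate 'size' bytes, or NULL if the pool is exhausted. */
static void *bump_alloc(size_t size)
{
	void *p;

	if (size > pool_left)
		return NULL;
	p = pool_cur;
	pool_cur += size;
	pool_left -= size;
	return p;
}

/* Only the most recent allocation can be handed back, like free_iommu() above. */
static void bump_free_last(void *p, size_t size)
{
	if ((unsigned char *)p + size != pool_cur)
		return;                 /* not the last allocation: silently keep it */
	pool_cur -= size;
	pool_left += size;
}

int main(void)
{
	void *a = bump_alloc(64);
	void *b = bump_alloc(64);

	bump_free_last(a, 64);          /* ignored: 'a' is not on top of the pool */
	bump_free_last(b, 64);          /* reclaimed */
	printf("%zu bytes left\n", pool_left);   /* 192 */
	return 0;
}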
static bool is_overlap(phys_addr_t r1_start, size_t r1_size,
phys_addr_t r2_start, size_t r2_size)
{
phys_addr_t r1_end = r1_start + r1_size;
phys_addr_t r2_end = r2_start + r2_size;
return (r1_start < r2_end) && (r2_start < r1_end);
}
static bool is_mmio_range(phys_addr_t base, size_t size)
{
struct memblock_region *reg;
phys_addr_t limit = BIT(host_mmu.pgt.ia_bits);
size_t i;
/* Check against limits of host IPA space. */
if ((base >= limit) || !size || (size > limit - base))
return false;
for (i = 0; i < hyp_memblock_nr; i++) {
reg = &hyp_memory[i];
if (is_overlap(base, size, reg->base, reg->size))
return false;
}
return true;
}
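is_overlap() is the usual half-open interval test, and is_mmio_range() accepts a range only if it fits below the IPA limit and misses every memblock. A simplified standalone mirror of the two checks:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct region { uint64_t base, size; };

/* Half-open interval overlap test, same shape as is_overlap() above. */
static bool overlaps(uint64_t a_start, uint64_t a_size,
		     uint64_t b_start, uint64_t b_size)
{
	return a_start < b_start + b_size && b_start < a_start + a_size;
}

/* A range counts as MMIO here if it stays inside the address limit and
 * misses every memory region - a simplified mirror of is_mmio_range(). */
static bool is_mmio(uint64_t base, uint64_t size, uint64_t limit,
		    const struct region *mem, int nr_mem)
{
	int i;

	if (base >= limit || !size || size > limit - base)
		return false;
	for (i = 0; i < nr_mem; i++)
		if (overlaps(base, size, mem[i].base, mem[i].size))
			return false;
	return true;
}

int main(void)
{
	const struct region mem[] = { { 0x80000000ull, 0x40000000ull } };

	printf("%d\n", is_mmio(0x10000000ull, 0x1000, 1ull << 40, mem, 1)); /* 1: clear of RAM */
	printf("%d\n", is_mmio(0x80001000ull, 0x1000, 1ull << 40, mem, 1)); /* 0: inside RAM  */
	return 0;
}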
static int __snapshot_host_stage2(u64 start, u64 pa_max, u32 level,
kvm_pte_t *ptep,
enum kvm_pgtable_walk_flags flags,
void * const arg)
{
struct pkvm_iommu_driver * const drv = arg;
u64 end = start + kvm_granule_size(level);
kvm_pte_t pte = *ptep;
/*
* Valid stage-2 entries are created lazily, invalid ones eagerly.
* Note: In the future we may need to check if [start,end) is MMIO.
* Note: Drivers initialize their PTs to all memory owned by the host,
* so we only call the driver on regions where that is not the case.
*/
if (pte && !kvm_pte_valid(pte))
drv->ops->host_stage2_idmap_prepare(start, end, /*prot*/ 0);
return 0;
}
static int snapshot_host_stage2(struct pkvm_iommu_driver * const drv)
{
struct kvm_pgtable_walker walker = {
.cb = __snapshot_host_stage2,
.arg = drv,
.flags = KVM_PGTABLE_WALK_LEAF,
};
struct kvm_pgtable *pgt = &host_mmu.pgt;
if (!drv->ops->host_stage2_idmap_prepare)
return 0;
return kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker);
}
static bool validate_against_existing_iommus(struct pkvm_iommu *dev)
{
struct pkvm_iommu *other;
assert_host_component_locked();
list_for_each_entry(other, &iommu_list, list) {
/* Device ID must be unique. */
if (dev->id == other->id)
return false;
/* MMIO regions must not overlap. */
if (is_overlap(dev->pa, dev->size, other->pa, other->size))
return false;
}
return true;
}
static struct pkvm_iommu *find_iommu_by_id(unsigned long id)
{
struct pkvm_iommu *dev;
assert_host_component_locked();
list_for_each_entry(dev, &iommu_list, list) {
if (dev->id == id)
return dev;
}
return NULL;
}
/*
* Initialize EL2 IOMMU driver.
*
* This is a common hypercall for driver initialization. Driver-specific
* arguments are passed in a shared memory buffer. The driver is expected to
* initialize its page-table bookkeeping.
*/
int __pkvm_iommu_driver_init(struct pkvm_iommu_driver *drv, void *data, size_t size)
{
const struct pkvm_iommu_ops *ops;
int ret = 0;
/* New driver initialization not allowed after __pkvm_iommu_finalize(). */
hyp_spin_lock(&iommu_registration_lock);
if (iommu_finalized) {
ret = -EPERM;
goto out_unlock;
}
ret = __pkvm_register_iommu_driver(drv);
if (ret)
return ret;
if (!drv->ops) {
ret = -EINVAL;
goto out_unlock;
}
if (!driver_acquire_init(drv)) {
ret = -EBUSY;
goto out_unlock;
}
ops = drv->ops;
/* This can change stage-2 mappings. */
if (ops->init) {
ret = hyp_pin_shared_mem(data, data + size);
if (!ret) {
ret = ops->init(data, size);
hyp_unpin_shared_mem(data, data + size);
}
if (ret)
goto out_release;
}
/*
* Walk host stage-2 and pass current mappings to the driver. Start
* accepting host stage-2 updates as soon as the host lock is released.
*/
host_lock_component();
ret = snapshot_host_stage2(drv);
if (!ret)
driver_release_init(drv, /*success=*/true);
host_unlock_component();
out_release:
if (ret)
driver_release_init(drv, /*success=*/false);
out_unlock:
hyp_spin_unlock(&iommu_registration_lock);
return ret;
}
int __pkvm_iommu_register(unsigned long dev_id, unsigned long drv_id,
phys_addr_t dev_pa, size_t dev_size,
unsigned long parent_id,
void *kern_mem_va, size_t mem_size)
{
struct pkvm_iommu *dev = NULL;
struct pkvm_iommu_driver *drv;
void *mem_va = NULL;
int ret = 0;
/* New device registration not allowed after __pkvm_iommu_finalize(). */
hyp_spin_lock(&iommu_registration_lock);
if (iommu_finalized) {
ret = -EPERM;
goto out_unlock;
}
drv = get_driver(drv_id);
if (!drv || !is_driver_ready(drv)) {
ret = -ENOENT;
goto out_unlock;
}
if (!PAGE_ALIGNED(dev_pa) || !PAGE_ALIGNED(dev_size)) {
ret = -EINVAL;
goto out_unlock;
}
if (!is_mmio_range(dev_pa, dev_size)) {
ret = -EINVAL;
goto out_unlock;
}
/*
* Accept memory donation if the host is providing new memory.
* Note: We do not return the memory even if there is an error later.
*/
if (kern_mem_va && mem_size) {
mem_va = kern_hyp_va(kern_mem_va);
if (!PAGE_ALIGNED(mem_va) || !PAGE_ALIGNED(mem_size)) {
ret = -EINVAL;
goto out_unlock;
}
ret = __pkvm_host_donate_hyp(hyp_virt_to_pfn(mem_va),
mem_size >> PAGE_SHIFT);
if (ret)
goto out_unlock;
}
host_lock_component();
/* Allocate memory for the new device entry. */
dev = alloc_iommu(drv, mem_va, mem_size);
if (!dev) {
ret = -ENOMEM;
goto out_free;
}
/* Populate the new device entry. */
*dev = (struct pkvm_iommu){
.children = LIST_HEAD_INIT(dev->children),
.id = dev_id,
.ops = drv->ops,
.pa = dev_pa,
.size = dev_size,
};
if (!validate_against_existing_iommus(dev)) {
ret = -EBUSY;
goto out_free;
}
if (parent_id) {
dev->parent = find_iommu_by_id(parent_id);
if (!dev->parent) {
ret = -EINVAL;
goto out_free;
}
if (dev->parent->ops->validate_child) {
ret = dev->parent->ops->validate_child(dev->parent, dev);
if (ret)
goto out_free;
}
}
if (dev->ops->validate) {
ret = dev->ops->validate(dev);
if (ret)
goto out_free;
}
/*
* Unmap the device's MMIO range from host stage-2. If registration
* is successful, future attempts to re-map will be blocked by
* pkvm_iommu_host_stage2_adjust_range.
*/
ret = host_stage2_unmap_reg_locked(dev_pa, dev_size);
if (ret)
goto out_free;
/* Create EL2 mapping for the device. */
ret = __pkvm_create_private_mapping(dev_pa, dev_size,
PAGE_HYP_DEVICE, (unsigned long *)(&dev->va));
if (ret)
goto out_free;
/* Register device and prevent host from mapping the MMIO range. */
list_add_tail(&dev->list, &iommu_list);
if (dev->parent)
list_add_tail(&dev->siblings, &dev->parent->children);
out_free:
if (ret)
free_iommu(drv, dev);
host_unlock_component();
out_unlock:
hyp_spin_unlock(&iommu_registration_lock);
return ret;
}
int __pkvm_iommu_finalize(void)
{
int ret = 0;
hyp_spin_lock(&iommu_registration_lock);
if (!iommu_finalized)
iommu_finalized = true;
else
ret = -EPERM;
hyp_spin_unlock(&iommu_registration_lock);
return ret;
}
int __pkvm_iommu_pm_notify(unsigned long dev_id, enum pkvm_iommu_pm_event event)
{
struct pkvm_iommu *dev;
int ret;
host_lock_component();
dev = find_iommu_by_id(dev_id);
if (dev) {
if (event == PKVM_IOMMU_PM_SUSPEND) {
ret = dev->ops->suspend ? dev->ops->suspend(dev) : 0;
if (!ret)
dev->powered = false;
} else if (event == PKVM_IOMMU_PM_RESUME) {
ret = dev->ops->resume ? dev->ops->resume(dev) : 0;
if (!ret)
dev->powered = true;
} else {
ret = -EINVAL;
}
} else {
ret = -ENODEV;
}
host_unlock_component();
return ret;
}
/*
* Check host memory access against IOMMUs' MMIO regions.
* Returns -EPERM if the address is within the bounds of a registered device.
* Otherwise returns zero and adjusts boundaries of the new mapping to avoid
* MMIO regions of registered IOMMUs.
*/
int pkvm_iommu_host_stage2_adjust_range(phys_addr_t addr, phys_addr_t *start,
phys_addr_t *end)
{
struct pkvm_iommu *dev;
phys_addr_t new_start = *start;
phys_addr_t new_end = *end;
phys_addr_t dev_start, dev_end;
assert_host_component_locked();
list_for_each_entry(dev, &iommu_list, list) {
dev_start = dev->pa;
dev_end = dev_start + dev->size;
if (addr < dev_start)
new_end = min(new_end, dev_start);
else if (addr >= dev_end)
new_start = max(new_start, dev_end);
else
return -EPERM;
}
*start = new_start;
*end = new_end;
return 0;
}
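The walker either refuses the access when the faulting address sits inside a device window, or clamps the candidate mapping so it stops short of every window while still covering the faulting address. A standalone sketch of that clamping, using plain integers for addresses:

#include <stdint.h>
#include <stdio.h>

struct window { uint64_t start, end; };   /* device MMIO window, half-open */

/* Clamp [*start, *end) around the device windows so it still contains 'addr'.
 * Returns -1 if 'addr' itself falls inside a window (access must be refused). */
static int adjust_range(uint64_t addr, uint64_t *start, uint64_t *end,
			const struct window *w, int nr)
{
	uint64_t s = *start, e = *end;
	int i;

	for (i = 0; i < nr; i++) {
		if (addr < w[i].start)
			e = e < w[i].start ? e : w[i].start;   /* stop before the window */
		else if (addr >= w[i].end)
			s = s > w[i].end ? s : w[i].end;       /* start after the window */
		else
			return -1;                             /* faulting on the device */
	}
	*start = s;
	*end = e;
	return 0;
}

int main(void)
{
	const struct window w[] = { { 0x2000, 0x3000 } };
	uint64_t s = 0x0000, e = 0x10000;

	if (!adjust_range(0x1000, &s, &e, w, 1))
		printf("mapped [%#llx, %#llx)\n",              /* [0x0, 0x2000) */
		       (unsigned long long)s, (unsigned long long)e);
	return 0;
}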
bool pkvm_iommu_host_dabt_handler(struct kvm_cpu_context *host_ctxt, u32 esr,
phys_addr_t pa)
{
struct pkvm_iommu *dev;
assert_host_component_locked();
list_for_each_entry(dev, &iommu_list, list) {
if (pa < dev->pa || pa >= dev->pa + dev->size)
continue;
/* No 'powered' check - the host assumes it is powered. */
if (!dev->ops->host_dabt_handler ||
!dev->ops->host_dabt_handler(dev, host_ctxt, esr, pa - dev->pa))
return false;
kvm_skip_host_instr();
return true;
}
return false;
}
void pkvm_iommu_host_stage2_idmap(phys_addr_t start, phys_addr_t end,
enum kvm_pgtable_prot prot)
{
struct pkvm_iommu_driver *drv;
struct pkvm_iommu *dev;
assert_host_component_locked();
hyp_spin_lock(&iommu_drv_lock);
list_for_each_entry(drv, &iommu_drivers, list) {
if (drv && is_driver_ready(drv) && drv->ops->host_stage2_idmap_prepare)
drv->ops->host_stage2_idmap_prepare(start, end, prot);
}
hyp_spin_unlock(&iommu_drv_lock);
list_for_each_entry(dev, &iommu_list, list) {
if (dev->powered && dev->ops->host_stage2_idmap_apply)
dev->ops->host_stage2_idmap_apply(dev, start, end);
}
list_for_each_entry(dev, &iommu_list, list) {
if (dev->powered && dev->ops->host_stage2_idmap_complete)
dev->ops->host_stage2_idmap_complete(dev);
}
}


@@ -0,0 +1,321 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2022 - Google LLC
*/
#include <asm/io-mpt-s2mpu.h>
#define GRAN_BYTE(gran) ((gran << V9_MPT_PROT_BITS) | (gran))
#define GRAN_HWORD(gran) ((GRAN_BYTE(gran) << 8) | (GRAN_BYTE(gran)))
#define GRAN_WORD(gran) (((u32)(GRAN_HWORD(gran) << 16) | (GRAN_HWORD(gran))))
#define GRAN_DWORD(gran) ((u64)((u64)GRAN_WORD(gran) << 32) | (u64)(GRAN_WORD(gran)))
#define SMPT_NUM_TO_BYTE(x) ((x) / SMPT_GRAN / SMPT_ELEMS_PER_BYTE(config_prot_bits))
#define BYTE_TO_SMPT_INDEX(x) ((x) / SMPT_WORD_BYTE_RANGE(config_prot_bits))
/*
* MPT table ops can only be configured for one version at runtime.
* These variables hold the version-specific data set at runtime init, to avoid
* duplicate code or unnecessary checks during operations.
*/
static u32 config_prot_bits;
static u32 config_access_shift;
static const u64 *config_lut_prot;
static u32 config_gran_mask;
static u32 this_version;
/*
* Look-up table of page-table entries for the different protection values.
* The granularity is a compile-time config, so this can also be done for
* this array without needing duplicate arrays.
*/
static const u64 v9_mpt_prot_doubleword[] = {
[MPT_PROT_NONE] = 0x0000000000000000 | GRAN_DWORD(SMPT_GRAN_ATTR),
[MPT_PROT_R] = 0x4444444444444444 | GRAN_DWORD(SMPT_GRAN_ATTR),
[MPT_PROT_W] = 0x8888888888888888 | GRAN_DWORD(SMPT_GRAN_ATTR),
[MPT_PROT_RW] = 0xcccccccccccccccc | GRAN_DWORD(SMPT_GRAN_ATTR),
};
static const u64 mpt_prot_doubleword[] = {
[MPT_PROT_NONE] = 0x0000000000000000,
[MPT_PROT_R] = 0x5555555555555555,
[MPT_PROT_W] = 0xaaaaaaaaaaaaaaaa,
[MPT_PROT_RW] = 0xffffffffffffffff,
};
static inline int pte_from_addr_smpt(u32 *smpt, u64 addr)
{
u32 word_idx, idx, pte, val;
word_idx = BYTE_TO_SMPT_INDEX(addr);
val = READ_ONCE(smpt[word_idx]);
idx = (addr / SMPT_GRAN) % SMPT_ELEMS_PER_WORD(config_prot_bits);
pte = (val >> (idx * config_prot_bits)) & ((1 << config_prot_bits)-1);
return pte;
}
static inline int prot_from_addr_smpt(u32 *smpt, u64 addr)
{
int pte = pte_from_addr_smpt(smpt, addr);
return (pte >> config_access_shift);
}
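Each 32-bit SMPT word packs several per-granule entries, so an address resolves to a word index plus an element index inside that word. The sketch below redoes that arithmetic with an assumed 4 KiB granule and 2 protection bits per entry (both illustrative):

#include <stdint.h>
#include <stdio.h>

#define GRAN            4096u   /* illustrative granule size */
#define PROT_BITS       2u      /* illustrative bits per entry */
#define ELEMS_PER_WORD  (32u / PROT_BITS)           /* 16 entries per u32 */
#define WORD_BYTE_RANGE (GRAN * ELEMS_PER_WORD)     /* bytes covered by one word */

/* Extract the per-granule protection value for 'addr' from a packed table. */
static unsigned int pte_at(const uint32_t *smpt, uint64_t addr)
{
	uint64_t word_idx = addr / WORD_BYTE_RANGE;
	unsigned int elem = (addr / GRAN) % ELEMS_PER_WORD;

	return (smpt[word_idx] >> (elem * PROT_BITS)) & ((1u << PROT_BITS) - 1);
}

int main(void)
{
	uint32_t smpt[4] = { 0 };

	/* Mark the granule at 20 KiB (word 0, element 5) as read-write (0b11). */
	smpt[0] |= 3u << (5 * PROT_BITS);
	printf("prot=%u\n", pte_at(smpt, 5 * GRAN));   /* 3 */
	printf("prot=%u\n", pte_at(smpt, 6 * GRAN));   /* 0 */
	return 0;
}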
/* Set protection bits of SMPT in a given range without using memset. */
static void __set_smpt_range_slow(u32 *smpt, size_t start_gb_byte,
size_t end_gb_byte, enum mpt_prot prot)
{
size_t i, start_word_byte, end_word_byte, word_idx, first_elem, last_elem;
u32 val;
/* Iterate over u32 words. */
start_word_byte = start_gb_byte;
while (start_word_byte < end_gb_byte) {
/* Determine the range of bytes covered by this word. */
word_idx = BYTE_TO_SMPT_INDEX(start_word_byte);
end_word_byte = min(
ALIGN(start_word_byte + 1, SMPT_WORD_BYTE_RANGE(config_prot_bits)),
end_gb_byte);
/* Identify protection bit offsets within the word. */
first_elem = (start_word_byte / SMPT_GRAN) % SMPT_ELEMS_PER_WORD(config_prot_bits);
last_elem =
((end_word_byte - 1) / SMPT_GRAN) % SMPT_ELEMS_PER_WORD(config_prot_bits);
/* Modify the corresponding word. */
val = READ_ONCE(smpt[word_idx]);
for (i = first_elem; i <= last_elem; i++) {
val &= ~(MPT_PROT_MASK << (i * config_prot_bits + config_access_shift));
val |= prot << (i * config_prot_bits + config_access_shift);
}
WRITE_ONCE(smpt[word_idx], val);
start_word_byte = end_word_byte;
}
}
/* Set protection bits of SMPT in a given range. */
static void __set_smpt_range(u32 *smpt, size_t start_gb_byte,
size_t end_gb_byte, enum mpt_prot prot)
{
size_t interlude_start, interlude_end, interlude_bytes, word_idx;
char prot_byte = (char)config_lut_prot[prot];
if (start_gb_byte >= end_gb_byte)
return;
/* Check if range spans at least one full u32 word. */
interlude_start = ALIGN(start_gb_byte, SMPT_WORD_BYTE_RANGE(config_prot_bits));
interlude_end = ALIGN_DOWN(end_gb_byte, SMPT_WORD_BYTE_RANGE(config_prot_bits));
/*
* If not, fall back to editing bits in the given range.
* This sets bits for PTEs that cover less than a full 32-bit word (can't be done by memset).
*/
if (interlude_start >= interlude_end) {
__set_smpt_range_slow(smpt, start_gb_byte, end_gb_byte, prot);
return;
}
/* Use bit-editing for prologue/epilogue, memset for interlude. */
word_idx = BYTE_TO_SMPT_INDEX(interlude_start);
interlude_bytes = SMPT_NUM_TO_BYTE(interlude_end - interlude_start);
/*
* These are pages at the start and at the end that are
* not part of a full 32-bit SMPT word.
*/
__set_smpt_range_slow(smpt, start_gb_byte, interlude_start, prot);
memset(&smpt[word_idx], prot_byte, interlude_bytes);
__set_smpt_range_slow(smpt, interlude_end, end_gb_byte, prot);
}
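__set_smpt_range() splits the byte range into an unaligned prologue, a word-aligned middle that can be filled with memset because the protection byte repeats the same pattern, and an unaligned epilogue. A standalone sketch of just the split, with an invented per-word byte coverage:

#include <stdint.h>
#include <stdio.h>

#define WORD_RANGE 64u   /* illustrative: bytes of address space covered per table word */

static uint64_t align_up(uint64_t x, uint64_t a)   { return (x + a - 1) & ~(a - 1); }
static uint64_t align_down(uint64_t x, uint64_t a) { return x & ~(a - 1); }

/* Split [start, end) into slow prologue, memset-able middle, slow epilogue. */
static void split_range(uint64_t start, uint64_t end)
{
	uint64_t mid_start = align_up(start, WORD_RANGE);
	uint64_t mid_end   = align_down(end, WORD_RANGE);

	if (mid_start >= mid_end) {
		printf("slow path only: [%llu, %llu)\n",
		       (unsigned long long)start, (unsigned long long)end);
		return;
	}
	printf("prologue [%llu, %llu) memset [%llu, %llu) epilogue [%llu, %llu)\n",
	       (unsigned long long)start, (unsigned long long)mid_start,
	       (unsigned long long)mid_start, (unsigned long long)mid_end,
	       (unsigned long long)mid_end, (unsigned long long)end);
}

int main(void)
{
	split_range(10, 250);   /* prologue [10,64) memset [64,192) epilogue [192,250) */
	split_range(10, 40);    /* too small to cover a whole word: slow path only */
	return 0;
}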
/* Returns true if all SMPT protection bits match 'prot'. */
static bool __is_smpt_uniform(u32 *smpt, enum mpt_prot prot)
{
size_t i;
u64 *doublewords = (u64 *)smpt;
for (i = 0; i < SMPT_NUM_WORDS(config_prot_bits) / 2; i++) {
if (doublewords[i] != config_lut_prot[prot])
return false;
}
return true;
}
/*
* Set protection bits of FMPT/SMPT in a given range.
* Returns flags specifying whether L1/L2 changes need to be made visible
* to the device.
*/
static void __set_fmpt_range(struct fmpt *fmpt, size_t start_gb_byte,
size_t end_gb_byte, enum mpt_prot prot)
{
if (start_gb_byte == 0 && end_gb_byte >= SZ_1G) {
/* Update covers the entire GB region. */
if (fmpt->gran_1g && fmpt->prot == prot) {
fmpt->flags = 0;
return;
}
fmpt->gran_1g = true;
fmpt->prot = prot;
fmpt->flags = MPT_UPDATE_L1;
return;
}
if (fmpt->gran_1g) {
/* GB region currently uses 1G mapping. */
if (fmpt->prot == prot) {
fmpt->flags = 0;
return;
}
/*
* Range has different mapping than the rest of the GB.
* Convert to PAGE_SIZE mapping.
*/
fmpt->gran_1g = false;
__set_smpt_range(fmpt->smpt, 0, start_gb_byte, fmpt->prot);
__set_smpt_range(fmpt->smpt, start_gb_byte, end_gb_byte, prot);
__set_smpt_range(fmpt->smpt, end_gb_byte, SZ_1G, fmpt->prot);
fmpt->flags = MPT_UPDATE_L1 | MPT_UPDATE_L2;
return;
}
/* GB region currently uses PAGE_SIZE mapping. */
__set_smpt_range(fmpt->smpt, start_gb_byte, end_gb_byte, prot);
/* Check if the entire GB region has the same prot bits. */
if (!__is_smpt_uniform(fmpt->smpt, prot)) {
fmpt->flags = MPT_UPDATE_L2;
return;
}
fmpt->gran_1g = true;
fmpt->prot = prot;
fmpt->flags = MPT_UPDATE_L1;
}
static u32 smpt_size(void)
{
return SMPT_SIZE(config_prot_bits);
}
static void __set_l1entry_attr_with_prot(void *dev_va, unsigned int gb,
unsigned int vid, enum mpt_prot prot)
{
writel_relaxed(L1ENTRY_ATTR_1G(prot),
dev_va + REG_NS_L1ENTRY_ATTR(vid, gb));
}
static void __set_l1entry_attr_with_fmpt(void *dev_va, unsigned int gb,
unsigned int vid, struct fmpt *fmpt)
{
if (fmpt->gran_1g) {
__set_l1entry_attr_with_prot(dev_va, gb, vid, fmpt->prot);
} else {
/* Order against writes to the SMPT. */
writel(config_gran_mask | L1ENTRY_ATTR_L2TABLE_EN,
dev_va + REG_NS_L1ENTRY_ATTR(vid, gb));
}
}
static void __set_l1entry_l2table_addr(void *dev_va, unsigned int gb,
unsigned int vid, phys_addr_t addr)
{
/* Order against writes to the SMPT. */
writel(L1ENTRY_L2TABLE_ADDR(addr),
dev_va + REG_NS_L1ENTRY_L2TABLE_ADDR(vid, gb));
}
static void init_with_prot(void *dev_va, enum mpt_prot prot)
{
unsigned int gb, vid;
for_each_gb_and_vid(gb, vid)
__set_l1entry_attr_with_prot(dev_va, gb, vid, prot);
}
static void init_with_mpt(void *dev_va, struct mpt *mpt)
{
unsigned int gb, vid;
struct fmpt *fmpt;
for_each_gb_and_vid(gb, vid) {
fmpt = &mpt->fmpt[gb];
__set_l1entry_l2table_addr(dev_va, gb, vid, __hyp_pa(fmpt->smpt));
__set_l1entry_attr_with_fmpt(dev_va, gb, vid, fmpt);
}
}
static void apply_range(void *dev_va, struct mpt *mpt, u32 first_gb, u32 last_gb)
{
unsigned int gb, vid;
struct fmpt *fmpt;
for_each_gb_in_range(gb, first_gb, last_gb) {
fmpt = &mpt->fmpt[gb];
if (fmpt->flags & MPT_UPDATE_L1) {
for_each_vid(vid)
__set_l1entry_attr_with_fmpt(dev_va, gb, vid, fmpt);
}
}
}
static void prepare_range(struct mpt *mpt, phys_addr_t first_byte,
phys_addr_t last_byte, enum mpt_prot prot)
{
unsigned int first_gb = first_byte / SZ_1G;
unsigned int last_gb = last_byte / SZ_1G;
size_t start_gb_byte, end_gb_byte;
unsigned int gb;
struct fmpt *fmpt;
for_each_gb_in_range(gb, first_gb, last_gb) {
fmpt = &mpt->fmpt[gb];
start_gb_byte = (gb == first_gb) ? first_byte % SZ_1G : 0;
end_gb_byte = (gb == last_gb) ? (last_byte % SZ_1G) + 1 : SZ_1G;
__set_fmpt_range(fmpt, start_gb_byte, end_gb_byte, prot);
if (fmpt->flags & MPT_UPDATE_L2)
kvm_flush_dcache_to_poc(fmpt->smpt, smpt_size());
}
}
static const struct s2mpu_mpt_ops this_ops = {
.smpt_size = smpt_size,
.init_with_prot = init_with_prot,
.init_with_mpt = init_with_mpt,
.apply_range = apply_range,
.prepare_range = prepare_range,
.pte_from_addr_smpt = pte_from_addr_smpt,
};
const struct s2mpu_mpt_ops *s2mpu_get_mpt_ops(struct s2mpu_mpt_cfg cfg)
{
/* If called before with a different version, return NULL. */
if (WARN_ON(this_version && (this_version != cfg.version)))
return NULL;
/* 2MB granularity not supported in V9 */
if ((cfg.version == S2MPU_VERSION_9) && (SMPT_GRAN_ATTR != L1ENTRY_ATTR_GRAN_2M)) {
config_prot_bits = V9_MPT_PROT_BITS;
config_access_shift = V9_MPT_ACCESS_SHIFT;
config_lut_prot = v9_mpt_prot_doubleword;
config_gran_mask = L1ENTRY_ATTR_GRAN(SMPT_GRAN_ATTR, V9_L1ENTRY_ATTR_GRAN_MASK);
this_version = cfg.version;
return &this_ops;
} else if ((cfg.version == S2MPU_VERSION_2) || (cfg.version == S2MPU_VERSION_1)) {
config_prot_bits = MPT_PROT_BITS;
config_access_shift = MPT_ACCESS_SHIFT;
config_lut_prot = mpt_prot_doubleword;
config_gran_mask = L1ENTRY_ATTR_GRAN(SMPT_GRAN_ATTR, L1ENTRY_ATTR_GRAN_MASK);
this_version = cfg.version;
return &this_ops;
}
return NULL;
}


@@ -0,0 +1,703 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2021 - Google LLC
* Author: David Brazdil <dbrazdil@google.com>
*/
#include <linux/kvm_host.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_s2mpu.h>
#include <linux/arm-smccc.h>
#include <nvhe/iommu.h>
#include <nvhe/memory.h>
#include <nvhe/mm.h>
#include <nvhe/spinlock.h>
#include <nvhe/trap_handler.h>
#include <asm/io-mpt-s2mpu.h>
#define SMC_CMD_PREPARE_PD_ONOFF 0x82000410
#define SMC_MODE_POWER_UP 1
#define PA_MAX ((phys_addr_t)SZ_1G * NR_GIGABYTES)
#define SYNC_MAX_RETRIES 5
#define SYNC_TIMEOUT 5
#define SYNC_TIMEOUT_MULTIPLIER 3
#define CTX_CFG_ENTRY(ctxid, nr_ctx, vid) \
(CONTEXT_CFG_VALID_VID_CTX_VID(ctxid, vid) \
| (((ctxid) < (nr_ctx)) ? CONTEXT_CFG_VALID_VID_CTX_VALID(ctxid) : 0))
#define for_each_child(child, dev) \
list_for_each_entry((child), &(dev)->children, siblings)
/* HW version-specific operations. */
struct s2mpu_reg_ops {
int (*init)(struct pkvm_iommu *dev);
void (*set_control_regs)(struct pkvm_iommu *dev);
u32 (*host_mmio_reg_access_mask)(size_t off, bool is_write);
};
struct s2mpu_drv_data {
u32 version;
u32 context_cfg_valid_vid;
};
static const struct s2mpu_mpt_ops *mpt_ops;
static const struct s2mpu_reg_ops *reg_ops;
static struct mpt host_mpt;
const struct pkvm_iommu_ops pkvm_s2mpu_ops;
const struct pkvm_iommu_ops pkvm_sysmmu_sync_ops;
static inline enum mpt_prot prot_to_mpt(enum kvm_pgtable_prot prot)
{
return ((prot & KVM_PGTABLE_PROT_R) ? MPT_PROT_R : 0) |
((prot & KVM_PGTABLE_PROT_W) ? MPT_PROT_W : 0);
}
static bool is_version(struct pkvm_iommu *dev, u32 version)
{
struct s2mpu_drv_data *data = (struct s2mpu_drv_data *)dev->data;
return (data->version & VERSION_CHECK_MASK) == version;
}
static u32 __context_cfg_valid_vid(struct pkvm_iommu *dev, u32 vid_bmap)
{
struct s2mpu_drv_data *data = (struct s2mpu_drv_data *)dev->data;
u8 ctx_vid[NR_CTX_IDS] = { 0 };
unsigned int vid, ctx = 0;
unsigned int num_ctx;
u32 res;
/* Only initialize once. */
if (data->context_cfg_valid_vid)
return data->context_cfg_valid_vid;
num_ctx = readl_relaxed(dev->va + REG_NS_NUM_CONTEXT) & NUM_CONTEXT_MASK;
while (vid_bmap) {
/* Break if we cannot allocate more. */
if (ctx >= num_ctx)
break;
vid = __ffs(vid_bmap);
vid_bmap &= ~BIT(vid);
ctx_vid[ctx++] = vid;
}
/* The following loop was unrolled so bitmasks are constant. */
BUILD_BUG_ON(NR_CTX_IDS != 8);
res = CTX_CFG_ENTRY(0, ctx, ctx_vid[0])
| CTX_CFG_ENTRY(1, ctx, ctx_vid[1])
| CTX_CFG_ENTRY(2, ctx, ctx_vid[2])
| CTX_CFG_ENTRY(3, ctx, ctx_vid[3])
| CTX_CFG_ENTRY(4, ctx, ctx_vid[4])
| CTX_CFG_ENTRY(5, ctx, ctx_vid[5])
| CTX_CFG_ENTRY(6, ctx, ctx_vid[6])
| CTX_CFG_ENTRY(7, ctx, ctx_vid[7]);
data->context_cfg_valid_vid = res;
return res;
}
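__context_cfg_valid_vid() walks the VID bitmap lowest-bit-first and hands out hardware contexts until they run out. A standalone sketch of that allocation loop, using __builtin_ctz in place of __ffs and an assumed context count:

#include <stdint.h>
#include <stdio.h>

#define NR_CTX 4u   /* illustrative number of hardware contexts */

/* Hand out context IDs to VIDs in bitmap order until contexts run out. */
static unsigned int assign_contexts(uint32_t vid_bmap, uint8_t ctx_vid[NR_CTX])
{
	unsigned int ctx = 0;

	while (vid_bmap && ctx < NR_CTX) {
		unsigned int vid = __builtin_ctz(vid_bmap);  /* lowest set bit, like __ffs() */

		vid_bmap &= ~(1u << vid);
		ctx_vid[ctx++] = vid;
	}
	return ctx;     /* number of contexts actually used */
}

int main(void)
{
	uint8_t ctx_vid[NR_CTX] = { 0 };
	unsigned int used = assign_contexts(0xb1, ctx_vid);   /* VIDs 0, 4, 5, 7 */
	unsigned int i;

	for (i = 0; i < used; i++)
		printf("ctx%u -> vid%u\n", i, ctx_vid[i]);
	return 0;
}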
static int __initialize_v2(struct pkvm_iommu *dev)
{
u32 ssmt_valid_vid_bmap, ctx_cfg;
/* Assume all VIDs may be generated by the connected SSMTs for now. */
ssmt_valid_vid_bmap = ALL_VIDS_BITMAP;
ctx_cfg = __context_cfg_valid_vid(dev, ssmt_valid_vid_bmap);
if (!ctx_cfg)
return -EINVAL;
/*
* Write CONTEXT_CFG_VALID_VID configuration before touching L1ENTRY*
* registers. Writes to those registers are ignored unless there is
* a context ID allocated to the corresponding VID (v2 only).
*/
writel_relaxed(ctx_cfg, dev->va + REG_NS_CONTEXT_CFG_VALID_VID);
return 0;
}
static int __initialize(struct pkvm_iommu *dev)
{
struct s2mpu_drv_data *data = (struct s2mpu_drv_data *)dev->data;
if (!data->version)
data->version = readl_relaxed(dev->va + REG_NS_VERSION);
switch (data->version & VERSION_CHECK_MASK) {
case S2MPU_VERSION_1:
return 0;
case S2MPU_VERSION_2:
return __initialize_v2(dev);
default:
return -EINVAL;
}
}
static void __set_control_regs(struct pkvm_iommu *dev)
{
u32 ctrl0 = 0, irq_vids;
/*
* Note: We set the values of CTRL0, CTRL1 and CFG registers here but we
* still rely on the correctness of their reset values. S2MPUs *must*
* reset to a state where all DMA traffic is blocked until the hypervisor
* writes its configuration to the S2MPU. A malicious EL1 could otherwise
* attempt to bypass the permission checks in the window between powering
* on the S2MPU and this function being called.
*/
/* Enable the S2MPU, otherwise all traffic would be allowed through. */
ctrl0 |= CTRL0_ENABLE;
/*
* Enable interrupts on fault for all VIDs. The IRQ must also be
* specified in DT to get unmasked in the GIC.
*/
ctrl0 |= CTRL0_INTERRUPT_ENABLE;
irq_vids = ALL_VIDS_BITMAP;
/* Return SLVERR/DECERR to device on permission fault. */
ctrl0 |= is_version(dev, S2MPU_VERSION_2) ? CTRL0_FAULT_RESP_TYPE_DECERR
: CTRL0_FAULT_RESP_TYPE_SLVERR;
writel_relaxed(irq_vids, dev->va + REG_NS_INTERRUPT_ENABLE_PER_VID_SET);
writel_relaxed(0, dev->va + REG_NS_CFG);
writel_relaxed(0, dev->va + REG_NS_CTRL1);
writel_relaxed(ctrl0, dev->va + REG_NS_CTRL0);
}
static void __set_control_regs_v9(struct pkvm_iommu *dev)
{
/* Return DECERR to device on permission fault. */
writel_relaxed(ALL_VIDS_BITMAP,
dev->va + REG_NS_V9_CTRL_ERR_RESP_T_PER_VID_SET);
/*
* Enable interrupts on fault for all VIDs. The IRQ must also be
* specified in DT to get unmasked in the GIC.
*/
writel_relaxed(ALL_VIDS_BITMAP,
dev->va + REG_NS_INTERRUPT_ENABLE_PER_VID_SET);
writel_relaxed(0, dev->va + REG_NS_CTRL0);
/* Enable the S2MPU, otherwise all traffic would be allowed through. */
writel_relaxed(ALL_VIDS_BITMAP,
dev->va + REG_NS_V9_CTRL_PROT_EN_PER_VID_SET);
writel_relaxed(0, dev->va + REG_NS_V9_CFG_MPTW_ATTRIBUTE);
}
/*
* Poll the given SFR until its value has all bits of a given mask set.
* Returns true if successful, false if not successful after a given number of
* attempts.
*/
static bool __wait_until(void __iomem *addr, u32 mask, size_t max_attempts)
{
size_t i;
for (i = 0; i < max_attempts; i++) {
if ((readl_relaxed(addr) & mask) == mask)
return true;
}
return false;
}
/* Poll the given SFR as long as its value has all bits of a given mask set. */
static void __wait_while(void __iomem *addr, u32 mask)
{
while ((readl_relaxed(addr) & mask) == mask)
continue;
}
static void __sync_cmd_start(struct pkvm_iommu *sync)
{
writel_relaxed(SYNC_CMD_SYNC, sync->va + REG_NS_SYNC_CMD);
}
static void __invalidation_barrier_slow(struct pkvm_iommu *sync)
{
size_t i, timeout;
/*
* Wait for transactions to drain if SysMMU_SYNCs were registered.
* Assumes that they are in the same power domain as the S2MPU.
*
* The algorithm will try initiating the SYNC if the SYNC_COMP_COMPLETE
* bit has not been set after a given number of attempts, increasing the
* timeout exponentially each time. If this cycle fails a given number
* of times, the algorithm will give up completely to avoid deadlock.
*/
timeout = SYNC_TIMEOUT;
for (i = 0; i < SYNC_MAX_RETRIES; i++) {
__sync_cmd_start(sync);
if (__wait_until(sync->va + REG_NS_SYNC_COMP, SYNC_COMP_COMPLETE, timeout))
break;
timeout *= SYNC_TIMEOUT_MULTIPLIER;
}
}
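The barrier polls the completion bit under a budget that grows geometrically on every retry (re-issuing the SYNC each round) and gives up after a fixed number of rounds rather than spinning forever. A minimal standalone sketch of that backoff shape, with the polled hardware bit faked by a counter:

#include <stdbool.h>
#include <stdio.h>

#define MAX_RETRIES 5
#define TIMEOUT     5
#define MULTIPLIER  3

/* Stand-in for the hardware completion bit: becomes true after N polls. */
static int polls_needed = 40;
static bool poll_complete(void)
{
	return --polls_needed <= 0;
}

static bool wait_with_backoff(void)
{
	unsigned int budget = TIMEOUT;
	int retry;

	for (retry = 0; retry < MAX_RETRIES; retry++) {
		unsigned int i;

		for (i = 0; i < budget; i++)
			if (poll_complete())
				return true;
		budget *= MULTIPLIER;        /* 5, 15, 45, 135, ... polls per round */
	}
	return false;                        /* give up instead of deadlocking */
}

int main(void)
{
	printf("%s\n", wait_with_backoff() ? "completed" : "gave up");
	return 0;
}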
/* Initiate invalidation barrier. */
static void __invalidation_barrier_init(struct pkvm_iommu *dev)
{
struct pkvm_iommu *sync;
for_each_child(sync, dev)
__sync_cmd_start(sync);
}
/* Wait for invalidation to complete. */
static void __invalidation_barrier_complete(struct pkvm_iommu *dev)
{
struct pkvm_iommu *sync;
/*
* Check if the SYNC_COMP_COMPLETE bit has been set for individual
* devices. If not, fall back to non-parallel invalidation.
*/
for_each_child(sync, dev) {
if (!(readl_relaxed(sync->va + REG_NS_SYNC_COMP) & SYNC_COMP_COMPLETE))
__invalidation_barrier_slow(sync);
}
/* Must not access SFRs while S2MPU is busy invalidating */
if (is_version(dev, S2MPU_VERSION_2) || is_version(dev, S2MPU_VERSION_9)) {
__wait_while(dev->va + REG_NS_STATUS,
STATUS_BUSY | STATUS_ON_INVALIDATING);
}
}
static void __all_invalidation(struct pkvm_iommu *dev)
{
writel_relaxed(INVALIDATION_INVALIDATE, dev->va + REG_NS_ALL_INVALIDATION);
__invalidation_barrier_init(dev);
__invalidation_barrier_complete(dev);
}
static void __range_invalidation_init(struct pkvm_iommu *dev, phys_addr_t first_byte,
phys_addr_t last_byte)
{
u32 start_ppn = first_byte >> RANGE_INVALIDATION_PPN_SHIFT;
u32 end_ppn = last_byte >> RANGE_INVALIDATION_PPN_SHIFT;
writel_relaxed(start_ppn, dev->va + REG_NS_RANGE_INVALIDATION_START_PPN);
writel_relaxed(end_ppn, dev->va + REG_NS_RANGE_INVALIDATION_END_PPN);
writel_relaxed(INVALIDATION_INVALIDATE, dev->va + REG_NS_RANGE_INVALIDATION);
__invalidation_barrier_init(dev);
}
/*
* Initialize S2MPU device and set all GB regions to 1G granularity with
* given protection bits.
*/
static int initialize_with_prot(struct pkvm_iommu *dev, enum mpt_prot prot)
{
int ret;
ret = reg_ops->init(dev);
if (ret)
return ret;
mpt_ops->init_with_prot(dev->va, prot);
__all_invalidation(dev);
/* Set control registers, enable the S2MPU. */
reg_ops->set_control_regs(dev);
return 0;
}
/*
* Initialize S2MPU device, set L2 table addresses and configure L1TABLE_ATTR
* registers according to the given MPT struct.
*/
static int initialize_with_mpt(struct pkvm_iommu *dev, struct mpt *mpt)
{
int ret;
ret = reg_ops->init(dev);
if (ret)
return ret;
mpt_ops->init_with_mpt(dev->va, mpt);
__all_invalidation(dev);
/* Set control registers, enable the S2MPU. */
reg_ops->set_control_regs(dev);
return 0;
}
static bool to_valid_range(phys_addr_t *start, phys_addr_t *end)
{
phys_addr_t new_start = *start;
phys_addr_t new_end = *end;
if (new_end > PA_MAX)
new_end = PA_MAX;
new_start = ALIGN_DOWN(new_start, SMPT_GRAN);
new_end = ALIGN(new_end, SMPT_GRAN);
if (new_start >= new_end)
return false;
*start = new_start;
*end = new_end;
return true;
}
static void __mpt_idmap_prepare(struct mpt *mpt, phys_addr_t first_byte,
phys_addr_t last_byte, enum mpt_prot prot)
{
mpt_ops->prepare_range(mpt, first_byte, last_byte, prot);
}
static void __mpt_idmap_apply(struct pkvm_iommu *dev, struct mpt *mpt,
phys_addr_t first_byte, phys_addr_t last_byte)
{
unsigned int first_gb = first_byte / SZ_1G;
unsigned int last_gb = last_byte / SZ_1G;
mpt_ops->apply_range(dev->va, mpt, first_gb, last_gb);
/* Initiate invalidation, completed in __mpt_idmap_complete. */
__range_invalidation_init(dev, first_byte, last_byte);
}
static void __mpt_idmap_complete(struct pkvm_iommu *dev, struct mpt *mpt)
{
__invalidation_barrier_complete(dev);
}
static void s2mpu_host_stage2_idmap_prepare(phys_addr_t start, phys_addr_t end,
enum kvm_pgtable_prot prot)
{
if (!to_valid_range(&start, &end))
return;
__mpt_idmap_prepare(&host_mpt, start, end - 1, prot_to_mpt(prot));
}
static void s2mpu_host_stage2_idmap_apply(struct pkvm_iommu *dev,
phys_addr_t start, phys_addr_t end)
{
if (!to_valid_range(&start, &end))
return;
__mpt_idmap_apply(dev, &host_mpt, start, end - 1);
}
static void s2mpu_host_stage2_idmap_complete(struct pkvm_iommu *dev)
{
__mpt_idmap_complete(dev, &host_mpt);
}
static int s2mpu_resume(struct pkvm_iommu *dev)
{
/*
* Initialize the S2MPU with the host stage-2 MPT. It is paramount
* that the S2MPU reset state is enabled and blocking all traffic,
* otherwise the host would not be forced to call the resume HVC
* before issuing DMA traffic.
*/
return initialize_with_mpt(dev, &host_mpt);
}
static int s2mpu_suspend(struct pkvm_iommu *dev)
{
/*
* Stop updating the S2MPU when the host informs us about the intention
* to suspend it. Writes to powered-down MMIO registers would trigger
* SErrors in EL1 otherwise. However, hyp must put S2MPU back to
* blocking state first, in case the host does not actually power it
* down and continues issuing DMA traffic.
*/
return initialize_with_prot(dev, MPT_PROT_NONE);
}
static u32 host_mmio_reg_access_mask_v9(size_t off, bool is_write)
{
const u32 no_access = 0;
const u32 read_write = (u32)(-1);
const u32 read_only = is_write ? no_access : read_write;
const u32 write_only = is_write ? read_write : no_access;
switch (off) {
/* Allow reading control registers for debugging. */
case REG_NS_CTRL0:
return read_only & V9_CTRL0_MASK;
case REG_NS_V9_CTRL_ERR_RESP_T_PER_VID_SET:
return read_only & ALL_VIDS_BITMAP;
case REG_NS_V9_CTRL_PROT_EN_PER_VID_SET:
return read_only & ALL_VIDS_BITMAP;
case REG_NS_V9_READ_STLB:
return write_only & (V9_READ_STLB_MASK_TYPEA | V9_READ_STLB_MASK_TYPEB);
case REG_NS_V9_READ_STLB_TPN:
return read_only & V9_READ_STLB_TPN_MASK;
case REG_NS_V9_READ_STLB_TAG_PPN:
return read_only & V9_READ_STLB_TAG_PPN_MASK;
case REG_NS_V9_READ_STLB_TAG_OTHERS:
return read_only & V9_READ_STLB_TAG_OTHERS_MASK;
case REG_NS_V9_READ_STLB_DATA:
return read_only;
case REG_NS_V9_MPTC_INFO:
return read_only & V9_READ_MPTC_INFO_MASK;
case REG_NS_V9_READ_MPTC:
return write_only & V9_READ_MPTC_MASK;
case REG_NS_V9_READ_MPTC_TAG_PPN:
return read_only & V9_READ_MPTC_TAG_PPN_MASK;
case REG_NS_V9_READ_MPTC_TAG_OTHERS:
return read_only & V9_READ_MPTC_TAG_OTHERS_MASK;
case REG_NS_V9_READ_MPTC_DATA:
return read_only;
case REG_NS_V9_PMMU_INFO:
return read_only & V9_READ_PMMU_INFO_MASK;
case REG_NS_V9_READ_PTLB:
return write_only & V9_READ_PTLB_MASK;
case REG_NS_V9_READ_PTLB_TAG:
return read_only & V9_READ_PTLB_TAG_MASK;
case REG_NS_V9_READ_PTLB_DATA_S1_EN_PPN_AP:
return read_only & V9_READ_PTLB_DATA_S1_ENABLE_PPN_AP_MASK;
case REG_NS_V9_READ_PTLB_DATA_S1_DIS_AP_LIST:
return read_only;
case REG_NS_V9_PMMU_INDICATOR:
return read_only & V9_READ_PMMU_INDICATOR_MASK;
case REG_NS_V9_SWALKER_INFO:
return read_only & V9_SWALKER_INFO_MASK;
};
if (off >= REG_NS_V9_PMMU_PTLB_INFO(0) && off < REG_NS_V9_PMMU_PTLB_INFO(V9_MAX_PTLB_NUM))
return read_only & V9_READ_PMMU_PTLB_INFO_MASK;
if (off >= REG_NS_V9_STLB_INFO(0) && off < REG_NS_V9_STLB_INFO(V9_MAX_STLB_NUM))
return read_only & V9_READ_SLTB_INFO_MASK;
return no_access;
}
static u32 host_mmio_reg_access_mask_v1_v2(size_t off, bool is_write)
{
const u32 no_access = 0;
const u32 read_write = (u32)(-1);
const u32 read_only = is_write ? no_access : read_write;
const u32 write_only = is_write ? read_write : no_access;
switch (off) {
/* Allow reading control registers for debugging. */
case REG_NS_CTRL0:
return read_only & CTRL0_MASK;
case REG_NS_CTRL1:
return read_only & CTRL1_MASK;
/* Allow reading MPTC entries for debugging. That involves:
* - writing (set,way) to READ_MPTC
* - reading READ_MPTC_*
*/
case REG_NS_READ_MPTC:
return write_only & READ_MPTC_MASK;
case REG_NS_READ_MPTC_TAG_PPN:
return read_only & READ_MPTC_TAG_PPN_MASK;
case REG_NS_READ_MPTC_TAG_OTHERS:
return read_only & READ_MPTC_TAG_OTHERS_MASK;
case REG_NS_READ_MPTC_DATA:
return read_only;
};
return no_access;
}
static u32 host_mmio_reg_access_mask(size_t off, bool is_write)
{
const u32 no_access = 0;
const u32 read_write = (u32)(-1);
const u32 read_only = is_write ? no_access : read_write;
const u32 write_only = is_write ? read_write : no_access;
u32 masked_off;
switch (off) {
case REG_NS_CFG:
return read_only & CFG_MASK;
/* Allow EL1 IRQ handler to clear interrupts. */
case REG_NS_INTERRUPT_CLEAR:
return write_only & ALL_VIDS_BITMAP;
/* Allow reading number of sets used by MPTC. */
case REG_NS_INFO:
return read_only & INFO_NUM_SET_MASK;
/* Allow EL1 IRQ handler to read bitmap of pending interrupts. */
case REG_NS_FAULT_STATUS:
return read_only & ALL_VIDS_BITMAP;
}
/* Allow reading L1ENTRY registers for debugging. */
if (off >= REG_NS_L1ENTRY_L2TABLE_ADDR(0, 0) &&
off < REG_NS_L1ENTRY_ATTR(NR_VIDS, 0))
return read_only;
/* Allow EL1 IRQ handler to read fault information. */
masked_off = off & ~REG_NS_FAULT_VID_MASK;
if ((masked_off == REG_NS_FAULT_PA_LOW(0)) ||
(masked_off == REG_NS_FAULT_PA_HIGH(0)) ||
(masked_off == REG_NS_FAULT_INFO(0)))
return read_only;
/* Check version-specific registers. */
return reg_ops->host_mmio_reg_access_mask(off, is_write);
}
static bool s2mpu_host_dabt_handler(struct pkvm_iommu *dev,
struct kvm_cpu_context *host_ctxt,
u32 esr, size_t off)
{
bool is_write = esr & ESR_ELx_WNR;
unsigned int len = BIT((esr & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT);
int rd = (esr & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT;
u32 mask;
/* Only handle MMIO access with u32 size and alignment. */
if ((len != sizeof(u32)) || (off & (sizeof(u32) - 1)))
return false;
mask = host_mmio_reg_access_mask(off, is_write);
if (!mask)
return false;
if (is_write)
writel_relaxed(cpu_reg(host_ctxt, rd) & mask, dev->va + off);
else
cpu_reg(host_ctxt, rd) = readl_relaxed(dev->va + off) & mask;
return true;
}
/*
* Operations that differ between versions. We need to maintain
* the old behaviour where v1 and v2 can be used together.
*/
const struct s2mpu_reg_ops ops_v1_v2 = {
.init = __initialize,
.host_mmio_reg_access_mask = host_mmio_reg_access_mask_v1_v2,
.set_control_regs = __set_control_regs,
};
const struct s2mpu_reg_ops ops_v9 = {
.init = __initialize_v2,
.host_mmio_reg_access_mask = host_mmio_reg_access_mask_v9,
.set_control_regs = __set_control_regs_v9,
};
static int s2mpu_init(void *data, size_t size)
{
struct mpt in_mpt;
u32 *smpt;
phys_addr_t pa;
unsigned int gb;
int ret = 0;
int smpt_nr_pages, smpt_size;
struct s2mpu_mpt_cfg cfg;
if (size != sizeof(in_mpt))
return -EINVAL;
/* The host can concurrently modify 'data'. Copy it to avoid TOCTOU. */
memcpy(&in_mpt, data, sizeof(in_mpt));
cfg.version = in_mpt.version;
/* Make sure the version sent is supported by the driver. */
if ((cfg.version == S2MPU_VERSION_1) || (cfg.version == S2MPU_VERSION_2))
reg_ops = &ops_v1_v2;
else if (cfg.version == S2MPU_VERSION_9)
reg_ops = &ops_v9;
else
return -ENODEV;
/* Get page table operations for this version. */
mpt_ops = s2mpu_get_mpt_ops(cfg);
/* If the version is not supported, return. */
if (!mpt_ops)
return -EINVAL;
smpt_size = mpt_ops->smpt_size();
smpt_nr_pages = smpt_size / PAGE_SIZE;
/* Take ownership of all SMPT buffers. This will also map them in. */
for_each_gb(gb) {
smpt = kern_hyp_va(in_mpt.fmpt[gb].smpt);
pa = __hyp_pa(smpt);
if (!IS_ALIGNED(pa, smpt_size)) {
ret = -EINVAL;
break;
}
ret = __pkvm_host_donate_hyp(pa >> PAGE_SHIFT, smpt_nr_pages);
if (ret)
break;
host_mpt.fmpt[gb] = (struct fmpt){
.smpt = smpt,
.gran_1g = true,
.prot = MPT_PROT_RW,
};
}
/* Try to return memory back if there was an error. */
if (ret) {
for_each_gb(gb) {
smpt = host_mpt.fmpt[gb].smpt;
if (!smpt)
break;
WARN_ON(__pkvm_hyp_donate_host(__hyp_pa(smpt) >> PAGE_SHIFT,
smpt_nr_pages));
}
memset(&host_mpt, 0, sizeof(host_mpt));
}
return ret;
}
static int s2mpu_validate(struct pkvm_iommu *dev)
{
if (dev->size != S2MPU_MMIO_SIZE)
return -EINVAL;
return 0;
}
static int s2mpu_validate_child(struct pkvm_iommu *dev, struct pkvm_iommu *child)
{
if (child->ops != &pkvm_sysmmu_sync_ops)
return -EINVAL;
return 0;
}
static int sysmmu_sync_validate(struct pkvm_iommu *dev)
{
if (dev->size != SYSMMU_SYNC_S2_MMIO_SIZE)
return -EINVAL;
if (!dev->parent || dev->parent->ops != &pkvm_s2mpu_ops)
return -EINVAL;
return 0;
}
const struct pkvm_iommu_ops pkvm_s2mpu_ops = (struct pkvm_iommu_ops){
.init = s2mpu_init,
.validate = s2mpu_validate,
.validate_child = s2mpu_validate_child,
.resume = s2mpu_resume,
.suspend = s2mpu_suspend,
.host_stage2_idmap_prepare = s2mpu_host_stage2_idmap_prepare,
.host_stage2_idmap_apply = s2mpu_host_stage2_idmap_apply,
.host_stage2_idmap_complete = s2mpu_host_stage2_idmap_complete,
.host_dabt_handler = s2mpu_host_dabt_handler,
.data_size = sizeof(struct s2mpu_drv_data),
};
const struct pkvm_iommu_ops pkvm_sysmmu_sync_ops = (struct pkvm_iommu_ops){
.validate = sysmmu_sync_validate,
};
struct pkvm_iommu_driver pkvm_s2mpu_driver = (struct pkvm_iommu_driver){
.ops = &pkvm_s2mpu_ops,
};
struct pkvm_iommu_driver pkvm_sysmmu_sync_driver = (struct pkvm_iommu_driver){
.ops = &pkvm_sysmmu_sync_ops,
};

File diff suppressed because it is too large


@@ -14,7 +14,9 @@
#include <nvhe/early_alloc.h>
#include <nvhe/gfp.h>
#include <nvhe/memory.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>
#include <nvhe/modules.h>
#include <nvhe/spinlock.h>
struct kvm_pgtable pkvm_pgtable;
@@ -23,7 +25,14 @@ hyp_spinlock_t pkvm_pgd_lock;
struct memblock_region hyp_memory[HYP_MEMBLOCK_REGIONS];
unsigned int hyp_memblock_nr;
static u64 __io_map_base;
static u64 __private_range_base;
static u64 __private_range_cur;
struct hyp_fixmap_slot {
u64 addr;
kvm_pte_t *ptep;
};
static DEFINE_PER_CPU(struct hyp_fixmap_slot, fixmap_slots);
static int __pkvm_create_mappings(unsigned long start, unsigned long size,
unsigned long phys, enum kvm_pgtable_prot prot)
@@ -42,29 +51,29 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size,
* @size: The size of the VA range to reserve.
* @haddr: The hypervisor virtual start address of the allocation.
*
* The private virtual address (VA) range is allocated above __io_map_base
* The private virtual address (VA) range is allocated above __private_range_base
* and aligned based on the order of @size.
*
* Return: 0 on success or negative error code on failure.
*/
int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr)
{
unsigned long base, addr;
unsigned long cur, addr;
int ret = 0;
hyp_spin_lock(&pkvm_pgd_lock);
/* Align the allocation based on the order of its size */
addr = ALIGN(__io_map_base, PAGE_SIZE << get_order(size));
addr = ALIGN(__private_range_cur, PAGE_SIZE << get_order(size));
/* The allocated size is always a multiple of PAGE_SIZE */
base = addr + PAGE_ALIGN(size);
cur = addr + PAGE_ALIGN(size);
/* Are we overflowing on the vmemmap ? */
if (!addr || base > __hyp_vmemmap)
/* Has the private range grown too large ? */
if (!addr || cur > __hyp_vmemmap || (cur - __private_range_base) > __PKVM_PRIVATE_SZ) {
ret = -ENOMEM;
else {
__io_map_base = base;
} else {
__private_range_cur = cur;
*haddr = addr;
}
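Private VA allocations are aligned to the power-of-two order of their size, so a 3-page request is placed on a 4-page boundary and the cursor only ever moves forward. A standalone sketch of that rule, assuming a 4 KiB page:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12u
#define PAGE_SZ    (1u << PAGE_SHIFT)

/* Smallest n such that (PAGE_SZ << n) >= size; mirrors get_order() for size > 0. */
static unsigned int order_of(uint64_t size)
{
	unsigned int order = 0;

	while ((PAGE_SZ << order) < size)
		order++;
	return order;
}

static uint64_t align_up(uint64_t x, uint64_t a) { return (x + a - 1) & ~(a - 1); }

int main(void)
{
	uint64_t cur = 0x100000 + PAGE_SZ;           /* pretend allocator cursor */
	uint64_t size = 3 * PAGE_SZ;                 /* 3-page request */
	uint64_t align = PAGE_SZ << order_of(size);  /* rounded up to 4 pages */
	uint64_t addr = align_up(cur, align);

	printf("cursor %#llx, request %#llx -> addr %#llx, new cursor %#llx\n",
	       (unsigned long long)cur, (unsigned long long)size,
	       (unsigned long long)addr,
	       (unsigned long long)(addr + align_up(size, PAGE_SZ)));
	return 0;
}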
@@ -93,6 +102,48 @@ int __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
return err;
}
void *__pkvm_alloc_module_va(u64 nr_pages)
{
unsigned long addr = 0;
pkvm_modules_lock();
if (pkvm_modules_enabled())
pkvm_alloc_private_va_range(nr_pages << PAGE_SHIFT, &addr);
pkvm_modules_unlock();
return (void *)addr;
}
int __pkvm_map_module_page(u64 pfn, void *va, enum kvm_pgtable_prot prot)
{
int ret = -EACCES;
pkvm_modules_lock();
if (!pkvm_modules_enabled())
goto err;
ret = __pkvm_host_donate_hyp(pfn, 1);
if (ret)
goto err;
ret = __pkvm_create_mappings((unsigned long)va, PAGE_SIZE, hyp_pfn_to_phys(pfn), prot);
err:
pkvm_modules_unlock();
return ret;
}
void __pkvm_unmap_module_page(u64 pfn, void *va)
{
pkvm_modules_lock();
if (pkvm_modules_enabled()) {
WARN_ON(__pkvm_hyp_donate_host(pfn, 1));
pkvm_remove_mappings(va, va + PAGE_SIZE);
}
pkvm_modules_unlock();
}
int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot)
{
unsigned long start = (unsigned long)from;
@@ -129,13 +180,45 @@ int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
return ret;
}
int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back)
void pkvm_remove_mappings(void *from, void *to)
{
unsigned long start, end;
unsigned long size = (unsigned long)to - (unsigned long)from;
hyp_vmemmap_range(phys, size, &start, &end);
hyp_spin_lock(&pkvm_pgd_lock);
WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, (u64)from, size) != size);
hyp_spin_unlock(&pkvm_pgd_lock);
}
return __pkvm_create_mappings(start, end - start, back, PAGE_HYP);
int hyp_back_vmemmap(phys_addr_t back)
{
unsigned long i, start, size, end = 0;
int ret;
for (i = 0; i < hyp_memblock_nr; i++) {
start = hyp_memory[i].base;
start = ALIGN_DOWN((u64)hyp_phys_to_page(start), PAGE_SIZE);
/*
* The beginning of the hyp_vmemmap region for the current
* memblock may already be backed by the page backing the end
* of the previous region, so avoid mapping it twice.
*/
start = max(start, end);
end = hyp_memory[i].base + hyp_memory[i].size;
end = PAGE_ALIGN((u64)hyp_phys_to_page(end));
if (start >= end)
continue;
size = end - start;
ret = __pkvm_create_mappings(start, size, back, PAGE_HYP);
if (ret)
return ret;
memset(hyp_phys_to_virt(back), 0, size);
back += size;
}
return 0;
}
static void *__hyp_bp_vect_base;
@@ -189,6 +272,103 @@ int hyp_map_vectors(void)
return 0;
}
void *hyp_fixmap_map(phys_addr_t phys)
{
struct hyp_fixmap_slot *slot = this_cpu_ptr(&fixmap_slots);
kvm_pte_t pte, *ptep = slot->ptep;
pte = *ptep;
pte &= ~kvm_phys_to_pte(KVM_PHYS_INVALID);
pte |= kvm_phys_to_pte(phys) | KVM_PTE_VALID;
WRITE_ONCE(*ptep, pte);
dsb(ishst);
return (void *)slot->addr + offset_in_page(phys);
}
static void fixmap_clear_slot(struct hyp_fixmap_slot *slot)
{
kvm_pte_t *ptep = slot->ptep;
u64 addr = slot->addr;
WRITE_ONCE(*ptep, *ptep & ~KVM_PTE_VALID);
/*
* Irritatingly, the architecture requires that we use inner-shareable
* broadcast TLB invalidation here in case another CPU speculates
* through our fixmap and decides to create an "amalgamation of the
* values held in the TLB" due to the apparent lack of a
* break-before-make sequence.
*
* https://lore.kernel.org/kvm/20221017115209.2099-1-will@kernel.org/T/#mf10dfbaf1eaef9274c581b81c53758918c1d0f03
*/
dsb(ishst);
__tlbi_level(vale2is, __TLBI_VADDR(addr, 0), (KVM_PGTABLE_MAX_LEVELS - 1));
dsb(ish);
isb();
}
void hyp_fixmap_unmap(void)
{
fixmap_clear_slot(this_cpu_ptr(&fixmap_slots));
}
static int __create_fixmap_slot_cb(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
enum kvm_pgtable_walk_flags flag,
void * const arg)
{
struct hyp_fixmap_slot *slot = per_cpu_ptr(&fixmap_slots, (u64)arg);
if (!kvm_pte_valid(*ptep) || level != KVM_PGTABLE_MAX_LEVELS - 1)
return -EINVAL;
slot->addr = addr;
slot->ptep = ptep;
/*
* Clear the PTE, but keep the page-table page refcount elevated to
* prevent it from ever being freed. This lets us manipulate the PTEs
* by hand safely without ever needing to allocate memory.
*/
fixmap_clear_slot(slot);
return 0;
}
static int create_fixmap_slot(u64 addr, u64 cpu)
{
struct kvm_pgtable_walker walker = {
.cb = __create_fixmap_slot_cb,
.flags = KVM_PGTABLE_WALK_LEAF,
.arg = (void *)cpu,
};
return kvm_pgtable_walk(&pkvm_pgtable, addr, PAGE_SIZE, &walker);
}
int hyp_create_pcpu_fixmap(void)
{
unsigned long addr, i;
int ret;
for (i = 0; i < hyp_nr_cpus; i++) {
ret = pkvm_alloc_private_va_range(PAGE_SIZE, &addr);
if (ret)
return ret;
ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, PAGE_SIZE,
__hyp_pa(__hyp_bss_start), PAGE_HYP);
if (ret)
return ret;
ret = create_fixmap_slot(addr, i);
if (ret)
return ret;
}
return 0;
}
int hyp_create_idmap(u32 hyp_va_bits)
{
unsigned long start, end;
@@ -207,9 +387,43 @@ int hyp_create_idmap(u32 hyp_va_bits)
* with the idmap to place the IOs and the vmemmap. IOs use the lower
* half of the quarter and the vmemmap the upper half.
*/
-__io_map_base = start & BIT(hyp_va_bits - 2);
-__io_map_base ^= BIT(hyp_va_bits - 2);
-__hyp_vmemmap = __io_map_base | BIT(hyp_va_bits - 3);
+__private_range_base = start & BIT(hyp_va_bits - 2);
+__private_range_base ^= BIT(hyp_va_bits - 2);
+__private_range_cur = __private_range_base;
+__hyp_vmemmap = __private_range_base | BIT(hyp_va_bits - 3);
return __pkvm_create_mappings(start, end - start, start, PAGE_HYP_EXEC);
}
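
The carve-out is pure bit arithmetic: flip bit (hyp_va_bits - 2) of the idmap address to land in the quarter of the VA space that does not contain the idmap, then set bit (hyp_va_bits - 3) to put the vmemmap in the upper half of that quarter. A standalone check with hyp_va_bits = 48 and a made-up idmap address, for illustration only:

/* Standalone check of the carve-out arithmetic; the idmap address is made up. */
#include <stdio.h>

#define BIT(n) (1UL << (n))

int main(void)
{
	unsigned long hyp_va_bits = 48;
	unsigned long idmap_start = 0x0000004080000000UL;	/* hypothetical */
	unsigned long private_base, vmemmap;

	private_base  = idmap_start & BIT(hyp_va_bits - 2);	/* quarter holding the idmap... */
	private_base ^= BIT(hyp_va_bits - 2);			/* ...flipped to the other quarter */
	vmemmap = private_base | BIT(hyp_va_bits - 3);		/* upper half of that quarter */

	printf("idmap page:         %#018lx\n", idmap_start);
	printf("private range base: %#018lx\n", private_base);	/* 0x0000400000000000 */
	printf("hyp vmemmap base:   %#018lx\n", vmemmap);	/* 0x0000600000000000 */
	return 0;
}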
static void *admit_host_page(void *arg)
{
struct kvm_hyp_memcache *host_mc = arg;
if (!host_mc->nr_pages)
return NULL;
/*
* The host still owns the pages in its memcache, so we need to go
* through a full host-to-hyp donation cycle to change it. Fortunately,
* __pkvm_host_donate_hyp() takes care of races for us, so if it
* succeeds we're good to go.
*/
if (__pkvm_host_donate_hyp(hyp_phys_to_pfn(host_mc->head), 1))
return NULL;
return pop_hyp_memcache(host_mc, hyp_phys_to_virt);
}
/* Refill our local memcache by popping pages from the one provided by the host. */
int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages,
struct kvm_hyp_memcache *host_mc)
{
struct kvm_hyp_memcache tmp = *host_mc;
int ret;
ret = __topup_hyp_memcache(mc, min_pages, admit_host_page,
hyp_virt_to_phys, &tmp);
*host_mc = tmp;
return ret;
}
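
A kvm_hyp_memcache is just a head address plus a page count; the cached pages form a singly linked stack, each free page storing the address of the next one in its first word, and admit_host_page() adds a host-to-hyp donation step on top of the plain pop. A toy, user-space model of that push/pop scheme (illustration only, no phys/virt conversion):

/* Toy model of a hyp memcache: a stack of free pages linked through themselves. */
#include <stdio.h>
#include <stdlib.h>

struct toy_memcache {
	void *head;		/* most recently pushed page, NULL when empty */
	unsigned long nr_pages;
};

static void toy_push(struct toy_memcache *mc, void *page)
{
	*(void **)page = mc->head;	/* stash the old head in the page itself */
	mc->head = page;
	mc->nr_pages++;
}

static void *toy_pop(struct toy_memcache *mc)
{
	void *page = mc->head;

	if (!page)
		return NULL;
	mc->head = *(void **)page;	/* the next page was stored in this one */
	mc->nr_pages--;
	return page;
}

int main(void)
{
	struct toy_memcache mc = { NULL, 0 };

	for (int i = 0; i < 4; i++)
		toy_push(&mc, malloc(4096));
	printf("cached %lu pages\n", mc.nr_pages);
	while (mc.nr_pages)
		free(toy_pop(&mc));
	return 0;
}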

@@ -0,0 +1,26 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <asm/hyp_image.h>
#include <asm/page-def.h>
SECTIONS {
.hyp.text : {
HYP_SECTION_SYMBOL_NAME(.text) = .;
*(.text .text.*)
}
.hyp.bss : {
HYP_SECTION_SYMBOL_NAME(.bss) = .;
*(.bss .bss.*)
}
.hyp.rodata : {
HYP_SECTION_SYMBOL_NAME(.rodata) = .;
*(.rodata .rodata.*)
}
.hyp.data : {
HYP_SECTION_SYMBOL_NAME(.data) = .;
*(.data .data.*)
}
}

@@ -0,0 +1,208 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2022 Google LLC
*/
#include <asm/kvm_host.h>
#include <asm/kvm_pkvm_module.h>
#include <nvhe/mem_protect.h>
#include <nvhe/modules.h>
#include <nvhe/mm.h>
#include <nvhe/serial.h>
#include <nvhe/spinlock.h>
#include <nvhe/trap_handler.h>
static void __kvm_flush_dcache_to_poc(void *addr, size_t size)
{
kvm_flush_dcache_to_poc((unsigned long)addr, (unsigned long)size);
}
DEFINE_HYP_SPINLOCK(modules_lock);
bool __pkvm_modules_enabled __ro_after_init;
void pkvm_modules_lock(void)
{
hyp_spin_lock(&modules_lock);
}
void pkvm_modules_unlock(void)
{
hyp_spin_unlock(&modules_lock);
}
bool pkvm_modules_enabled(void)
{
return __pkvm_modules_enabled;
}
static u64 early_lm_pages;
static void *__pkvm_linear_map_early(phys_addr_t phys, size_t size, enum kvm_pgtable_prot prot)
{
void *addr = NULL;
int ret;
if (!PAGE_ALIGNED(phys) || !PAGE_ALIGNED(size))
return NULL;
pkvm_modules_lock();
if (!__pkvm_modules_enabled)
goto out;
addr = __hyp_va(phys);
ret = pkvm_create_mappings(addr, addr + size, prot);
if (ret)
addr = NULL;
else
early_lm_pages += size >> PAGE_SHIFT;
out:
pkvm_modules_unlock();
return addr;
}
static void __pkvm_linear_unmap_early(void *addr, size_t size)
{
pkvm_modules_lock();
pkvm_remove_mappings(addr, addr + size);
early_lm_pages -= size >> PAGE_SHIFT;
pkvm_modules_unlock();
}
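
These early linear-map hooks must be used in balanced pairs: __pkvm_close_module_registration() below refuses to believe the linear map is clean while early_lm_pages is non-zero. A hedged sketch of a helper a module's early init might call through the ops table; the carveout address, magic value and names are hypothetical:

/*
 * Hypothetical helper for a module's early init: peek at a firmware-described
 * carveout, then unmap it again so early_lm_pages is balanced before module
 * registration is closed. Sketch only.
 */
static int my_mod_probe_carveout(const struct pkvm_module_ops *ops)
{
	const phys_addr_t carveout_pa = 0x880000000UL;	/* hypothetical */
	const size_t carveout_sz = 2 * PAGE_SIZE;	/* must be page-aligned */
	u32 magic;
	void *va;

	va = ops->linear_map_early(carveout_pa, carveout_sz, PAGE_HYP);
	if (!va)
		return -EINVAL;

	magic = *(u32 *)va;
	ops->linear_unmap_early(va, carveout_sz);

	return magic == 0x4d4f4455 ? 0 : -ENODEV;	/* made-up magic */
}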
int __pkvm_close_module_registration(void)
{
int ret;
pkvm_modules_lock();
/*
* Page ownership tracking might go out of sync if there are stale
* entries in pKVM's linear map range, so they must really be gone by
* now.
*/
WARN_ON(early_lm_pages);
ret = __pkvm_modules_enabled ? 0 : -EACCES;
if (!ret) {
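/*
 * __pkvm_modules_enabled is marked __ro_after_init, so clear it through a
 * temporary writable alias in this CPU's fixmap slot rather than through
 * its normal mapping.
 */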
void *addr = hyp_fixmap_map(__hyp_pa(&__pkvm_modules_enabled));
*(bool *)addr = false;
hyp_fixmap_unmap();
}
pkvm_modules_unlock();
/* The fuse is blown! No way back until reset */
return ret;
}
const struct pkvm_module_ops module_ops = {
.create_private_mapping = __pkvm_create_private_mapping,
.register_serial_driver = __pkvm_register_serial_driver,
.puts = hyp_puts,
.putx64 = hyp_putx64,
.fixmap_map = hyp_fixmap_map,
.fixmap_unmap = hyp_fixmap_unmap,
.linear_map_early = __pkvm_linear_map_early,
.linear_unmap_early = __pkvm_linear_unmap_early,
.flush_dcache_to_poc = __kvm_flush_dcache_to_poc,
.register_host_perm_fault_handler = hyp_register_host_perm_fault_handler,
.protect_host_page = hyp_protect_host_page,
.register_host_smc_handler = __pkvm_register_host_smc_handler,
.register_default_trap_handler = __pkvm_register_default_trap_handler,
.register_illegal_abt_notifier = __pkvm_register_illegal_abt_notifier,
.register_psci_notifier = __pkvm_register_psci_notifier,
.register_hyp_panic_notifier = __pkvm_register_hyp_panic_notifier,
};
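
__pkvm_init_module() below simply hands this table to the module's EL2 entry point. A hedged sketch of what such an entry point could look like, using only the print helpers from the table above; the module name and behaviour are hypothetical, and a real module would go on to register its trap/SMC handlers and notifiers from here:

/*
 * Hypothetical EL2 init routine for a pKVM module; its address is what the
 * host-side loader would pass to __pkvm_init_module(). Sketch only.
 */
static int my_mod_hyp_init(const struct pkvm_module_ops *ops)
{
	ops->puts("my_mod: initialising at EL2");
	ops->putx64(0x1);	/* e.g. advertise a module version */

	/* Register handlers/notifiers via @ops here in a real module. */
	return 0;
}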
int __pkvm_init_module(void *module_init)
{
int (*do_module_init)(const struct pkvm_module_ops *ops) = module_init;
int ret;
pkvm_modules_lock();
if (!pkvm_modules_enabled()) {
ret = -EACCES;
goto err;
}
ret = do_module_init(&module_ops);
err:
pkvm_modules_unlock();
return ret;
}
#define MAX_DYNAMIC_HCALLS 128
atomic_t num_dynamic_hcalls = ATOMIC_INIT(0);
DEFINE_HYP_SPINLOCK(dyn_hcall_lock);
static dyn_hcall_t host_dynamic_hcalls[MAX_DYNAMIC_HCALLS];
int handle_host_dynamic_hcall(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(unsigned long, id, host_ctxt, 0);
dyn_hcall_t hfn;
int dyn_id;
/*
* TODO: static key to protect when no dynamic hcall is registered?
*/
dyn_id = (int)(id - KVM_HOST_SMCCC_ID(0)) -
__KVM_HOST_SMCCC_FUNC___dynamic_hcalls;
if (dyn_id < 0)
return HCALL_UNHANDLED;
cpu_reg(host_ctxt, 0) = SMCCC_RET_NOT_SUPPORTED;
/*
* Order access to num_dynamic_hcalls and host_dynamic_hcalls. Paired
* with __pkvm_register_hcall().
*/
if (dyn_id >= atomic_read_acquire(&num_dynamic_hcalls))
goto end;
hfn = READ_ONCE(host_dynamic_hcalls[dyn_id]);
if (!hfn)
goto end;
cpu_reg(host_ctxt, 0) = SMCCC_RET_SUCCESS;
hfn(host_ctxt);
end:
return HCALL_HANDLED;
}
int __pkvm_register_hcall(unsigned long hvn_hyp_va)
{
dyn_hcall_t hfn = (void *)hvn_hyp_va;
int reserved_id, ret;
pkvm_modules_lock();
if (!pkvm_modules_enabled()) {
ret = -EACCES;
goto err;
}
hyp_spin_lock(&dyn_hcall_lock);
reserved_id = atomic_read(&num_dynamic_hcalls);
if (reserved_id >= MAX_DYNAMIC_HCALLS) {
ret = -ENOMEM;
goto err_hcall_unlock;
}
WRITE_ONCE(host_dynamic_hcalls[reserved_id], hfn);
/*
* Order access to num_dynamic_hcalls and host_dynamic_hcalls. Paired
* with handle_host_dynamic_hcall.
*/
atomic_set_release(&num_dynamic_hcalls, reserved_id + 1);
ret = reserved_id + __KVM_HOST_SMCCC_FUNC___dynamic_hcalls;
err_hcall_unlock:
hyp_spin_unlock(&dyn_hcall_lock);
err:
pkvm_modules_unlock();
return ret;
};
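
From the module's point of view, the value returned here (reserved_id + __KVM_HOST_SMCCC_FUNC___dynamic_hcalls) is the SMCCC function number the host must use for handle_host_dynamic_hcall() to dispatch into the registered hfn, which runs with the host's kvm_cpu_context. A hedged sketch of such a handler; the module function is hypothetical:

/*
 * Hypothetical handler a module could pass to __pkvm_register_hcall().
 * handle_host_dynamic_hcall() has already set x0 to SMCCC_RET_SUCCESS by the
 * time this runs; results go back to the host in the other registers.
 */
static void my_mod_hcall(struct kvm_cpu_context *host_ctxt)
{
	DECLARE_REG(u64, arg, host_ctxt, 1);	/* x1 as passed by the host */

	cpu_reg(host_ctxt, 1) = arg + 1;	/* hand a result back in x1 */
}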
