Snap for 8781170 from 5969016e35 to android-mainline-keystone-qcom-release

Change-Id: Ib74997fa445c284c9642421dcd853a4e38b48c5b
commit 36fb3cbbbf

.gitignore | 1 +
@@ -45,6 +45,7 @@
 *.symversions
 *.tab.[ch]
 *.tar
+*.usyms
 *.xz
 *.zst
 Module.symvers
Documentation/ABI/testing/sysfs-driver-chromeos-acpi (new file, 137 lines)
@@ -0,0 +1,137 @@
What:		/sys/bus/platform/devices/GGL0001:*/BINF.2
Date:		May 2022
KernelVersion:	5.19
Description:
		Returns active EC firmware of current boot (boolean).

		== ===============================
		0  Read only (recovery) firmware.
		1  Rewritable firmware.
		== ===============================

What:		/sys/bus/platform/devices/GGL0001:*/BINF.3
Date:		May 2022
KernelVersion:	5.19
Description:
		Returns main firmware type for current boot (integer).

		== =====================================
		0  Recovery.
		1  Normal.
		2  Developer.
		3  Netboot (factory installation only).
		== =====================================

What:		/sys/bus/platform/devices/GGL0001:*/CHSW
Date:		May 2022
KernelVersion:	5.19
Description:
		Returns switch position for Chrome OS specific hardware
		switches when the firmware is booted (integer).

		==== ===========================================
		0    No changes.
		2    Recovery button was pressed.
		4    Recovery button was pressed (EC firmware).
		32   Developer switch was enabled.
		512  Firmware write protection was disabled.
		==== ===========================================

What:		/sys/bus/platform/devices/GGL0001:*/FMAP
Date:		May 2022
KernelVersion:	5.19
Description:
		Returns physical memory address of the start of the main
		processor firmware flashmap.

What:		/sys/bus/platform/devices/GGL0001:*/FRID
Date:		May 2022
KernelVersion:	5.19
Description:
		Returns firmware version for the read-only portion of the
		main processor firmware.

What:		/sys/bus/platform/devices/GGL0001:*/FWID
Date:		May 2022
KernelVersion:	5.19
Description:
		Returns firmware version for the rewritable portion of the
		main processor firmware.

What:		/sys/bus/platform/devices/GGL0001:*/GPIO.X/GPIO.0
Date:		May 2022
KernelVersion:	5.19
Description:
		Returns type of the GPIO signal for the Chrome OS specific
		GPIO assignments (integer).

		=========== ==================================
		1           Recovery button.
		2           Developer mode switch.
		3           Firmware write protection switch.
		256 to 511  Debug header GPIO 0 to GPIO 255.
		=========== ==================================

What:		/sys/bus/platform/devices/GGL0001:*/GPIO.X/GPIO.1
Date:		May 2022
KernelVersion:	5.19
Description:
		Returns signal attributes of the GPIO signal (integer bitfield).

		== =======================
		0  Signal is active low.
		1  Signal is active high.
		== =======================

What:		/sys/bus/platform/devices/GGL0001:*/GPIO.X/GPIO.2
Date:		May 2022
KernelVersion:	5.19
Description:
		Returns the GPIO number on the specified GPIO
		controller.

What:		/sys/bus/platform/devices/GGL0001:*/GPIO.X/GPIO.3
Date:		May 2022
KernelVersion:	5.19
Description:
		Returns name of the GPIO controller.

What:		/sys/bus/platform/devices/GGL0001:*/HWID
Date:		May 2022
KernelVersion:	5.19
Description:
		Returns hardware ID for the Chromebook.

What:		/sys/bus/platform/devices/GGL0001:*/MECK
Date:		May 2022
KernelVersion:	5.19
Description:
		Returns the SHA-1 or SHA-256 hash that is read out of the
		Management Engine extended registers during boot. The hash
		is exported via ACPI so the OS can verify that the Management
		Engine firmware has not changed. If Management Engine is not
		present, or if the firmware was unable to read the extended
		registers, this buffer size can be zero.

What:		/sys/bus/platform/devices/GGL0001:*/VBNV.0
Date:		May 2022
KernelVersion:	5.19
Description:
		Returns offset in CMOS bank 0 of the verified boot non-volatile
		storage block, counting from the first writable CMOS byte
		(that is, 'offset = 0' is the byte following the 14 bytes of
		clock data).

What:		/sys/bus/platform/devices/GGL0001:*/VBNV.1
Date:		May 2022
KernelVersion:	5.19
Description:
		Return the size in bytes of the verified boot non-volatile
		storage block.

What:		/sys/bus/platform/devices/GGL0001:*/VDAT
Date:		May 2022
KernelVersion:	5.19
Description:
		Returns the verified boot data block shared between the
		firmware verification step and the kernel verification step
		(binary).
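For illustration, the attributes above can be read from a shell; a minimal sketch, assuming a Chromebook that exposes the GGL0001 ACPI device (the ``:00`` instance suffix is hypothetical and varies per machine)::

    dev=/sys/bus/platform/devices/GGL0001:00
    cat "$dev/BINF.3"   # main firmware type, e.g. 1 for a normal boot
    cat "$dev/HWID"     # hardware ID string for this Chromebook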
@@ -23,9 +23,10 @@ Date:		Mar 2022
 Contact:	SeongJae Park <sj@kernel.org>
 Description:	Writing 'on' or 'off' to this file makes the kdamond starts or
 		stops, respectively.  Reading the file returns the keywords
-		based on the current status.  Writing 'update_schemes_stats' to
-		the file updates contents of schemes stats files of the
-		kdamond.
+		based on the current status.  Writing 'commit' to this file
+		makes the kdamond reads the user inputs in the sysfs files
+		except 'state' again.  Writing 'update_schemes_stats' to the
+		file updates contents of schemes stats files of the kdamond.

 What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/pid
 Date:		Mar 2022
@@ -40,14 +41,24 @@ Description:	Writing a number 'N' to this file creates the number of
 		directories for controlling each DAMON context named '0' to
 		'N-1' under the contexts/ directory.

+What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/avail_operations
+Date:		Apr 2022
+Contact:	SeongJae Park <sj@kernel.org>
+Description:	Reading this file returns the available monitoring operations
+		sets on the currently running kernel.
+
 What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/operations
 Date:		Mar 2022
 Contact:	SeongJae Park <sj@kernel.org>
 Description:	Writing a keyword for a monitoring operations set ('vaddr' for
-		virtual address spaces monitoring, and 'paddr' for the physical
-		address space monitoring) to this file makes the context to use
-		the operations set.  Reading the file returns the keyword for
-		the operations set the context is set to use.
+		virtual address spaces monitoring, 'fvaddr' for fixed virtual
+		address ranges monitoring, and 'paddr' for the physical address
+		space monitoring) to this file makes the context to use the
+		operations set.  Reading the file returns the keyword for the
+		operations set the context is set to use.
+
+		Note that only the operations sets that listed in
+		'avail_operations' file are valid inputs.

 What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/intervals/sample_us
 Date:		Mar 2022
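As a usage sketch of this interface, assuming a kernel with the DAMON sysfs interface and an already-created kdamond/context at index 0::

    cd /sys/kernel/mm/damon/admin/kdamonds/0
    cat contexts/0/avail_operations    # e.g. "vaddr fvaddr paddr"
    echo vaddr > contexts/0/operations # pick one of the listed sets
    echo commit > state                # re-read sysfs inputs except 'state'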
@@ -343,6 +343,11 @@ Admin can request writeback of those idle pages at right timing via::

 With the command, zram will writeback idle pages from memory to the storage.

+Additionally, if a user choose to writeback only huge and idle pages
+this can be accomplished with::
+
+	echo huge_idle > /sys/block/zramX/writeback
+
 If an admin wants to write a specific page in zram device to the backing device,
 they could write a page index into the interface.
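The page-index form mentioned above looks like the following sketch (device name and index are illustrative)::

    echo "page_index=1251" > /sys/block/zram0/writeback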
@@ -1208,6 +1208,34 @@ PAGE_SIZE multiple when read back.
	high limit is used and monitored properly, this limit's
	utility is limited to providing the final safety net.

+  memory.reclaim
+	A write-only nested-keyed file which exists for all cgroups.
+
+	This is a simple interface to trigger memory reclaim in the
+	target cgroup.
+
+	This file accepts a single key, the number of bytes to reclaim.
+	No nested keys are currently supported.
+
+	Example::
+
+	  echo "1G" > memory.reclaim
+
+	The interface can be later extended with nested keys to
+	configure the reclaim behavior. For example, specify the
+	type of memory to reclaim from (anon, file, ..).
+
+	Please note that the kernel can over or under reclaim from
+	the target cgroup. If less bytes are reclaimed than the
+	specified amount, -EAGAIN is returned.
+
+  memory.peak
+	A read-only single value file which exists on non-root
+	cgroups.
+
+	The max memory usage recorded for the cgroup and its
+	descendants since the creation of the cgroup.
+
   memory.oom.group
	A read-write single value file which exists on non-root
	cgroups.  The default value is "0".
@@ -1326,6 +1354,12 @@ PAGE_SIZE multiple when read back.
	  Amount of cached filesystem data that is swap-backed,
	  such as tmpfs, shm segments, shared anonymous mmap()s

+	  zswap
+		Amount of memory consumed by the zswap compression backend.
+
+	  zswapped
+		Amount of application memory swapped out to zswap.
+
	  file_mapped
		Amount of cached filesystem data mapped with mmap()

@@ -1516,6 +1550,21 @@ PAGE_SIZE multiple when read back.
	higher than the limit for an extended period of time. This
	reduces the impact on the workload and memory management.

+  memory.zswap.current
+	A read-only single value file which exists on non-root
+	cgroups.
+
+	The total amount of memory consumed by the zswap compression
+	backend.
+
+  memory.zswap.max
+	A read-write single value file which exists on non-root
+	cgroups.  The default is "max".
+
+	Zswap usage hard limit. If a cgroup's zswap pool reaches this
+	limit, it will refuse to take any more stores before existing
+	entries fault back in or are written out to disk.
+
   memory.pressure
	A read-only nested-keyed file.
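A hedged usage sketch for the new zswap knobs (the ``workload`` cgroup path is hypothetical)::

    echo 512M > /sys/fs/cgroup/workload/memory.zswap.max   # cap the zswap pool
    cat /sys/fs/cgroup/workload/memory.zswap.current       # check current usage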
@@ -1705,16 +1705,16 @@
			boot-time allocation of gigantic hugepages is skipped.

	hugetlb_free_vmemmap=
-			[KNL]   Requires CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
+			[KNL]   Requires CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
			enabled.
			Allows heavy hugetlb users to free up some more
			memory (7 * PAGE_SIZE for each 2MB hugetlb page).
-			Format: { on | off (default) }
+			Format: { [oO][Nn]/Y/y/1 | [oO][Ff]/N/n/0 (default) }

-			on: enable the feature
-			off: disable the feature
+			[oO][Nn]/Y/y/1: enable the feature
+			[oO][Ff]/N/n/0: disable the feature

-			Built with CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON=y,
+			Built with CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON=y,
			the default is on.

			This is not compatible with memory_hotplug.memmap_on_memory.
@@ -66,6 +66,17 @@ Setting it as ``N`` disables DAMON_RECLAIM.  Note that DAMON_RECLAIM could do
 no real monitoring and reclamation due to the watermarks-based activation
 condition.  Refer to below descriptions for the watermarks parameter for this.

+commit_inputs
+-------------
+
+Make DAMON_RECLAIM reads the input parameters again, except ``enabled``.
+
+Input parameters that updated while DAMON_RECLAIM is running are not applied
+by default.  Once this parameter is set as ``Y``, DAMON_RECLAIM reads values
+of parameters except ``enabled`` again.  Once the re-reading is done, this
+parameter is set as ``N``.  If invalid parameters are found while the
+re-reading, DAMON_RECLAIM will be disabled.
+
 min_age
 -------
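A sketch of the intended workflow, assuming the module parameters are exposed under the usual sysfs path (the ``min_age`` value is illustrative)::

    echo 30000000 > /sys/module/damon_reclaim/parameters/min_age
    echo Y > /sys/module/damon_reclaim/parameters/commit_inputs   # apply it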
@@ -68,7 +68,7 @@ comma (",").  ::
     │ kdamonds/nr_kdamonds
     │ │ 0/state,pid
     │ │ │ contexts/nr_contexts
-    │ │ │ │ 0/operations
+    │ │ │ │ 0/avail_operations,operations
     │ │ │ │ │ monitoring_attrs/
     │ │ │ │ │ │ intervals/sample_us,aggr_us,update_us
     │ │ │ │ │ │ nr_regions/min,max
@@ -121,10 +121,11 @@ In each kdamond directory, two files (``state`` and ``pid``) and one directory

 Reading ``state`` returns ``on`` if the kdamond is currently running, or
 ``off`` if it is not running.  Writing ``on`` or ``off`` makes the kdamond be
-in the state.  Writing ``update_schemes_stats`` to ``state`` file updates the
-contents of stats files for each DAMON-based operation scheme of the kdamond.
-For details of the stats, please refer to :ref:`stats section
-<sysfs_schemes_stats>`.
+in the state.  Writing ``commit`` to the ``state`` file makes kdamond reads the
+user inputs in the sysfs files except ``state`` file again.  Writing
+``update_schemes_stats`` to ``state`` file updates the contents of stats files
+for each DAMON-based operation scheme of the kdamond.  For details of the
+stats, please refer to :ref:`stats section <sysfs_schemes_stats>`.

 If the state is ``on``, reading ``pid`` shows the pid of the kdamond thread.

@@ -143,17 +144,28 @@ be written to the file.
 contexts/<N>/
 -------------

-In each context directory, one file (``operations``) and three directories
-(``monitoring_attrs``, ``targets``, and ``schemes``) exist.
+In each context directory, two files (``avail_operations`` and ``operations``)
+and three directories (``monitoring_attrs``, ``targets``, and ``schemes``)
+exist.

 DAMON supports multiple types of monitoring operations, including those for
-virtual address space and the physical address space.  You can set and get what
-type of monitoring operations DAMON will use for the context by writing one of
-below keywords to, and reading from the file.
+virtual address space and the physical address space.  You can get the list of
+available monitoring operations set on the currently running kernel by reading
+``avail_operations`` file.  Based on the kernel configuration, the file will
+list some or all of below keywords.

 - vaddr: Monitor virtual address spaces of specific processes
+- fvaddr: Monitor fixed virtual address ranges
 - paddr: Monitor the physical address space of the system

+Please refer to :ref:`regions sysfs directory <sysfs_regions>` for detailed
+differences between the operations sets in terms of the monitoring target
+regions.
+
+You can set and get what type of monitoring operations DAMON will use for the
+context by writing one of the keywords listed in ``avail_operations`` file and
+reading from the ``operations`` file.
+
 contexts/<N>/monitoring_attrs/
 ------------------------------

@@ -192,6 +204,8 @@ If you wrote ``vaddr`` to the ``contexts/<N>/operations``, each target should
 be a process.  You can specify the process to DAMON by writing the pid of the
 process to the ``pid_target`` file.

+.. _sysfs_regions:
+
 targets/<N>/regions
 -------------------

@@ -202,9 +216,10 @@ can be covered.  However, users could want to set the initial monitoring region
 to specific address ranges.

 In contrast, DAMON do not automatically sets and updates the monitoring target
-regions when ``paddr`` monitoring operations set is being used (``paddr`` is
-written to the ``contexts/<N>/operations``).  Therefore, users should set the
-monitoring target regions by themselves in the case.
+regions when ``fvaddr`` or ``paddr`` monitoring operations sets are being used
+(``fvaddr`` or ``paddr`` have written to the ``contexts/<N>/operations``).
+Therefore, users should set the monitoring target regions by themselves in the
+cases.

 For such cases, users can explicitly set the initial monitoring target regions
 as they want, by writing proper values to the files under this directory.
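To make the ``fvaddr``/``paddr`` case concrete, a sketch of setting one explicit target region, using the directory layout this document describes (the addresses are illustrative)::

    cd /sys/kernel/mm/damon/admin/kdamonds/0/contexts/0
    echo paddr > operations
    echo 1 > targets/nr_targets
    echo 1 > targets/0/regions/nr_regions
    echo 0x100000000 > targets/0/regions/0/start
    echo 0x140000000 > targets/0/regions/0/end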
@@ -164,7 +164,7 @@ default_hugepagesz
	will all result in 256 2M huge pages being allocated.  Valid default
	huge page size is architecture dependent.
 hugetlb_free_vmemmap
-	When CONFIG_HUGETLB_PAGE_FREE_VMEMMAP is set, this enables freeing
+	When CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP is set, this enables optimizing
	unused vmemmap pages associated with each HugeTLB page.

 When multiple huge page sizes are supported, ``/proc/sys/vm/nr_hugepages``
@@ -184,6 +184,24 @@ The maximum possible ``pages_sharing/pages_shared`` ratio is limited by the
 ``max_page_sharing`` tunable.  To increase the ratio ``max_page_sharing`` must
 be increased accordingly.

+Monitoring KSM events
+=====================
+
+There are some counters in /proc/vmstat that may be used to monitor KSM events.
+KSM might help save memory, it's a tradeoff by may suffering delay on KSM COW
+or on swapping in copy. Those events could help users evaluate whether or how
+to use KSM. For example, if cow_ksm increases too fast, user may decrease the
+range of madvise(, , MADV_MERGEABLE).
+
+cow_ksm
+	is incremented every time a KSM page triggers copy on write (COW)
+	when users try to write to a KSM page, we have to make a copy.
+
+ksm_swpin_copy
+	is incremented every time a KSM page is copied when swapping in
+	note that KSM page might be copied when swapping in because do_swap_page()
+	cannot do all the locking needed to reconstitute a cross-anon_vma KSM page.
+
 --
 Izik Eidus,
 Hugh Dickins, 17 Nov 2009
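A one-liner suffices to watch the counters this section introduces::

    grep -E 'cow_ksm|ksm_swpin_copy' /proc/vmstat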
@@ -62,6 +62,7 @@ Currently, these files are in /proc/sys/vm:
 - overcommit_memory
 - overcommit_ratio
 - page-cluster
+- page_lock_unfairness
 - panic_on_oom
 - percpu_pagelist_high_fraction
 - stat_interval
@@ -561,6 +562,45 @@ Change the minimum size of the hugepage pool.
 See Documentation/admin-guide/mm/hugetlbpage.rst


+hugetlb_optimize_vmemmap
+========================
+
+This knob is not available when memory_hotplug.memmap_on_memory (kernel parameter)
+is configured or the size of 'struct page' (a structure defined in
+include/linux/mm_types.h) is not power of two (an unusual system config could
+result in this).
+
+Enable (set to 1) or disable (set to 0) the feature of optimizing vmemmap pages
+associated with each HugeTLB page.
+
+Once enabled, the vmemmap pages of subsequent allocation of HugeTLB pages from
+buddy allocator will be optimized (7 pages per 2MB HugeTLB page and 4095 pages
+per 1GB HugeTLB page), whereas already allocated HugeTLB pages will not be
+optimized.  When those optimized HugeTLB pages are freed from the HugeTLB pool
+to the buddy allocator, the vmemmap pages representing that range needs to be
+remapped again and the vmemmap pages discarded earlier need to be rellocated
+again.  If your use case is that HugeTLB pages are allocated 'on the fly' (e.g.
+never explicitly allocating HugeTLB pages with 'nr_hugepages' but only set
+'nr_overcommit_hugepages', those overcommitted HugeTLB pages are allocated 'on
+the fly') instead of being pulled from the HugeTLB pool, you should weigh the
+benefits of memory savings against the more overhead (~2x slower than before)
+of allocation or freeing HugeTLB pages between the HugeTLB pool and the buddy
+allocator.  Another behavior to note is that if the system is under heavy memory
+pressure, it could prevent the user from freeing HugeTLB pages from the HugeTLB
+pool to the buddy allocator since the allocation of vmemmap pages could be
+failed, you have to retry later if your system encounter this situation.
+
+Once disabled, the vmemmap pages of subsequent allocation of HugeTLB pages from
+buddy allocator will not be optimized meaning the extra overhead at allocation
+time from buddy allocator disappears, whereas already optimized HugeTLB pages
+will not be affected.  If you want to make sure there are no optimized HugeTLB
+pages, you can set "nr_hugepages" to 0 first and then disable this.  Note that
+writing 0 to nr_hugepages will make any "in use" HugeTLB pages become surplus
+pages.  So, those surplus pages are still optimized until they are no longer
+in use.  You would need to wait for those surplus pages to be released before
+there are no optimized pages in the system.
+
+
 nr_hugepages_mempolicy
 ======================

@@ -754,6 +794,14 @@ extra faults and I/O delays for following faults if they would have been part of
 that consecutive pages readahead would have brought in.


+page_lock_unfairness
+====================
+
+This value determines the number of times that the page lock can be
+stolen from under a waiter. After the lock is stolen the number of times
+specified in this file (default is 5), the "fair lock handoff" semantics
+will apply, and the waiter will only be awakened if the lock can be taken.
+
 panic_on_oom
 ============
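Following the drain-then-disable advice above, a minimal sketch (requires a kernel exposing this knob)::

    echo 0 > /proc/sys/vm/nr_hugepages              # release pooled HugeTLB pages
    echo 0 > /proc/sys/vm/hugetlb_optimize_vmemmap  # then turn the feature off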
@@ -290,6 +290,8 @@ infrastructure:
      +------------------------------+---------+---------+
      | RPRES                        | [7-4]   |    y    |
      +------------------------------+---------+---------+
+     | WFXT                         | [3-0]   |    y    |
+     +------------------------------+---------+---------+


 Appendix I: Example
@@ -297,6 +297,10 @@ HWCAP2_SME_FA64

     Functionality implied by ID_AA64SMFR0_EL1.FA64 == 0b1.

+HWCAP2_WFXT
+
+    Functionality implied by ID_AA64ISAR2_EL1.WFXT == 0b0010.
+
 4. Unused AT_HWCAP bits
 -----------------------
@@ -4,39 +4,76 @@ The Kernel Address Sanitizer (KASAN)
 Overview
 --------

-KernelAddressSANitizer (KASAN) is a dynamic memory safety error detector
-designed to find out-of-bound and use-after-free bugs. KASAN has three modes:
+Kernel Address Sanitizer (KASAN) is a dynamic memory safety error detector
+designed to find out-of-bounds and use-after-free bugs.

-1. generic KASAN (similar to userspace ASan),
-2. software tag-based KASAN (similar to userspace HWASan),
-3. hardware tag-based KASAN (based on hardware memory tagging).
+KASAN has three modes:

-Generic KASAN is mainly used for debugging due to a large memory overhead.
-Software tag-based KASAN can be used for dogfood testing as it has a lower
-memory overhead that allows using it with real workloads. Hardware tag-based
-KASAN comes with low memory and performance overheads and, therefore, can be
-used in production. Either as an in-field memory bug detector or as a security
-mitigation.
+1. Generic KASAN
+2. Software Tag-Based KASAN
+3. Hardware Tag-Based KASAN

-Software KASAN modes (#1 and #2) use compile-time instrumentation to insert
-validity checks before every memory access and, therefore, require a compiler
-version that supports that.
+Generic KASAN, enabled with CONFIG_KASAN_GENERIC, is the mode intended for
+debugging, similar to userspace ASan. This mode is supported on many CPU
+architectures, but it has significant performance and memory overheads.

-Generic KASAN is supported in GCC and Clang. With GCC, it requires version
-8.3.0 or later. Any supported Clang version is compatible, but detection of
-out-of-bounds accesses for global variables is only supported since Clang 11.
+Software Tag-Based KASAN or SW_TAGS KASAN, enabled with CONFIG_KASAN_SW_TAGS,
+can be used for both debugging and dogfood testing, similar to userspace HWASan.
+This mode is only supported for arm64, but its moderate memory overhead allows
+using it for testing on memory-restricted devices with real workloads.

-Software tag-based KASAN mode is only supported in Clang.
+Hardware Tag-Based KASAN or HW_TAGS KASAN, enabled with CONFIG_KASAN_HW_TAGS,
+is the mode intended to be used as an in-field memory bug detector or as a
+security mitigation. This mode only works on arm64 CPUs that support MTE
+(Memory Tagging Extension), but it has low memory and performance overheads and
+thus can be used in production.

-The hardware KASAN mode (#3) relies on hardware to perform the checks but
-still requires a compiler version that supports memory tagging instructions.
-This mode is supported in GCC 10+ and Clang 12+.
+For details about the memory and performance impact of each KASAN mode, see the
+descriptions of the corresponding Kconfig options.

-Both software KASAN modes work with SLUB and SLAB memory allocators,
-while the hardware tag-based KASAN currently only supports SLUB.
+The Generic and the Software Tag-Based modes are commonly referred to as the
+software modes. The Software Tag-Based and the Hardware Tag-Based modes are
+referred to as the tag-based modes.

-Currently, generic KASAN is supported for the x86_64, arm, arm64, xtensa, s390,
-and riscv architectures, and tag-based KASAN modes are supported only for arm64.
+Support
+-------
+
+Architectures
+~~~~~~~~~~~~~
+
+Generic KASAN is supported on x86_64, arm, arm64, powerpc, riscv, s390, and
+xtensa, and the tag-based KASAN modes are supported only on arm64.
+
+Compilers
+~~~~~~~~~
+
+Software KASAN modes use compile-time instrumentation to insert validity checks
+before every memory access and thus require a compiler version that provides
+support for that. The Hardware Tag-Based mode relies on hardware to perform
+these checks but still requires a compiler version that supports the memory
+tagging instructions.
+
+Generic KASAN requires GCC version 8.3.0 or later
+or any Clang version supported by the kernel.
+
+Software Tag-Based KASAN requires GCC 11+
+or any Clang version supported by the kernel.
+
+Hardware Tag-Based KASAN requires GCC 10+ or Clang 12+.
+
+Memory types
+~~~~~~~~~~~~
+
+Generic KASAN supports finding bugs in all of slab, page_alloc, vmap, vmalloc,
+stack, and global memory.
+
+Software Tag-Based KASAN supports slab, page_alloc, vmalloc, and stack memory.
+
+Hardware Tag-Based KASAN supports slab, page_alloc, and non-executable vmalloc
+memory.
+
+For slab, both software KASAN modes support SLUB and SLAB allocators, while
+Hardware Tag-Based KASAN only supports SLUB.

 Usage
 -----
@@ -45,18 +82,59 @@ To enable KASAN, configure the kernel with::

	CONFIG_KASAN=y

-and choose between ``CONFIG_KASAN_GENERIC`` (to enable generic KASAN),
-``CONFIG_KASAN_SW_TAGS`` (to enable software tag-based KASAN), and
-``CONFIG_KASAN_HW_TAGS`` (to enable hardware tag-based KASAN).
+and choose between ``CONFIG_KASAN_GENERIC`` (to enable Generic KASAN),
+``CONFIG_KASAN_SW_TAGS`` (to enable Software Tag-Based KASAN), and
+``CONFIG_KASAN_HW_TAGS`` (to enable Hardware Tag-Based KASAN).

-For software modes, also choose between ``CONFIG_KASAN_OUTLINE`` and
+For the software modes, also choose between ``CONFIG_KASAN_OUTLINE`` and
 ``CONFIG_KASAN_INLINE``. Outline and inline are compiler instrumentation types.
-The former produces a smaller binary while the latter is 1.1-2 times faster.
+The former produces a smaller binary while the latter is up to 2 times faster.

 To include alloc and free stack traces of affected slab objects into reports,
 enable ``CONFIG_STACKTRACE``. To include alloc and free stack traces of affected
 physical pages, enable ``CONFIG_PAGE_OWNER`` and boot with ``page_owner=on``.

+Boot parameters
+~~~~~~~~~~~~~~~
+
+KASAN is affected by the generic ``panic_on_warn`` command line parameter.
+When it is enabled, KASAN panics the kernel after printing a bug report.
+
+By default, KASAN prints a bug report only for the first invalid memory access.
+With ``kasan_multi_shot``, KASAN prints a report on every invalid access. This
+effectively disables ``panic_on_warn`` for KASAN reports.
+
+Alternatively, independent of ``panic_on_warn``, the ``kasan.fault=`` boot
+parameter can be used to control panic and reporting behaviour:
+
+- ``kasan.fault=report`` or ``=panic`` controls whether to only print a KASAN
+  report or also panic the kernel (default: ``report``). The panic happens even
+  if ``kasan_multi_shot`` is enabled.
+
+Hardware Tag-Based KASAN mode (see the section about various modes below) is
+intended for use in production as a security mitigation. Therefore, it supports
+additional boot parameters that allow disabling KASAN or controlling features:
+
+- ``kasan=off`` or ``=on`` controls whether KASAN is enabled (default: ``on``).
+
+- ``kasan.mode=sync``, ``=async`` or ``=asymm`` controls whether KASAN
+  is configured in synchronous, asynchronous or asymmetric mode of
+  execution (default: ``sync``).
+  Synchronous mode: a bad access is detected immediately when a tag
+  check fault occurs.
+  Asynchronous mode: a bad access detection is delayed. When a tag check
+  fault occurs, the information is stored in hardware (in the TFSR_EL1
+  register for arm64). The kernel periodically checks the hardware and
+  only reports tag faults during these checks.
+  Asymmetric mode: a bad access is detected synchronously on reads and
+  asynchronously on writes.
+
+- ``kasan.vmalloc=off`` or ``=on`` disables or enables tagging of vmalloc
+  allocations (default: ``on``).
+
+- ``kasan.stacktrace=off`` or ``=on`` disables or enables alloc and free stack
+  traces collection (default: ``on``).
+
 Error reports
 ~~~~~~~~~~~~~

@@ -146,7 +224,7 @@ is either 8 or 16 aligned bytes depending on KASAN mode. Each number in the
 memory state section of the report shows the state of one of the memory
 granules that surround the accessed address.

-For generic KASAN, the size of each memory granule is 8. The state of each
+For Generic KASAN, the size of each memory granule is 8. The state of each
 granule is encoded in one shadow byte. Those 8 bytes can be accessible,
 partially accessible, freed, or be a part of a redzone. KASAN uses the following
 encoding for each shadow byte: 00 means that all 8 bytes of the corresponding
@@ -171,47 +249,6 @@ traces point to places in code that interacted with the object but that are not
 directly present in the bad access stack trace. Currently, this includes
 call_rcu() and workqueue queuing.

-Boot parameters
-~~~~~~~~~~~~~~~
-
-KASAN is affected by the generic ``panic_on_warn`` command line parameter.
-When it is enabled, KASAN panics the kernel after printing a bug report.
-
-By default, KASAN prints a bug report only for the first invalid memory access.
-With ``kasan_multi_shot``, KASAN prints a report on every invalid access. This
-effectively disables ``panic_on_warn`` for KASAN reports.
-
-Alternatively, independent of ``panic_on_warn`` the ``kasan.fault=`` boot
-parameter can be used to control panic and reporting behaviour:
-
-- ``kasan.fault=report`` or ``=panic`` controls whether to only print a KASAN
-  report or also panic the kernel (default: ``report``). The panic happens even
-  if ``kasan_multi_shot`` is enabled.
-
-Hardware tag-based KASAN mode (see the section about various modes below) is
-intended for use in production as a security mitigation. Therefore, it supports
-additional boot parameters that allow disabling KASAN or controlling features:
-
-- ``kasan=off`` or ``=on`` controls whether KASAN is enabled (default: ``on``).
-
-- ``kasan.mode=sync``, ``=async`` or ``=asymm`` controls whether KASAN
-  is configured in synchronous, asynchronous or asymmetric mode of
-  execution (default: ``sync``).
-  Synchronous mode: a bad access is detected immediately when a tag
-  check fault occurs.
-  Asynchronous mode: a bad access detection is delayed. When a tag check
-  fault occurs, the information is stored in hardware (in the TFSR_EL1
-  register for arm64). The kernel periodically checks the hardware and
-  only reports tag faults during these checks.
-  Asymmetric mode: a bad access is detected synchronously on reads and
-  asynchronously on writes.
-
-- ``kasan.vmalloc=off`` or ``=on`` disables or enables tagging of vmalloc
-  allocations (default: ``on``).
-
-- ``kasan.stacktrace=off`` or ``=on`` disables or enables alloc and free stack
-  traces collection (default: ``on``).
-
 Implementation details
 ----------------------

@@ -250,49 +287,46 @@ outline-instrumented kernel.
 Generic KASAN is the only mode that delays the reuse of freed objects via
 quarantine (see mm/kasan/quarantine.c for implementation).

-Software tag-based KASAN
+Software Tag-Based KASAN
 ~~~~~~~~~~~~~~~~~~~~~~~~

-Software tag-based KASAN uses a software memory tagging approach to checking
+Software Tag-Based KASAN uses a software memory tagging approach to checking
 access validity. It is currently only implemented for the arm64 architecture.

-Software tag-based KASAN uses the Top Byte Ignore (TBI) feature of arm64 CPUs
+Software Tag-Based KASAN uses the Top Byte Ignore (TBI) feature of arm64 CPUs
 to store a pointer tag in the top byte of kernel pointers. It uses shadow memory
 to store memory tags associated with each 16-byte memory cell (therefore, it
 dedicates 1/16th of the kernel memory for shadow memory).

-On each memory allocation, software tag-based KASAN generates a random tag, tags
+On each memory allocation, Software Tag-Based KASAN generates a random tag, tags
 the allocated memory with this tag, and embeds the same tag into the returned
 pointer.

-Software tag-based KASAN uses compile-time instrumentation to insert checks
+Software Tag-Based KASAN uses compile-time instrumentation to insert checks
 before each memory access. These checks make sure that the tag of the memory
 that is being accessed is equal to the tag of the pointer that is used to access
-this memory. In case of a tag mismatch, software tag-based KASAN prints a bug
+this memory. In case of a tag mismatch, Software Tag-Based KASAN prints a bug
 report.

-Software tag-based KASAN also has two instrumentation modes (outline, which
+Software Tag-Based KASAN also has two instrumentation modes (outline, which
 emits callbacks to check memory accesses; and inline, which performs the shadow
 memory checks inline). With outline instrumentation mode, a bug report is
 printed from the function that performs the access check. With inline
 instrumentation, a ``brk`` instruction is emitted by the compiler, and a
 dedicated ``brk`` handler is used to print bug reports.

-Software tag-based KASAN uses 0xFF as a match-all pointer tag (accesses through
+Software Tag-Based KASAN uses 0xFF as a match-all pointer tag (accesses through
 pointers with the 0xFF pointer tag are not checked). The value 0xFE is currently
 reserved to tag freed memory regions.

-Software tag-based KASAN currently only supports tagging of slab, page_alloc,
-and vmalloc memory.
-
-Hardware tag-based KASAN
+Hardware Tag-Based KASAN
 ~~~~~~~~~~~~~~~~~~~~~~~~

-Hardware tag-based KASAN is similar to the software mode in concept but uses
+Hardware Tag-Based KASAN is similar to the software mode in concept but uses
 hardware memory tagging support instead of compiler instrumentation and
 shadow memory.

-Hardware tag-based KASAN is currently only implemented for arm64 architecture
+Hardware Tag-Based KASAN is currently only implemented for arm64 architecture
 and based on both arm64 Memory Tagging Extension (MTE) introduced in ARMv8.5
 Instruction Set Architecture and Top Byte Ignore (TBI).

@@ -302,21 +336,18 @@ access, hardware makes sure that the tag of the memory that is being accessed is
 equal to the tag of the pointer that is used to access this memory. In case of a
 tag mismatch, a fault is generated, and a report is printed.

-Hardware tag-based KASAN uses 0xFF as a match-all pointer tag (accesses through
+Hardware Tag-Based KASAN uses 0xFF as a match-all pointer tag (accesses through
 pointers with the 0xFF pointer tag are not checked). The value 0xFE is currently
 reserved to tag freed memory regions.

-Hardware tag-based KASAN currently only supports tagging of slab, page_alloc,
-and VM_ALLOC-based vmalloc memory.
-
-If the hardware does not support MTE (pre ARMv8.5), hardware tag-based KASAN
+If the hardware does not support MTE (pre ARMv8.5), Hardware Tag-Based KASAN
 will not be enabled. In this case, all KASAN boot parameters are ignored.

 Note that enabling CONFIG_KASAN_HW_TAGS always results in in-kernel TBI being
 enabled. Even when ``kasan.mode=off`` is provided or when the hardware does not
 support MTE (but supports TBI).

-Hardware tag-based KASAN only reports the first found bug. After that, MTE tag
+Hardware Tag-Based KASAN only reports the first found bug. After that, MTE tag
 checking gets disabled.

 Shadow memory
@@ -414,19 +445,18 @@ generic ``noinstr`` one.
 Note that disabling compiler instrumentation (either on a per-file or a
 per-function basis) makes KASAN ignore the accesses that happen directly in
 that code for software KASAN modes. It does not help when the accesses happen
-indirectly (through calls to instrumented functions) or with the hardware
-tag-based mode that does not use compiler instrumentation.
+indirectly (through calls to instrumented functions) or with Hardware
+Tag-Based KASAN, which does not use compiler instrumentation.

 For software KASAN modes, to disable KASAN reports in a part of the kernel code
 for the current task, annotate this part of the code with a
 ``kasan_disable_current()``/``kasan_enable_current()`` section. This also
 disables the reports for indirect accesses that happen through function calls.

-For tag-based KASAN modes (include the hardware one), to disable access
-checking, use ``kasan_reset_tag()`` or ``page_kasan_tag_reset()``. Note that
-temporarily disabling access checking via ``page_kasan_tag_reset()`` requires
-saving and restoring the per-page KASAN tag via
-``page_kasan_tag``/``page_kasan_tag_set``.
+For tag-based KASAN modes, to disable access checking, use
+``kasan_reset_tag()`` or ``page_kasan_tag_reset()``. Note that temporarily
+disabling access checking via ``page_kasan_tag_reset()`` requires saving and
+restoring the per-page KASAN tag via ``page_kasan_tag``/``page_kasan_tag_set``.

 Tests
 ~~~~~
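Tying the configuration and boot-parameter discussion together, a hedged sketch (``scripts/config`` is the in-tree config helper; the chosen boot values are one plausible setup, not a recommendation from this document)::

    ./scripts/config -e KASAN -e KASAN_HW_TAGS   # enable Hardware Tag-Based KASAN
    # ...then tune it on the kernel command line at boot, e.g.:
    #   kasan=on kasan.mode=sync kasan.vmalloc=on kasan.stacktrace=off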
Documentation/devicetree/bindings/gpio/gpio-consumer-common.yaml (new file)
@@ -0,0 +1,64 @@
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
%YAML 1.2
---
$id: http://devicetree.org/schemas/gpio/gpio-consumer-common.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#

title: Common GPIO lines

maintainers:
  - Bartosz Golaszewski <brgl@bgdev.pl>
  - Linus Walleij <linus.walleij@linaro.org>

description:
  Pay attention to using proper GPIO flag (e.g. GPIO_ACTIVE_LOW) for the GPIOs
  using inverted signal (e.g. RESETN).

select: true

properties:
  enable-gpios:
    maxItems: 1
    description:
      GPIO connected to the enable control pin.

  reset-gpios:
    description:
      GPIO (or GPIOs for power sequence) connected to the device reset pin
      (e.g. RESET or RESETN).

  powerdown-gpios:
    maxItems: 1
    description:
      GPIO connected to the power down pin (hardware power down or power cut,
      e.g. PD or PWDN).

  pwdn-gpios:
    maxItems: 1
    description: Use powerdown-gpios
    deprecated: true

  wakeup-gpios:
    maxItems: 1
    description:
      GPIO connected to the pin waking up the device from suspend or other
      power-saving modes.

allOf:
  - if:
      properties:
        compatible:
          contains:
            enum:
              - mmc-pwrseq-simple
    then:
      properties:
        reset-gpios:
          minItems: 1
          maxItems: 32
    else:
      properties:
        reset-gpios:
          maxItems: 1

additionalProperties: true
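Schemas like the one above can be exercised with the dt-schema tooling; a hedged sketch (package and flags from the upstream dt-schema project, the dtb path is illustrative)::

    pip install dtschema
    dt-validate -s Documentation/devicetree/bindings/gpio/gpio-consumer-common.yaml \
        my-board.dtb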
@@ -30,6 +30,7 @@ properties:
               - maxim,max7325
               - maxim,max7326
               - maxim,max7327
+              - nxp,pca6408
               - nxp,pca6416
               - nxp,pca9505
               - nxp,pca9506
@@ -28,10 +28,11 @@ properties:
       - enum:
           - realtek,rtl8380-gpio
           - realtek,rtl8390-gpio
+          - realtek,rtl9300-gpio
+          - realtek,rtl9310-gpio
       - const: realtek,otto-gpio

-  reg:
-    maxItems: 1
+  reg: true

   "#gpio-cells":
     const: 2
@@ -50,6 +51,23 @@ properties:
   interrupts:
     maxItems: 1

+if:
+  properties:
+    compatible:
+      contains:
+        const: realtek,rtl9300-gpio
+then:
+  properties:
+    reg:
+      items:
+        - description: GPIO and interrupt control
+        - description: interrupt CPU map
+else:
+  properties:
+    reg:
+      items:
+        - description: GPIO and interrupt control
+
 required:
   - compatible
   - reg
@@ -74,5 +92,17 @@ examples:
         interrupt-parent = <&rtlintc>;
         interrupts = <23>;
     };
+  - |
+    gpio@3300 {
+        compatible = "realtek,rtl9300-gpio", "realtek,otto-gpio";
+        reg = <0x3300 0x1c>, <0x3338 0x8>;
+        gpio-controller;
+        #gpio-cells = <2>;
+        ngpios = <24>;
+        interrupt-controller;
+        #interrupt-cells = <2>;
+        interrupt-parent = <&rtlintc>;
+        interrupts = <13>;
+    };

 ...
@@ -51,6 +51,11 @@ properties:
       - items:
           - const: renesas,gpio-r8a779a0    # R-Car V3U

+      - items:
+          - enum:
+              - renesas,gpio-r8a779f0      # R-Car S4-8
+          - const: renesas,rcar-gen4-gpio  # R-Car Gen4
+
   reg:
     maxItems: 1
@@ -52,6 +52,23 @@ properties:
       <child-interrupt-base parent-interrupt-base length> triplets.
     $ref: /schemas/types.yaml#/definitions/uint32-matrix

+patternProperties:
+  "^.+-hog(-[0-9]+)?$":
+    type: object
+    properties:
+      gpio-hog: true
+      gpios: true
+      input: true
+      output-high: true
+      output-low: true
+      line-name: true
+
+    required:
+      - gpio-hog
+      - gpios
+
+    additionalProperties: false
+
 required:
   - compatible
   - reg
@@ -11,14 +11,15 @@ maintainers:

 description: |
   The MTK ADSP mailbox Inter-Processor Communication (IPC) enables the SoC
-  to ommunicate with ADSP by passing messages through two mailbox channels.
+  to communicate with ADSP by passing messages through two mailbox channels.
   The MTK ADSP mailbox IPC also provides the ability for one processor to
   signal the other processor using interrupts.

 properties:
   compatible:
-    items:
-      - const: mediatek,mt8195-adsp-mbox
+    enum:
+      - mediatek,mt8195-adsp-mbox
+      - mediatek,mt8186-adsp-mbox

   "#mbox-cells":
     const: 0
@@ -26,6 +26,15 @@ description: |
   second cell is used to identify the mailbox that the client is going
   to use.

+  For shared mailboxes, the first cell composed of two fields:
+    - bits 15..8:
+        A bit mask of flags that further specifies the type of shared
+        mailbox to be used (based on the data size). If no flag is
+        specified then, 32-bit shared mailbox is used.
+    - bits 7..0:
+        Defines the type of the mailbox to be used. This field should be
+        TEGRA_HSP_MBOX_TYPE_SM for shared mailboxes.
+
   For doorbells, the second cell specifies the index of the doorbell to
   use.
@@ -62,23 +62,14 @@ additionalProperties: false

 examples:
   - |
-    #include <dt-bindings/interrupt-controller/arm-gic.h>
-    #include <dt-bindings/mailbox/qcom-ipcc.h>
-
-    mailbox@408000 {
-        compatible = "qcom,sm8250-ipcc", "qcom,ipcc";
-        reg = <0x408000 0x1000>;
-        interrupts = <GIC_SPI 229 IRQ_TYPE_LEVEL_HIGH>;
-        interrupt-controller;
-        #interrupt-cells = <3>;
-        #mbox-cells = <2>;
-    };
-
-    smp2p-modem {
-        compatible = "qcom,smp2p";
-        interrupts-extended = <&ipcc_mproc IPCC_CLIENT_MPSS
-                               IPCC_MPROC_SIGNAL_SMP2P IRQ_TYPE_EDGE_RISING>;
-        mboxes = <&ipcc_mproc IPCC_CLIENT_MPSS IPCC_MPROC_SIGNAL_SMP2P>;
-
-        /* Other SMP2P fields */
-    };
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/mailbox/qcom-ipcc.h>
+
+    mailbox@408000 {
+        compatible = "qcom,sm8250-ipcc", "qcom,ipcc";
+        reg = <0x408000 0x1000>;
+        interrupts = <GIC_SPI 229 IRQ_TYPE_LEVEL_HIGH>;
+        interrupt-controller;
+        #interrupt-cells = <3>;
+        #mbox-cells = <2>;
+    };
@@ -30,15 +30,11 @@ properties:
     items:
       - description: rx channel occupied
       - description: tx channel free
-      - description: wakeup source
-    minItems: 2

   interrupt-names:
     items:
       - const: rx
       - const: tx
-      - const: wakeup
-    minItems: 2

   wakeup-source: true

@@ -70,10 +66,9 @@ examples:
       #mbox-cells = <1>;
       reg = <0x4c001000 0x400>;
       st,proc-id = <0>;
-      interrupts-extended = <&intc GIC_SPI 100 IRQ_TYPE_NONE>,
-                            <&intc GIC_SPI 101 IRQ_TYPE_NONE>,
-                            <&aiec 62 1>;
-      interrupt-names = "rx", "tx", "wakeup";
+      interrupts-extended = <&exti 61 1>,
+                            <&intc GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH>;
+      interrupt-names = "rx", "tx";
       clocks = <&rcc_clk IPCC>;
       wakeup-source;
     };
@@ -258,8 +258,9 @@ prototypes::
	int (*launder_folio)(struct folio *);
	bool (*is_partially_uptodate)(struct folio *, size_t from, size_t count);
	int (*error_remove_page)(struct address_space *, struct page *);
-	int (*swap_activate)(struct file *);
+	int (*swap_activate)(struct swap_info_struct *sis, struct file *f, sector_t *span);
	int (*swap_deactivate)(struct file *);
+	int (*swap_rw)(struct kiocb *iocb, struct iov_iter *iter);

 locking rules:
	All except dirty_folio and free_folio may block
@@ -287,6 +288,7 @@ is_partially_uptodate:	yes
 error_remove_page:	yes
 swap_activate:		no
 swap_deactivate:	no
+swap_rw:		yes, unlocks
 ====================== ======================== ========= ===============

 ->write_begin(), ->write_end() and ->read_folio() may be called from
@@ -386,15 +388,19 @@ cleaned, or an error value if not.  Note that in order to prevent the folio
 getting mapped back in and redirtied, it needs to be kept locked
 across the entire operation.

-->swap_activate will be called with a non-zero argument on
-files backing (non block device backed) swapfiles. A return value
-of zero indicates success, in which case this file can be used for
-backing swapspace. The swapspace operations will be proxied to the
-address space operations.
+->swap_activate() will be called to prepare the given file for swap.  It
+should perform any validation and preparation necessary to ensure that
+writes can be performed with minimal memory allocation.  It should call
+add_swap_extent(), or the helper iomap_swapfile_activate(), and return
+the number of extents added.  If IO should be submitted through
+->swap_rw(), it should set SWP_FS_OPS, otherwise IO will be submitted
+directly to the block device ``sis->bdev``.

 ->swap_deactivate() will be called in the sys_swapoff()
 path after ->swap_activate() returned success.

+->swap_rw will be called for swap IO if SWP_FS_OPS was set by ->swap_activate().
+
 file_lock_operations
 ====================
@ -942,56 +942,73 @@ can be substantial. In many cases there are other means to find out
|
||||
additional memory using subsystem specific interfaces, for instance
|
||||
/proc/net/sockstat for TCP memory allocations.
|
||||
|
||||
The following is from a 16GB PIII, which has highmem enabled.
|
||||
You may not have all of these fields.
|
||||
Example output. You may not have all of these fields.
|
||||
|
||||
::
|
||||
|
||||
> cat /proc/meminfo
|
||||
|
||||
MemTotal: 16344972 kB
|
||||
MemFree: 13634064 kB
|
||||
MemAvailable: 14836172 kB
|
||||
Buffers: 3656 kB
|
||||
Cached: 1195708 kB
|
||||
SwapCached: 0 kB
|
||||
Active: 891636 kB
|
||||
Inactive: 1077224 kB
|
||||
HighTotal: 15597528 kB
|
||||
HighFree: 13629632 kB
|
||||
LowTotal: 747444 kB
|
||||
LowFree: 4432 kB
|
||||
SwapTotal: 0 kB
|
||||
SwapFree: 0 kB
|
||||
Dirty: 968 kB
|
||||
Writeback: 0 kB
|
||||
AnonPages: 861800 kB
|
||||
Mapped: 280372 kB
|
||||
Shmem: 644 kB
|
||||
KReclaimable: 168048 kB
|
||||
Slab: 284364 kB
|
||||
SReclaimable: 159856 kB
|
||||
SUnreclaim: 124508 kB
|
||||
PageTables: 24448 kB
|
||||
NFS_Unstable: 0 kB
|
||||
Bounce: 0 kB
|
||||
WritebackTmp: 0 kB
|
||||
CommitLimit: 7669796 kB
|
||||
Committed_AS: 100056 kB
|
||||
VmallocTotal: 112216 kB
|
||||
VmallocUsed: 428 kB
|
||||
VmallocChunk: 111088 kB
|
||||
Percpu: 62080 kB
|
||||
HardwareCorrupted: 0 kB
|
||||
AnonHugePages: 49152 kB
|
||||
ShmemHugePages: 0 kB
|
||||
ShmemPmdMapped: 0 kB
|
||||
MemTotal: 32858820 kB
|
||||
MemFree: 21001236 kB
|
||||
MemAvailable: 27214312 kB
|
||||
Buffers: 581092 kB
|
||||
Cached: 5587612 kB
|
||||
SwapCached: 0 kB
|
||||
Active: 3237152 kB
|
||||
Inactive: 7586256 kB
|
||||
Active(anon): 94064 kB
|
||||
Inactive(anon): 4570616 kB
|
||||
Active(file): 3143088 kB
|
||||
Inactive(file): 3015640 kB
|
||||
Unevictable: 0 kB
|
||||
Mlocked: 0 kB
|
||||
SwapTotal: 0 kB
|
||||
SwapFree: 0 kB
|
||||
Zswap: 1904 kB
|
||||
Zswapped: 7792 kB
|
||||
Dirty: 12 kB
|
||||
Writeback: 0 kB
|
||||
AnonPages: 4654780 kB
|
||||
Mapped: 266244 kB
|
||||
Shmem: 9976 kB
|
||||
KReclaimable: 517708 kB
|
||||
Slab: 660044 kB
|
||||
SReclaimable: 517708 kB
|
||||
SUnreclaim: 142336 kB
|
||||
KernelStack: 11168 kB
|
||||
PageTables: 20540 kB
|
||||
NFS_Unstable: 0 kB
|
||||
Bounce: 0 kB
|
||||
WritebackTmp: 0 kB
|
||||
CommitLimit: 16429408 kB
|
||||
Committed_AS: 7715148 kB
|
||||
VmallocTotal: 34359738367 kB
|
||||
VmallocUsed: 40444 kB
|
||||
VmallocChunk: 0 kB
|
||||
Percpu: 29312 kB
|
||||
HardwareCorrupted: 0 kB
|
||||
AnonHugePages: 4149248 kB
|
||||
ShmemHugePages: 0 kB
|
||||
ShmemPmdMapped: 0 kB
|
||||
FileHugePages: 0 kB
|
||||
FilePmdMapped: 0 kB
|
||||
CmaTotal: 0 kB
|
||||
CmaFree: 0 kB
|
||||
HugePages_Total: 0
|
||||
HugePages_Free: 0
|
||||
HugePages_Rsvd: 0
|
||||
HugePages_Surp: 0
|
||||
Hugepagesize: 2048 kB
|
||||
Hugetlb: 0 kB
|
||||
DirectMap4k: 401152 kB
|
||||
DirectMap2M: 10008576 kB
|
||||
DirectMap1G:    24117248 kB

MemTotal
              Total usable RAM (i.e. physical RAM minus a few reserved
              bits and the kernel binary code)
MemFree
              The sum of LowFree+HighFree
              Total free RAM. On highmem systems, the sum of LowFree+HighFree
MemAvailable
              An estimate of how much memory is available for starting new
              applications, without swapping. Calculated from MemFree,
@@ -1005,8 +1022,9 @@ Buffers
              Relatively temporary storage for raw disk blocks
              shouldn't get tremendously large (20MB or so)
Cached
              in-memory cache for files read from the disk (the
              pagecache). Doesn't include SwapCached
              In-memory cache for files read from the disk (the
              pagecache) as well as tmpfs & shmem.
              Doesn't include SwapCached.
SwapCached
              Memory that once was swapped out, is swapped back in but
              still also is in the swapfile (if memory is needed it
@@ -1018,6 +1036,11 @@ Active
Inactive
              Memory which has been less recently used. It is more
              eligible to be reclaimed for other purposes
Unevictable
              Memory allocated for userspace which cannot be reclaimed, such
              as mlocked pages, ramfs backing pages, secret memfd pages etc.
Mlocked
              Memory locked with mlock().
HighTotal, HighFree
              Highmem is all memory above ~860MB of physical memory.
              Highmem areas are for use by userspace programs, or
@@ -1034,26 +1057,20 @@ SwapTotal
SwapFree
              Memory which has been evicted from RAM, and is temporarily
              on the disk
Zswap
              Memory consumed by the zswap backend (compressed size)
Zswapped
              Amount of anonymous memory stored in zswap (original size)
Dirty
              Memory which is waiting to get written back to the disk
Writeback
              Memory which is actively being written back to the disk
AnonPages
              Non-file backed pages mapped into userspace page tables
HardwareCorrupted
              The amount of RAM/memory in KB, the kernel identifies as
              corrupted.
AnonHugePages
              Non-file backed huge pages mapped into userspace page tables
Mapped
              files which have been mmaped, such as libraries
Shmem
              Total memory used by shared memory (shmem) and tmpfs
ShmemHugePages
              Memory used by shared memory (shmem) and tmpfs allocated
              with huge pages
ShmemPmdMapped
              Shared memory mapped into userspace with huge pages
KReclaimable
              Kernel allocations that the kernel will attempt to reclaim
              under memory pressure. Includes SReclaimable (below), and other
@@ -1064,9 +1081,10 @@ SReclaimable
              Part of Slab, that might be reclaimed, such as caches
SUnreclaim
              Part of Slab, that cannot be reclaimed on memory pressure
KernelStack
              Memory consumed by the kernel stacks of all tasks
PageTables
              amount of memory dedicated to the lowest level of page
              tables.
              Memory consumed by userspace page tables
NFS_Unstable
              Always zero. Previously counted pages which had been written to
              the server but had not been committed to stable storage.
@@ -1098,7 +1116,7 @@ Committed_AS
              has been allocated by processes, even if it has not been
              "used" by them as of yet. A process which malloc()'s 1G
              of memory, but only touches 300M of it will show up as
              using 1G. This 1G is memory which has been "committed" to
              by the VM and can be used at any time by the allocating
              application. With strict overcommit enabled on the system
              (mode 2 in 'vm.overcommit_memory'), allocations which would
@@ -1107,7 +1125,7 @@ Committed_AS
              not fail due to lack of memory once that memory has been
              successfully allocated.
VmallocTotal
              total size of vmalloc memory area
              total size of vmalloc virtual address space
VmallocUsed
              amount of vmalloc area which is used
VmallocChunk
@@ -1115,6 +1133,30 @@ VmallocChunk
Percpu
              Memory allocated to the percpu allocator used to back percpu
              allocations. This stat excludes the cost of metadata.
HardwareCorrupted
              The amount of RAM/memory in KB, the kernel identifies as
              corrupted.
AnonHugePages
              Non-file backed huge pages mapped into userspace page tables
ShmemHugePages
              Memory used by shared memory (shmem) and tmpfs allocated
              with huge pages
ShmemPmdMapped
              Shared memory mapped into userspace with huge pages
FileHugePages
              Memory used for filesystem data (page cache) allocated
              with huge pages
FilePmdMapped
              Page cache mapped into userspace with huge pages
CmaTotal
              Memory reserved for the Contiguous Memory Allocator (CMA)
CmaFree
              Free remaining memory in the CMA reserves
HugePages_Total, HugePages_Free, HugePages_Rsvd, HugePages_Surp, Hugepagesize, Hugetlb
              See Documentation/admin-guide/mm/hugetlbpage.rst.
DirectMap4k, DirectMap2M, DirectMap1G
              Breakdown of page table sizes used in the kernel's
              identity mapping of RAM

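Each field above is a plain ``Name: value kB`` line, so it can be read with
ordinary text parsing. A minimal userspace sketch (not part of the kernel
tree; the field name comes from the list above, everything else is
illustrative)::

   #include <stdio.h>

   /* Minimal sketch: read MemAvailable (in kB) from /proc/meminfo. */
   int main(void)
   {
           char line[256];
           unsigned long kb = 0;
           FILE *f = fopen("/proc/meminfo", "r");

           if (!f)
                   return 1;
           while (fgets(line, sizeof(line), f)) {
                   if (sscanf(line, "MemAvailable: %lu kB", &kb) == 1) {
                           printf("MemAvailable: %lu kB\n", kb);
                           break;
                   }
           }
           fclose(f);
           return 0;
   }
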
vmallocinfo
~~~~~~~~~~~

@@ -749,8 +749,9 @@ cache in your filesystem. The following members are defined:
                              size_t count);
        void (*is_dirty_writeback)(struct folio *, bool *, bool *);
        int (*error_remove_page) (struct mapping *mapping, struct page *page);
        int (*swap_activate)(struct file *);
        int (*swap_activate)(struct swap_info_struct *sis, struct file *f, sector_t *span);
        int (*swap_deactivate)(struct file *);
        int (*swap_rw)(struct kiocb *iocb, struct iov_iter *iter);
  };

``writepage``
@@ -948,15 +949,21 @@ cache in your filesystem. The following members are defined:
        unless you have them locked or reference counts increased.

``swap_activate``
        Called when swapon is used on a file to allocate space if
        necessary and pin the block lookup information in memory. A
        return value of zero indicates success, in which case this file
        can be used to back swapspace.

        Called to prepare the given file for swap. It should perform
        any validation and preparation necessary to ensure that writes
        can be performed with minimal memory allocation. It should call
        add_swap_extent(), or the helper iomap_swapfile_activate(), and
        return the number of extents added. If IO should be submitted
        through ->swap_rw(), it should set SWP_FS_OPS, otherwise IO will
        be submitted directly to the block device ``sis->bdev``.

``swap_deactivate``
        Called during swapoff on files where swap_activate was
        successful.

``swap_rw``
        Called to read or write swap pages when SWP_FS_OPS is set.

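For an iomap-based filesystem the new hook can usually be a thin wrapper
around the helper named above. A hedged sketch (``myfs_iomap_ops`` is a
hypothetical iomap ops table that a real filesystem would supply)::

   #include <linux/fs.h>
   #include <linux/swap.h>
   #include <linux/iomap.h>

   extern const struct iomap_ops myfs_iomap_ops;   /* hypothetical */

   /* Sketch of a swap_activate implementation per the text above. */
   static int myfs_swap_activate(struct swap_info_struct *sis,
                                 struct file *file, sector_t *span)
   {
           /* Validates the file and registers its extents with swap. */
           return iomap_swapfile_activate(sis, file, span, &myfs_iomap_ops);
   }
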
The File Object
===============

363  Documentation/firmware-guide/acpi/chromeos-acpi-device.rst  Normal file
@@ -0,0 +1,363 @@
.. SPDX-License-Identifier: GPL-2.0

=====================
Chrome OS ACPI Device
=====================

Hardware functionality specific to Chrome OS is exposed through a Chrome OS ACPI device.
The plug and play ID of a Chrome OS ACPI device is GGL0001. GGL is a valid PNP ID of
Google, and the PNP ID can be used with ACPI devices according to the guidelines. The
following ACPI objects are supported:

.. flat-table:: Supported ACPI Objects
   :widths: 1 2
   :header-rows: 1

   * - Object
     - Description

   * - CHSW
     - Chrome OS switch positions

   * - HWID
     - Chrome OS hardware ID

   * - FWID
     - Chrome OS firmware version

   * - FRID
     - Chrome OS read-only firmware version

   * - BINF
     - Chrome OS boot information

   * - GPIO
     - Chrome OS GPIO assignments

   * - VBNV
     - Chrome OS NVRAM locations

   * - VDTA
     - Chrome OS verified boot data

   * - FMAP
     - Chrome OS flashmap base address

   * - MLST
     - Chrome OS method list

CHSW (Chrome OS switch positions)
=================================
This control method returns the switch positions for Chrome OS specific hardware switches.

Arguments:
----------
None

Result code:
------------
An integer containing the switch positions as bitfields:

.. flat-table::
   :widths: 1 2

   * - 0x00000002
     - Recovery button was pressed when x86 firmware booted.

   * - 0x00000004
     - Recovery button was pressed when EC firmware booted. (required if EC EEPROM is
       rewritable; otherwise optional)

   * - 0x00000020
     - Developer switch was enabled when x86 firmware booted.

   * - 0x00000200
     - Firmware write protection was disabled when x86 firmware booted. (required if
       firmware write protection is controlled through x86 BIOS; otherwise optional)

All other bits are reserved and should be set to 0.

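Decoding the returned integer is plain bit testing. A small illustrative
snippet (the sample value is made up)::

   #include <stdio.h>

   /* Decode the CHSW bitfield per the table above. */
   int main(void)
   {
           unsigned int chsw = 0x00000222;  /* assumed example value */

           if (chsw & 0x00000002)
                   printf("recovery button pressed (x86 firmware)\n");
           if (chsw & 0x00000004)
                   printf("recovery button pressed (EC firmware)\n");
           if (chsw & 0x00000020)
                   printf("developer switch enabled\n");
           if (chsw & 0x00000200)
                   printf("firmware write protection disabled\n");
           return 0;
   }
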
HWID (Chrome OS hardware ID)
============================
This control method returns the hardware ID for the Chromebook.

Arguments:
----------
None

Result code:
------------
A null-terminated ASCII string containing the hardware ID from the Model-Specific Data
area of EEPROM.

Note that the hardware ID can be up to 256 characters long, including the terminating
null.

FWID (Chrome OS firmware version)
=================================
This control method returns the firmware version for the rewritable portion of the main
processor firmware.

Arguments:
----------
None

Result code:
------------
A null-terminated ASCII string containing the complete firmware version for the rewritable
portion of the main processor firmware.

FRID (Chrome OS read-only firmware version)
===========================================
This control method returns the firmware version for the read-only portion of the main
processor firmware.

Arguments:
----------
None

Result code:
------------
A null-terminated ASCII string containing the complete firmware version for the read-only
(bootstrap + recovery) portion of the main processor firmware.

BINF (Chrome OS boot information)
=================================
This control method returns information about the current boot.

Arguments:
----------
None

Result code:
------------

.. code-block::

   Package {
           Reserved1
           Reserved2
           Active EC Firmware
           Active Main Firmware Type
           Reserved5
   }

.. flat-table::
   :widths: 1 1 2
   :header-rows: 1

   * - Field
     - Format
     - Description

   * - Reserved1
     - DWORD
     - Set to 256 (0x100). This indicates this field is no longer used.

   * - Reserved2
     - DWORD
     - Set to 256 (0x100). This indicates this field is no longer used.

   * - Active EC firmware
     - DWORD
     - The EC firmware which was used during boot.

       - 0 - Read-only (recovery) firmware.
       - 1 - Rewritable firmware.

       Set to 0 if EC firmware is always read-only.

   * - Active Main Firmware Type
     - DWORD
     - The main firmware type which was used during boot.

       - 0 - Recovery
       - 1 - Normal
       - 2 - Developer
       - 3 - netboot (factory installation only)

       Other values are reserved.

   * - Reserved5
     - DWORD
     - Set to 256 (0x100). This indicates this field is no longer used.

GPIO (Chrome OS GPIO assignments)
=================================
This control method returns information about Chrome OS specific GPIO assignments for
Chrome OS hardware, so the kernel can directly control that hardware.

Arguments:
----------
None

Result code:
------------
.. code-block::

   Package {
           Package {
                   // First GPIO assignment
                   Signal Type        //DWORD
                   Attributes         //DWORD
                   Controller Offset  //DWORD
                   Controller Name    //ASCIIZ
           },
           ...
           Package {
                   // Last GPIO assignment
                   Signal Type        //DWORD
                   Attributes         //DWORD
                   Controller Offset  //DWORD
                   Controller Name    //ASCIIZ
           }
   }

Where ASCIIZ means a null-terminated ASCII string.

.. flat-table::
   :widths: 1 1 2
   :header-rows: 1

   * - Field
     - Format
     - Description

   * - Signal Type
     - DWORD
     - Type of GPIO signal

       - 0x00000001 - Recovery button
       - 0x00000002 - Developer mode switch
       - 0x00000003 - Firmware write protection switch
       - 0x00000100 - Debug header GPIO 0
       - ...
       - 0x000001FF - Debug header GPIO 255

       Other values are reserved.

   * - Attributes
     - DWORD
     - Signal attributes as bitfields:

       - 0x00000001 - Signal is active-high (for buttons, a GPIO value
         of 1 means the button is pressed; for switches, a GPIO value
         of 1 means the switch is enabled). If this bit is 0, the signal
         is active-low. Set to 0 for debug header GPIOs.

   * - Controller Offset
     - DWORD
     - GPIO number on the specified controller.

   * - Controller Name
     - ASCIIZ
     - Name of the controller for the GPIO.
       Currently supported names:
       "NM10" - Intel NM10 chip

VBNV (Chrome OS NVRAM locations)
================================
This control method returns information about the NVRAM (CMOS) locations used to
communicate with the BIOS.

Arguments:
----------
None

Result code:
------------
.. code-block::

   Package {
           NV Storage Block Offset  //DWORD
           NV Storage Block Size    //DWORD
   }

.. flat-table::
   :widths: 1 1 2
   :header-rows: 1

   * - Field
     - Format
     - Description

   * - NV Storage Block Offset
     - DWORD
     - Offset in CMOS bank 0 of the verified boot non-volatile storage block, counting
       from the first writable CMOS byte (that is, offset=0 is the byte following the 14
       bytes of clock data).

   * - NV Storage Block Size
     - DWORD
     - Size in bytes of the verified boot non-volatile storage block.

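Since the offset is relative to the first writable CMOS byte, a consumer must
add the clock-data bytes back when indexing the raw CMOS bank. A minimal
illustrative calculation::

   /* Translate a VBNV offset into an absolute index in CMOS bank 0.
    * The 14 bytes of clock data precede the writable area, per the
    * table above.
    */
   #define CMOS_CLOCK_DATA_BYTES 14

   static unsigned int vbnv_to_cmos_index(unsigned int nv_block_offset)
   {
           return CMOS_CLOCK_DATA_BYTES + nv_block_offset;
   }
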
FMAP (Chrome OS flashmap address)
=================================
This control method returns the physical memory address of the start of the main processor
firmware flashmap.

Arguments:
----------
None

Result code:
------------
A DWORD containing the physical memory address of the start of the main processor firmware
flashmap.

VDTA (Chrome OS verified boot data)
===================================
This control method returns the verified boot data block shared between the firmware
verification step and the kernel verification step.

Arguments:
----------
None

Result code:
------------
A buffer containing the verified boot data block.

MECK (Management Engine Checksum)
=================================
This control method returns the SHA-1 or SHA-256 hash that is read out of the Management
Engine extended registers during boot. The hash is exported via ACPI so the OS can verify
that the ME firmware has not changed. If the Management Engine is not present, or if the
firmware was unable to read the extended registers, this buffer can be zero.

Arguments:
----------
None

Result code:
------------
A buffer containing the ME hash.

MLST (Chrome OS method list)
============================
This control method returns a list of the other control methods supported by the Chrome OS
hardware device.

Arguments:
----------
None

Result code:
------------
A package containing a list of null-terminated ASCII strings, one for each control method
supported by the Chrome OS hardware device, not including the MLST method itself.
For this version of the specification, the result is:

.. code-block::

   Package {
           "CHSW",
           "FWID",
           "HWID",
           "FRID",
           "BINF",
           "GPIO",
           "VBNV",
           "FMAP",
           "VDTA",
           "MECK"
   }
@@ -29,3 +29,4 @@ ACPI Support
   non-d0-probe
   extcon-intel-int3496
   intel-pmc-mux
   chromeos-acpi-device

@@ -693,6 +693,8 @@ in documenting basic Kconfig syntax a more precise definition of Kconfig
semantics is welcomed. One project deduced Kconfig semantics through
the use of the xconfig configurator [1]_. Work should be done to confirm if
the deduced semantics matches our intended Kconfig design goals.
Another project formalized a denotational semantics of a core subset of
the Kconfig language [10]_.

Having well defined semantics can be useful for tools for practical
evaluation of dependencies, for instance one such case was work to
@@ -700,6 +702,8 @@ express in boolean abstraction of the inferred semantics of Kconfig to
translate Kconfig logic into boolean formulas and run a SAT solver on this to
find dead code / features (always inactive), 114 dead features were found in
Linux using this methodology [1]_ (Section 8: Threats to validity).
The kismet tool, based on the semantics in [10]_, finds abuses of reverse
dependencies and has led to dozens of committed fixes to Linux Kconfig files [11]_.

Confirming this could prove useful as Kconfig stands as one of the leading
industrial variability modeling languages [1]_ [2]_. Its study would help
@@ -738,3 +742,5 @@ https://kernelnewbies.org/KernelProjects/kconfig-sat
.. [7] https://vamos.cs.fau.de
.. [8] https://undertaker.cs.fau.de
.. [9] https://www4.cs.fau.de/Publications/2011/tartler_11_eurosys.pdf
.. [10] https://paulgazzillo.com/papers/esecfse21.pdf
.. [11] https://github.com/paulgazz/kmax

@@ -30,6 +30,8 @@ ignore define LIRC_CAN_REC

ignore define LIRC_CAN_SEND_MASK
ignore define LIRC_CAN_REC_MASK
ignore define LIRC_CAN_SET_REC_FILTER
ignore define LIRC_CAN_NOTIFY_DECODE

# Obsolete ioctls

@@ -982,12 +982,22 @@ memory.
        __u8 pad2[30];
  };

If the KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL flag is returned from the
KVM_CAP_XEN_HVM check, it may be set in the flags field of this ioctl.
This requests KVM to generate the contents of the hypercall page
automatically; hypercalls will be intercepted and passed to userspace
through KVM_EXIT_XEN. In this case, all of the blob size and address
fields must be zero.
If certain flags are returned from the KVM_CAP_XEN_HVM check, they may
be set in the flags field of this ioctl:

The KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL flag requests KVM to generate
the contents of the hypercall page automatically; hypercalls will be
intercepted and passed to userspace through KVM_EXIT_XEN. In this
case, all of the blob size and address fields must be zero.

The KVM_XEN_HVM_CONFIG_EVTCHN_SEND flag indicates to KVM that userspace
will always use the KVM_XEN_HVM_EVTCHN_SEND ioctl to deliver event
channel interrupts rather than manipulating the guest's shared_info
structures directly. This, in turn, may allow KVM to enable features
such as intercepting the SCHEDOP_poll hypercall to accelerate PV
spinlock operation for the guest. Userspace may still use the ioctl
to deliver events if it was advertised, even if userspace does not
send this indication that it will always do so.

No other flags are currently valid in the struct kvm_xen_hvm_config.

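As a concrete illustration, requesting hypercall interception looks roughly
like the following userspace fragment (``vm_fd`` is a VM file descriptor
obtained elsewhere; this is a sketch, not a complete VMM)::

   #include <string.h>
   #include <sys/ioctl.h>
   #include <linux/kvm.h>

   /* Sketch: ask KVM to intercept Xen hypercalls. All blob fields stay
    * zero, as required when KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL is used.
    */
   static int enable_xen_hcall_intercept(int vm_fd)
   {
           struct kvm_xen_hvm_config cfg;

           memset(&cfg, 0, sizeof(cfg));
           cfg.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL;
           return ioctl(vm_fd, KVM_XEN_HVM_CONFIG, &cfg);
   }
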
@@ -1476,14 +1486,43 @@ Possible values are:
                                    [s390]
   KVM_MP_STATE_LOAD                the vcpu is in a special load/startup state
                                    [s390]
   KVM_MP_STATE_SUSPENDED           the vcpu is in a suspend state and is waiting
                                    for a wakeup event [arm64]
   ==========================       ===============================================

On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
in-kernel irqchip, the multiprocessing state must be maintained by userspace on
these architectures.

For arm64/riscv:
^^^^^^^^^^^^^^^^
For arm64:
^^^^^^^^^^

If a vCPU is in the KVM_MP_STATE_SUSPENDED state, KVM will emulate the
architectural execution of a WFI instruction.

If a wakeup event is recognized, KVM will exit to userspace with a
KVM_SYSTEM_EVENT exit, where the event type is KVM_SYSTEM_EVENT_WAKEUP. If
userspace wants to honor the wakeup, it must set the vCPU's MP state to
KVM_MP_STATE_RUNNABLE. If it does not, KVM will continue to await a wakeup
event in subsequent calls to KVM_RUN.

.. warning::

     If userspace intends to keep the vCPU in a SUSPENDED state, it is
     strongly recommended that userspace take action to suppress the
     wakeup event (such as masking an interrupt). Otherwise, subsequent
     calls to KVM_RUN will immediately exit with a KVM_SYSTEM_EVENT_WAKEUP
     event and inadvertently waste CPU cycles.

     Additionally, if userspace takes action to suppress a wakeup event,
     it is strongly recommended that it also restores the vCPU to its
     original state when the vCPU is made RUNNABLE again. For example,
     if userspace masked a pending interrupt to suppress the wakeup,
     the interrupt should be unmasked before returning control to the
     guest.

For riscv:
^^^^^^^^^^

The only states that are valid are KVM_MP_STATE_STOPPED and
KVM_MP_STATE_RUNNABLE which reflect if the vcpu is paused or not.
@@ -1887,22 +1926,25 @@ the future.
4.55 KVM_SET_TSC_KHZ
--------------------

:Capability: KVM_CAP_TSC_CONTROL
:Capability: KVM_CAP_TSC_CONTROL / KVM_CAP_VM_TSC_CONTROL
:Architectures: x86
:Type: vcpu ioctl
:Type: vcpu ioctl / vm ioctl
:Parameters: virtual tsc_khz
:Returns: 0 on success, -1 on error

Specifies the tsc frequency for the virtual machine. The unit of the
frequency is KHz.

If the KVM_CAP_VM_TSC_CONTROL capability is advertised, this can also
be used as a vm ioctl to set the initial tsc frequency of subsequently
created vCPUs.

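For example, a VMM that wants the guest TSC to run at 1 GHz might do
something like the sketch below (assuming the capability checks have
already passed)::

   #include <sys/ioctl.h>
   #include <linux/kvm.h>

   /* Sketch: pin the guest TSC to 1 GHz. With KVM_CAP_VM_TSC_CONTROL
    * this can target the VM fd; otherwise it is issued per vCPU fd.
    */
   static int set_guest_tsc_1ghz(int fd)
   {
           return ioctl(fd, KVM_SET_TSC_KHZ, 1000000 /* kHz */);
   }
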
4.56 KVM_GET_TSC_KHZ
--------------------

:Capability: KVM_CAP_GET_TSC_KHZ
:Capability: KVM_CAP_GET_TSC_KHZ / KVM_CAP_VM_TSC_CONTROL
:Architectures: x86
:Type: vcpu ioctl
:Type: vcpu ioctl / vm ioctl
:Parameters: none
:Returns: virtual tsc-khz on success, negative value on error

@@ -2601,6 +2643,24 @@ EINVAL.
After the vcpu's SVE configuration is finalized, further attempts to
write this register will fail with EPERM.

arm64 bitmap feature firmware pseudo-registers have the following bit pattern::

  0x6030 0000 0016 <regno:16>

The bitmap feature firmware registers expose the hypercall services that
are available for userspace to configure. The set bits correspond to the
services that are available for the guests to access. By default, KVM
sets all the supported bits during VM initialization. Userspace can
discover the available services via KVM_GET_ONE_REG, and write back the
bitmap corresponding to the features that it wishes guests to see via
KVM_SET_ONE_REG.

Note: These registers are immutable once any of the vCPUs of the VM has
run at least once. A KVM_SET_ONE_REG in such a scenario will return
-EBUSY to userspace.

(See Documentation/virt/kvm/arm/hypercalls.rst for more details.)

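A hedged sketch of the discover-then-restrict flow described above, using the
standard-services bitmap register on arm64 (error handling omitted)::

   #include <sys/ioctl.h>
   #include <linux/kvm.h>

   /* Sketch: read the standard-services bitmap, clear the TRNG bit so
    * the guest does not see that service, and write the bitmap back.
    * Must run before any vCPU has executed, or SET will return -EBUSY.
    */
   static void restrict_std_services(int vcpu_fd)
   {
           __u64 bmap;
           struct kvm_one_reg reg = {
                   .id   = KVM_REG_ARM_STD_BMAP,
                   .addr = (__u64)(unsigned long)&bmap,
           };

           ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
           bmap &= ~(1ULL << KVM_REG_ARM_STD_BIT_TRNG_V1_0);
           ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
   }
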
MIPS registers are mapped using the lower 32 bits. The upper 16 of that is
the register group type:
@@ -3754,12 +3814,18 @@ in case of KVM_S390_MEMOP_F_CHECK_ONLY), the ioctl returns a positive
error number indicating the type of exception. This exception is also
raised directly at the corresponding VCPU if the flag
KVM_S390_MEMOP_F_INJECT_EXCEPTION is set.
On protection exceptions, unless specified otherwise, the injected
translation-exception identifier (TEID) indicates suppression.

If the KVM_S390_MEMOP_F_SKEY_PROTECTION flag is set, storage key
protection is also in effect and may cause exceptions if accesses are
prohibited given the access key designated by "key"; the valid range is 0..15.
KVM_S390_MEMOP_F_SKEY_PROTECTION is available if KVM_CAP_S390_MEM_OP_EXTENSION
is > 0.
Since the accessed memory may span multiple pages and those pages might have
different storage keys, it is possible that a protection exception occurs
after memory has been modified. In this case, if the exception is injected,
the TEID does not indicate suppression.

Absolute read/write:
^^^^^^^^^^^^^^^^^^^^
@@ -5216,7 +5282,25 @@ have deterministic behavior.
                struct {
                        __u64 gfn;
                } shared_info;
                __u64 pad[4];
                struct {
                        __u32 send_port;
                        __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */
                        __u32 flags;
                        union {
                                struct {
                                        __u32 port;
                                        __u32 vcpu;
                                        __u32 priority;
                                } port;
                                struct {
                                        __u32 port; /* Zero for eventfd */
                                        __s32 fd;
                                } eventfd;
                                __u32 padding[4];
                        } deliver;
                } evtchn;
                __u32 xen_version;
                __u64 pad[8];
        } u;
  };

@@ -5247,6 +5331,30 @@ KVM_XEN_ATTR_TYPE_SHARED_INFO

KVM_XEN_ATTR_TYPE_UPCALL_VECTOR
  Sets the exception vector used to deliver Xen event channel upcalls.
  This is the HVM-wide vector injected directly by the hypervisor
  (not through the local APIC), typically configured by a guest via
  HVM_PARAM_CALLBACK_IRQ.

KVM_XEN_ATTR_TYPE_EVTCHN
  This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
  support for KVM_XEN_HVM_CONFIG_EVTCHN_SEND features. It configures
  an outbound port number for interception of EVTCHNOP_send requests
  from the guest. A given sending port number may be directed back
  to a specified vCPU (by APIC ID) / port / priority on the guest,
  or to trigger events on an eventfd. The vCPU and priority can be
  changed by setting KVM_XEN_EVTCHN_UPDATE in a subsequent call,
  but other fields cannot change for a given sending port. A port
  mapping is removed by using KVM_XEN_EVTCHN_DEASSIGN in the flags
  field.

KVM_XEN_ATTR_TYPE_XEN_VERSION
  This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
  support for KVM_XEN_HVM_CONFIG_EVTCHN_SEND features. It configures
  the 32-bit version code returned to the guest when it invokes the
  XENVER_version call; typically (XEN_MAJOR << 16 | XEN_MINOR). PV
  Xen guests will often use this as a dummy hypercall to trigger
  event channel delivery, so responding within the kernel without
  exiting to userspace is beneficial.

4.127 KVM_XEN_HVM_GET_ATTR
--------------------------
@@ -5258,7 +5366,8 @@ KVM_XEN_ATTR_TYPE_UPCALL_VECTOR
:Returns: 0 on success, < 0 on error

Allows Xen VM attributes to be read. For the structure and types,
see KVM_XEN_HVM_SET_ATTR above.
see KVM_XEN_HVM_SET_ATTR above. The KVM_XEN_ATTR_TYPE_EVTCHN
attribute cannot be read.

4.128 KVM_XEN_VCPU_SET_ATTR
---------------------------
@@ -5285,6 +5394,13 @@ see KVM_XEN_HVM_SET_ATTR above.
                        __u64 time_blocked;
                        __u64 time_offline;
                } runstate;
                __u32 vcpu_id;
                struct {
                        __u32 port;
                        __u32 priority;
                        __u64 expires_ns;
                } timer;
                __u8 vector;
        } u;
  };

@@ -5326,6 +5442,27 @@ KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST
  or RUNSTATE_offline) to set the current accounted state as of the
  adjusted state_entry_time.

KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID
  This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
  support for KVM_XEN_HVM_CONFIG_EVTCHN_SEND features. It sets the Xen
  vCPU ID of the given vCPU, to allow timer-related VCPU operations to
  be intercepted by KVM.

KVM_XEN_VCPU_ATTR_TYPE_TIMER
  This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
  support for KVM_XEN_HVM_CONFIG_EVTCHN_SEND features. It sets the
  event channel port/priority for the VIRQ_TIMER of the vCPU, as well
  as allowing a pending timer to be saved/restored.

KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR
  This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
  support for KVM_XEN_HVM_CONFIG_EVTCHN_SEND features. It sets the
  per-vCPU local APIC upcall vector, configured by a Xen guest with
  the HVMOP_set_evtchn_upcall_vector hypercall. This is typically
  used by Windows guests, and is distinct from the HVM-wide upcall
  vector configured with HVM_PARAM_CALLBACK_IRQ.


4.129 KVM_XEN_VCPU_GET_ATTR
---------------------------

@@ -5645,6 +5782,25 @@ enabled with ``arch_prctl()``, but this may change in the future.
The offsets of the state save areas in struct kvm_xsave follow the contents
of CPUID leaf 0xD on the host.

4.135 KVM_XEN_HVM_EVTCHN_SEND
-----------------------------

:Capability: KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND
:Architectures: x86
:Type: vm ioctl
:Parameters: struct kvm_irq_routing_xen_evtchn
:Returns: 0 on success, < 0 on error

::

   struct kvm_irq_routing_xen_evtchn {
        __u32 port;
        __u32 vcpu;
        __u32 priority;
   };

This ioctl injects an event channel interrupt directly to the guest vCPU.

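Usage is a single ioctl on the VM fd. A sketch (the port number is an
arbitrary example; 2-level delivery uses the corresponding routing priority
constant)::

   #include <sys/ioctl.h>
   #include <linux/kvm.h>

   /* Sketch: inject event channel port 3 into vCPU 0 via 2-level
    * delivery.
    */
   static int send_evtchn(int vm_fd)
   {
           struct kvm_irq_routing_xen_evtchn evt = {
                   .port     = 3,  /* assumed example port */
                   .vcpu     = 0,
                   .priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
           };

           return ioctl(vm_fd, KVM_XEN_HVM_EVTCHN_SEND, &evt);
   }
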
5. The kvm_run structure
========================

@@ -5987,6 +6143,9 @@ should put the acknowledged interrupt vector into the 'epr' field.
                        #define KVM_SYSTEM_EVENT_SHUTDOWN       1
                        #define KVM_SYSTEM_EVENT_RESET          2
                        #define KVM_SYSTEM_EVENT_CRASH          3
                        #define KVM_SYSTEM_EVENT_WAKEUP         4
                        #define KVM_SYSTEM_EVENT_SUSPEND        5
                        #define KVM_SYSTEM_EVENT_SEV_TERM       6
                        __u32 type;
                        __u32 ndata;
                        __u64 data[16];
@@ -6011,6 +6170,13 @@ Valid values for 'type' are:
   has requested a crash condition maintenance. Userspace can choose
   to ignore the request, or to gather VM memory core dump and/or
   reset/shutdown of the VM.
 - KVM_SYSTEM_EVENT_SEV_TERM -- an AMD SEV guest requested termination.
   The guest physical address of the guest's GHCB is stored in `data[0]`.
 - KVM_SYSTEM_EVENT_WAKEUP -- the exiting vCPU is in a suspended state and
   KVM has recognized a wakeup event. Userspace may honor this event by
   marking the exiting vCPU as runnable, or deny it and call KVM_RUN again
   (see the sketch after this list).
 - KVM_SYSTEM_EVENT_SUSPEND -- the guest has requested a suspension of
   the VM.

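As a hedged illustration of the dispatch a VMM run loop might perform on this
exit (``run`` is the mmap'ed struct kvm_run; only the wakeup case is shown)::

   #include <sys/ioctl.h>
   #include <linux/kvm.h>

   /* Sketch: honor a wakeup event by making the suspended vCPU runnable. */
   static void handle_system_event(int vcpu_fd, struct kvm_run *run)
   {
           switch (run->system_event.type) {
           case KVM_SYSTEM_EVENT_WAKEUP: {
                   struct kvm_mp_state st = {
                           .mp_state = KVM_MP_STATE_RUNNABLE,
                   };

                   ioctl(vcpu_fd, KVM_SET_MP_STATE, &st);
                   break;
           }
           default:
                   /* SHUTDOWN, RESET, CRASH, ... handled elsewhere. */
                   break;
           }
   }
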
If KVM_CAP_SYSTEM_EVENT_DATA is present, the 'data' field can contain
architecture specific information for the system-level event. Only
@@ -6027,6 +6193,32 @@ Previous versions of Linux defined a `flags` member in this struct. The
field is now aliased to `data[0]`. Userspace can assume that it is only
written if ndata is greater than 0.

For arm/arm64:
--------------

KVM_SYSTEM_EVENT_SUSPEND exits are enabled with the
KVM_CAP_ARM_SYSTEM_SUSPEND VM capability. If a guest invokes the PSCI
SYSTEM_SUSPEND function, KVM will exit to userspace with this event
type.

It is the sole responsibility of userspace to implement the PSCI
SYSTEM_SUSPEND call according to ARM DEN0022D.b 5.19 "SYSTEM_SUSPEND".
KVM does not change the vCPU's state before exiting to userspace, so
the call parameters are left in-place in the vCPU registers.

Userspace is _required_ to take action for such an exit. It must
either:

 - Honor the guest request to suspend the VM. Userspace can request
   in-kernel emulation of suspension by setting the calling vCPU's
   state to KVM_MP_STATE_SUSPENDED. Userspace must configure the vCPU's
   state according to the parameters passed to the PSCI function when
   the calling vCPU is resumed. See ARM DEN0022D.b 5.19.1 "Intended use"
   for details on the function parameters.

 - Deny the guest request to suspend the VM. See ARM DEN0022D.b 5.19.2
   "Caller responsibilities" for possible return values.

::

  /* KVM_EXIT_IOAPIC_EOI */
@@ -7147,6 +7339,15 @@ The valid bits in cap.args[0] are:
                                     Additionally, when this quirk is disabled,
                                     KVM clears CPUID.01H:ECX[bit 3] if
                                     IA32_MISC_ENABLE[bit 18] is cleared.

 KVM_X86_QUIRK_FIX_HYPERCALL_INSN    By default, KVM rewrites guest
                                     VMMCALL/VMCALL instructions to match the
                                     vendor's hypercall instruction for the
                                     system. When this quirk is disabled, KVM
                                     will no longer rewrite invalid guest
                                     hypercall instructions. Executing the
                                     incorrect hypercall instruction will
                                     generate a #UD within the guest.
 =================================== ============================================

8. Other capabilities.
@@ -7624,8 +7825,9 @@ PVHVM guests. Valid flags are::

  #define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR     (1 << 0)
  #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL   (1 << 1)
  #define KVM_XEN_HVM_CONFIG_SHARED_INFO       (1 << 2)
  #define KVM_XEN_HVM_CONFIG_RUNSTATE          (1 << 2)
  #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL     (1 << 3)
  #define KVM_XEN_HVM_CONFIG_RUNSTATE          (1 << 3)
  #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL     (1 << 4)
  #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND       (1 << 5)

The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG
ioctl is available, for the guest to set its hypercall page.
@@ -7649,6 +7851,14 @@ The KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL flag indicates that IRQ routing entries
of the type KVM_IRQ_ROUTING_XEN_EVTCHN are supported, with the priority
field set to indicate 2 level event channel delivery.

The KVM_XEN_HVM_CONFIG_EVTCHN_SEND flag indicates that KVM supports
injecting event channel events directly into the guest with the
KVM_XEN_HVM_EVTCHN_SEND ioctl. It also indicates support for the
KVM_XEN_ATTR_TYPE_EVTCHN/XEN_VERSION HVM attributes and the
KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID/TIMER/UPCALL_VECTOR vCPU attributes
related to event channel delivery, timers, and the XENVER_version
interception.

8.31 KVM_CAP_PPC_MULTITCE
-------------------------

@@ -7736,6 +7946,16 @@ At this time, KVM_PMU_CAP_DISABLE is the only capability. Setting
this capability will disable PMU virtualization for that VM. Usermode
should adjust CPUID leaf 0xA to reflect that the PMU is disabled.

8.36 KVM_CAP_ARM_SYSTEM_SUSPEND
-------------------------------

:Capability: KVM_CAP_ARM_SYSTEM_SUSPEND
:Architectures: arm64
:Type: vm

When enabled, KVM will exit to userspace with KVM_EXIT_SYSTEM_EVENT of
type KVM_SYSTEM_EVENT_SUSPEND to process the guest suspend request.

9. Known KVM API problems
=========================

138  Documentation/virt/kvm/arm/hypercalls.rst  Normal file
@@ -0,0 +1,138 @@
.. SPDX-License-Identifier: GPL-2.0

=======================
ARM Hypercall Interface
=======================

KVM handles the hypercall services as requested by the guests. New hypercall
services are regularly made available by the ARM specification or by KVM (as
vendor services) if they make sense from a virtualization point of view.

This means that a guest booted on two different versions of KVM can observe
two different "firmware" revisions. This could cause issues if a given guest
is tied to a particular version of a hypercall service, or if a migration
causes a different version to be exposed out of the blue to an unsuspecting
guest.

In order to remedy this situation, KVM exposes a set of "firmware
pseudo-registers" that can be manipulated using the GET/SET_ONE_REG
interface. These registers can be saved/restored by userspace, and set
to a convenient value as required.

The following registers are defined:

* KVM_REG_ARM_PSCI_VERSION:

  KVM implements the PSCI (Power State Coordination Interface)
  specification in order to provide services such as CPU on/off, reset
  and power-off to the guest.

  - Only valid if the vcpu has the KVM_ARM_VCPU_PSCI_0_2 feature set
    (and thus has already been initialized)
  - Returns the current PSCI version on GET_ONE_REG (defaulting to the
    highest PSCI version implemented by KVM and compatible with v0.2)
  - Allows any PSCI version implemented by KVM and compatible with
    v0.2 to be set with SET_ONE_REG
  - Affects the whole VM (even if the register view is per-vcpu)

* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
  Holds the state of the firmware support to mitigate CVE-2017-5715, as
  offered by KVM to the guest via a HVC call. The workaround is described
  under SMCCC_ARCH_WORKAROUND_1 in [1]_.

  Accepted values are:

  KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL:
    KVM does not offer
    firmware support for the workaround. The mitigation status for the
    guest is unknown.
  KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL:
    The workaround HVC call is
    available to the guest and required for the mitigation.
  KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED:
    The workaround HVC call
    is available to the guest, but it is not needed on this VCPU.

* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
  Holds the state of the firmware support to mitigate CVE-2018-3639, as
  offered by KVM to the guest via a HVC call. The workaround is described
  under SMCCC_ARCH_WORKAROUND_2 in [1]_.

  Accepted values are:

  KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL:
    A workaround is not
    available. KVM does not offer firmware support for the workaround.
  KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN:
    The workaround state is
    unknown. KVM does not offer firmware support for the workaround.
  KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
    The workaround is available,
    and can be disabled by a vCPU. If
    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED is set, it is active for
    this vCPU.
  KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED:
    The workaround is always active on this vCPU or it is not needed.

Bitmap Feature Firmware Registers
---------------------------------

Contrary to the above registers, the following registers expose the
hypercall services in the form of a feature bitmap to userspace. This
bitmap is translated to the services that are available to the guest.
There is a register defined per service call owner, and each can be
accessed via the GET/SET_ONE_REG interface.

By default, these registers are set with the upper limit of the features
that are supported. This way userspace can discover all the usable
hypercall services via GET_ONE_REG. Userspace can write back the
desired bitmap via SET_ONE_REG. The features for the registers that
are left untouched, probably because userspace isn't aware of them, will
be exposed as is to the guest.

Note that KVM will not allow userspace to configure the registers
anymore once any of the vCPUs has run at least once. Instead, it will
return -EBUSY.

The pseudo-firmware bitmap registers are as follows:

* KVM_REG_ARM_STD_BMAP:
  Controls the bitmap of the ARM Standard Secure Service Calls.

  The following bits are accepted:

  Bit-0: KVM_REG_ARM_STD_BIT_TRNG_V1_0:
    The bit represents the services offered under v1.0 of the ARM True
    Random Number Generator (TRNG) specification, ARM DEN0098.

* KVM_REG_ARM_STD_HYP_BMAP:
  Controls the bitmap of the ARM Standard Hypervisor Service Calls.

  The following bits are accepted:

  Bit-0: KVM_REG_ARM_STD_HYP_BIT_PV_TIME:
    The bit represents the Paravirtualized Time service as represented by
    ARM DEN0057A.

* KVM_REG_ARM_VENDOR_HYP_BMAP:
  Controls the bitmap of the Vendor specific Hypervisor Service Calls.

  The following bits are accepted:

  Bit-0: KVM_REG_ARM_VENDOR_HYP_BIT_FUNC_FEAT:
    The bit represents the ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID
    and ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID function-ids.

  Bit-1: KVM_REG_ARM_VENDOR_HYP_BIT_PTP:
    The bit represents the Precision Time Protocol KVM service.

Errors:

  =======  =============================================================
  -ENOENT  Unknown register accessed.
  -EBUSY   Attempt a 'write' to the register after the VM has started.
  -EINVAL  Invalid bitmap written to the register.
  =======  =============================================================

.. [1] https://developer.arm.com/-/media/developer/pdf/ARM_DEN_0070A_Firmware_interfaces_for_mitigating_CVE-2017-5715.pdf
@@ -8,6 +8,6 @@ ARM
   :maxdepth: 2

   hyp-abi
   psci
   hypercalls
   pvtime
   ptp_kvm

@@ -1,77 +0,0 @@
.. SPDX-License-Identifier: GPL-2.0

=========================================
Power State Coordination Interface (PSCI)
=========================================

KVM implements the PSCI (Power State Coordination Interface)
specification in order to provide services such as CPU on/off, reset
and power-off to the guest.

The PSCI specification is regularly updated to provide new features,
and KVM implements these updates if they make sense from a virtualization
point of view.

This means that a guest booted on two different versions of KVM can
observe two different "firmware" revisions. This could cause issues if
a given guest is tied to a particular PSCI revision (unlikely), or if
a migration causes a different PSCI version to be exposed out of the
blue to an unsuspecting guest.

In order to remedy this situation, KVM exposes a set of "firmware
pseudo-registers" that can be manipulated using the GET/SET_ONE_REG
interface. These registers can be saved/restored by userspace, and set
to a convenient value if required.

The following register is defined:

* KVM_REG_ARM_PSCI_VERSION:

  - Only valid if the vcpu has the KVM_ARM_VCPU_PSCI_0_2 feature set
    (and thus has already been initialized)
  - Returns the current PSCI version on GET_ONE_REG (defaulting to the
    highest PSCI version implemented by KVM and compatible with v0.2)
  - Allows any PSCI version implemented by KVM and compatible with
    v0.2 to be set with SET_ONE_REG
  - Affects the whole VM (even if the register view is per-vcpu)

* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
  Holds the state of the firmware support to mitigate CVE-2017-5715, as
  offered by KVM to the guest via a HVC call. The workaround is described
  under SMCCC_ARCH_WORKAROUND_1 in [1]_.

  Accepted values are:

  KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL:
    KVM does not offer
    firmware support for the workaround. The mitigation status for the
    guest is unknown.
  KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL:
    The workaround HVC call is
    available to the guest and required for the mitigation.
  KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED:
    The workaround HVC call
    is available to the guest, but it is not needed on this VCPU.

* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
  Holds the state of the firmware support to mitigate CVE-2018-3639, as
  offered by KVM to the guest via a HVC call. The workaround is described
  under SMCCC_ARCH_WORKAROUND_2 in [1]_.

  Accepted values are:

  KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL:
    A workaround is not
    available. KVM does not offer firmware support for the workaround.
  KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN:
    The workaround state is
    unknown. KVM does not offer firmware support for the workaround.
  KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
    The workaround is available,
    and can be disabled by a vCPU. If
    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED is set, it is active for
    this vCPU.
  KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED:
    The workaround is always active on this vCPU or it is not needed.

.. [1] https://developer.arm.com/-/media/developer/pdf/ARM_DEN_0070A_Firmware_interfaces_for_mitigating_CVE-2017-5715.pdf
@@ -202,6 +202,10 @@ Shadow pages contain the following information:
    Is 1 if the MMU instance cannot use A/D bits. EPT did not have A/D
    bits before Haswell; shadow EPT page tables also cannot use A/D bits
    if the L1 hypervisor does not enable them.
  role.passthrough:
    The page is not backed by a guest page table, but its first entry
    points to one. This is set if NPT uses 5-level page tables (host
    CR4.LA57=1) and is shadowing L1's 4-level NPT (L1 CR4.LA57=1).
  gfn:
    Either the guest page table containing the translations shadowed by this
    page, or the base page frame for linear translations. See role.direct.

@@ -50,61 +50,74 @@ space when they use mm context tags.
Temporary Virtual Mappings
==========================

The kernel contains several ways of creating temporary mappings:
The kernel contains several ways of creating temporary mappings. The following
list shows them in order of preference of use.

* vmap(). This can be used to make a long duration mapping of multiple
  physical pages into a contiguous virtual space. It needs global
  synchronization to unmap.
* kmap_local_page(). This function is used to require short term mappings.
  It can be invoked from any context (including interrupts) but the mappings
  can only be used in the context which acquired them (see the usage sketch
  after this list).

* kmap(). This permits a short duration mapping of a single page. It needs
  global synchronization, but is amortized somewhat. It is also prone to
  deadlocks when using in a nested fashion, and so it is not recommended for
  new code.
  This function should be preferred, where feasible, over all the others.

  These mappings are thread-local and CPU-local, meaning that the mapping
  can only be accessed from within this thread and the thread is bound to the
  CPU while the mapping is active. Even if the thread is preempted (since
  preemption is never disabled by the function) the CPU can not be
  unplugged from the system via CPU-hotplug until the mapping is disposed.

  It's valid to take pagefaults in a local kmap region, unless the context
  in which the local mapping is acquired does not allow it for other reasons.

  kmap_local_page() always returns a valid virtual address and it is assumed
  that kunmap_local() will never fail.

  Nesting kmap_local_page() and kmap_atomic() mappings is allowed to a certain
  extent (up to KMAP_TYPE_NR) but their invocations have to be strictly ordered
  because the map implementation is stack based. See kmap_local_page() kdocs
  (included in the "Functions" section) for details on how to manage nested
  mappings.

* kmap_atomic(). This permits a very short duration mapping of a single
  page. Since the mapping is restricted to the CPU that issued it, it
  performs well, but the issuing task is therefore required to stay on that
  CPU until it has finished, lest some other task displace its mappings.

  kmap_atomic() may also be used by interrupt contexts, since it is does not
  sleep and the caller may not sleep until after kunmap_atomic() is called.
  kmap_atomic() may also be used by interrupt contexts, since it does not
  sleep and the callers too may not sleep until after kunmap_atomic() is
  called.

  It may be assumed that k[un]map_atomic() won't fail.
  Each call of kmap_atomic() in the kernel creates a non-preemptible section
  and disables pagefaults. This could be a source of unwanted latency.
  Therefore users should prefer kmap_local_page() instead of kmap_atomic().

  It is assumed that k[un]map_atomic() won't fail.

Using kmap_atomic
=================
* kmap(). This should be used to make short duration mapping of a single
  page with no restrictions on preemption or migration. It comes with an
  overhead as mapping space is restricted and protected by a global lock
  for synchronization. When mapping is no longer needed, the address that
  the page was mapped to must be released with kunmap().

When and where to use kmap_atomic() is straightforward. It is used when code
wants to access the contents of a page that might be allocated from high memory
(see __GFP_HIGHMEM), for example a page in the pagecache. The API has two
functions, and they can be used in a manner similar to the following::
  Mapping changes must be propagated across all the CPUs. kmap() also
  requires global TLB invalidation when the kmap's pool wraps and it might
  block when the mapping space is fully utilized until a slot becomes
  available. Therefore, kmap() is only callable from preemptible context.

  /* Find the page of interest. */
  struct page *page = find_get_page(mapping, offset);
  All the above work is necessary if a mapping must last for a relatively
  long time but the bulk of high-memory mappings in the kernel are
  short-lived and only used in one place. This means that the cost of
  kmap() is mostly wasted in such cases. kmap() was not intended for long
  term mappings but it has morphed in that direction and its use is
  strongly discouraged in newer code and the set of the preceding functions
  should be preferred.

  /* Gain access to the contents of that page. */
  void *vaddr = kmap_atomic(page);
  On 64-bit systems, calls to kmap_local_page(), kmap_atomic() and kmap() have
  no real work to do because a 64-bit address space is more than sufficient to
  address all the physical memory whose pages are permanently mapped.

  /* Do something to the contents of that page. */
  memset(vaddr, 0, PAGE_SIZE);

  /* Unmap that page. */
  kunmap_atomic(vaddr);

Note that the kunmap_atomic() call takes the result of the kmap_atomic() call
not the argument.

If you need to map two pages because you want to copy from one page to
another you need to keep the kmap_atomic calls strictly nested, like::

  vaddr1 = kmap_atomic(page1);
  vaddr2 = kmap_atomic(page2);

  memcpy(vaddr1, vaddr2, PAGE_SIZE);

  kunmap_atomic(vaddr2);
  kunmap_atomic(vaddr1);
* vmap(). This can be used to make a long duration mapping of multiple
  physical pages into a contiguous virtual space. It needs global
  synchronization to unmap.

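The usage sketch promised above, using the preferred kmap_local_page()
interface in place of the older kmap_atomic() pattern (the find_get_page()
arguments are illustrative)::

  /* Sketch: zero a (possibly highmem) pagecache page via a local mapping. */
  struct page *page = find_get_page(mapping, offset);  /* example lookup */
  void *vaddr = kmap_local_page(page);

  memset(vaddr, 0, PAGE_SIZE);
  kunmap_local(vaddr);  /* takes the address kmap_local_page() returned */
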
Cost of Temporary Mappings
@@ -145,3 +158,10 @@ The general recommendation is that you don't use more than 8GiB on a 32-bit
machine - although more might work for you and your workload, you're pretty
much on your own - don't expect kernel developers to really care much if things
come apart.


Functions
=========

.. kernel-doc:: include/linux/highmem.h
.. kernel-doc:: include/linux/highmem-internal.h

@@ -63,5 +63,6 @@ above structured documentation, or deleted if it has served its purpose.
   transhuge
   unevictable-lru
   vmalloced-kernel-stacks
   vmemmap_dedup
   z3fold
   zsmalloc

@@ -121,6 +121,14 @@ Usage
        -r              Sort by memory release time.
        -s              Sort by stack trace.
        -t              Sort by times (default).
        --sort <order>  Specify sorting order. Sorting syntax is [+|-]key[,[+|-]key[,...]].
                        Choose a key from the **STANDARD FORMAT SPECIFIERS** section. The "+" is
                        optional since the default direction is increasing numerical or lexicographic
                        order. Mixed use of abbreviated and complete-form of keys is allowed.

                Examples:
                        ./page_owner_sort <input> <output> --sort=n,+pid,-tgid
                        ./page_owner_sort <input> <output> --sort=at

   additional function::

@@ -129,7 +137,6 @@ Usage
                Specify culling rules. Culling syntax is key[,key[,...]]. Choose a
                multi-letter key from the **STANDARD FORMAT SPECIFIERS** section.

                <rules> is a single argument in the form of a comma-separated list,
                which offers a way to specify individual culling rules. The recognized
                keywords are described in the **STANDARD FORMAT SPECIFIERS** section below.
@@ -137,7 +144,6 @@ Usage
                the STANDARD SORT KEYS section below. Mixed use of abbreviated and
                complete-form of keys is allowed.

                Examples:
                        ./page_owner_sort <input> <output> --cull=stacktrace
                        ./page_owner_sort <input> <output> --cull=st,pid,name
@@ -147,17 +153,44 @@ Usage
        -f              Filter out the information of blocks whose memory has been released.

        Select:
                --pid <PID>             Select by pid.
                --tgid <TGID>           Select by tgid.
                --name <command>        Select by task command name.
                --pid <pidlist>         Select by pid. This selects the blocks whose process ID
                                        numbers appear in <pidlist>.
                --tgid <tgidlist>       Select by tgid. This selects the blocks whose thread
                                        group ID numbers appear in <tgidlist>.
                --name <cmdlist>        Select by task command name. This selects the blocks whose
                                        task command name appears in <cmdlist>.

                <pidlist>, <tgidlist>, <cmdlist> are single arguments in the form of a
                comma-separated list, which offers a way to specify individual selecting rules.

                Examples:
                        ./page_owner_sort <input> <output> --pid=1
                        ./page_owner_sort <input> <output> --tgid=1,2,3
                        ./page_owner_sort <input> <output> --name name1,name2

STANDARD FORMAT SPECIFIERS
==========================
::

  For --sort option:

        KEY             LONG            DESCRIPTION
        p               pid             process ID
        tg              tgid            thread group ID
        n               name            task command name
        st              stacktrace      stack trace of the page allocation
        T               txt             full text of block
        ft              free_ts         timestamp of the page when it was released
        at              alloc_ts        timestamp of the page when it was allocated
        ator            allocator       memory allocator for pages

  For --cull option:

        KEY             LONG            DESCRIPTION
        p               pid             process ID
        tg              tgid            thread group ID
        n               name            task command name
        f               free            whether the page has been released or not
        st              stacktrace      stace trace of the page allocation
        st              stacktrace      stack trace of the page allocation
        ator            allocator       memory allocator for pages

223  Documentation/vm/vmemmap_dedup.rst  Normal file
@@ -0,0 +1,223 @@
.. SPDX-License-Identifier: GPL-2.0

=========================================
A vmemmap diet for HugeTLB and Device DAX
=========================================

HugeTLB
=======

The struct page structures (page structs) are used to describe a physical
page frame. By default, there is a one-to-one mapping from a page frame to
its corresponding page struct.

HugeTLB pages consist of multiple base page size pages and are supported by
many architectures. See Documentation/admin-guide/mm/hugetlbpage.rst for more
details. On the x86-64 architecture, HugeTLB pages of size 2MB and 1GB are
currently supported. Since the base page size on x86 is 4KB, a 2MB HugeTLB page
consists of 512 base pages and a 1GB HugeTLB page consists of 4096 base pages.
For each base page, there is a corresponding page struct.

Within the HugeTLB subsystem, only the first 4 page structs are used to
contain unique information about a HugeTLB page. __NR_USED_SUBPAGE provides
this upper limit. The only 'useful' information in the remaining page structs
is the compound_head field, and this field is the same for all tail pages.

By removing redundant page structs for HugeTLB pages, memory can be returned
to the buddy allocator for other uses.

Different architectures support different HugeTLB pages. For example, the
following table shows the HugeTLB page sizes supported by the x86 and arm64
architectures. Because arm64 supports 4K, 16K, and 64K base pages as well as
contiguous entries, it supports many HugeTLB page sizes.

+--------------+-----------+-----------------------------------------------+
| Architecture | Page Size |               HugeTLB Page Size               |
+--------------+-----------+-----------+-----------+-----------+-----------+
|    x86-64    |    4KB    |    2MB    |    1GB    |           |           |
+--------------+-----------+-----------+-----------+-----------+-----------+
|              |    4KB    |   64KB    |    2MB    |    32MB   |    1GB    |
|              +-----------+-----------+-----------+-----------+-----------+
|    arm64     |   16KB    |    2MB    |    32MB   |    1GB    |           |
|              +-----------+-----------+-----------+-----------+-----------+
|              |   64KB    |    2MB    |   512MB   |    16GB   |           |
+--------------+-----------+-----------+-----------+-----------+-----------+

When the system boot up, every HugeTLB page has more than one struct page
|
||||
structs which size is (unit: pages)::
|
||||
|
||||
struct_size = HugeTLB_Size / PAGE_SIZE * sizeof(struct page) / PAGE_SIZE
|
||||
|
||||
Where HugeTLB_Size is the size of the HugeTLB page. We know that the size
|
||||
of the HugeTLB page is always n times PAGE_SIZE. So we can get the following
|
||||
relationship::
|
||||
|
||||
HugeTLB_Size = n * PAGE_SIZE
|
||||
|
||||
Then::
|
||||
|
||||
struct_size = n * PAGE_SIZE / PAGE_SIZE * sizeof(struct page) / PAGE_SIZE
|
||||
= n * sizeof(struct page) / PAGE_SIZE
|
||||
|
||||
We can use huge mapping at the pud/pmd level for the HugeTLB page.
|
||||
|
||||
For the HugeTLB page of the pmd level mapping, then::
|
||||
|
||||
struct_size = n * sizeof(struct page) / PAGE_SIZE
|
||||
= PAGE_SIZE / sizeof(pte_t) * sizeof(struct page) / PAGE_SIZE
|
||||
= sizeof(struct page) / sizeof(pte_t)
|
||||
= 64 / 8
|
||||
= 8 (pages)
|
||||
|
||||
Where n is how many pte entries which one page can contains. So the value of
|
||||
n is (PAGE_SIZE / sizeof(pte_t)).
|
||||
|
||||
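A quick userspace sanity check of the arithmetic above (an illustrative
sketch of my own, using the constants this document assumes: a 4KB base
page, a 64-byte struct page and an 8-byte pte_t)::

   #include <stdio.h>

   int main(void)
   {
           unsigned long page_size   = 4096; /* base page size in bytes */
           unsigned long struct_page = 64;   /* sizeof(struct page)     */
           unsigned long pte_size    = 8;    /* sizeof(pte_t), 64-bit   */
           unsigned long n = page_size / pte_size; /* 512 ptes per page */

           /* vmemmap pages needed per pmd-mapped HugeTLB page */
           printf("%lu\n", n * struct_page / page_size); /* prints 8 */
           return 0;
   }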
This optimization only supports 64-bit systems, so the value of sizeof(pte_t)
is 8. And this optimization is also applicable only when the size of struct
page is a power of two. In most cases, the size of struct page is 64 bytes
(e.g. x86-64 and arm64). So if we use pmd level mapping for a HugeTLB page,
the size of its struct page structs is 8 page frames, whose size depends on
the size of the base page.

For the HugeTLB page of the pud level mapping, then::

   struct_size = PAGE_SIZE / sizeof(pmd_t) * struct_size(pmd)
               = PAGE_SIZE / 8 * 8 (pages)
               = PAGE_SIZE (pages)

Where struct_size(pmd) is the size of the struct page structs of a
HugeTLB page of the pmd level mapping.

E.g.: A 2MB HugeTLB page on x86_64 consists of 8 page frames while a 1GB
HugeTLB page consists of 4096.

Next, we take the pmd level mapping of the HugeTLB page as an example to
show the internal implementation of this optimization. There are 8 pages of
struct page structs associated with a HugeTLB page which is pmd mapped.

Here is how things look before optimization::

 HugeTLB                  struct pages(8 pages)         page frame(8 pages)
 +-----------+ ---virt_to_page---> +-----------+   mapping to   +-----------+
 |           |                     |     0     | -------------> |     0     |
 |           |                     +-----------+                +-----------+
 |           |                     |     1     | -------------> |     1     |
 |           |                     +-----------+                +-----------+
 |           |                     |     2     | -------------> |     2     |
 |           |                     +-----------+                +-----------+
 |           |                     |     3     | -------------> |     3     |
 |           |                     +-----------+                +-----------+
 |           |                     |     4     | -------------> |     4     |
 |    PMD    |                     +-----------+                +-----------+
 |   level   |                     |     5     | -------------> |     5     |
 |  mapping  |                     +-----------+                +-----------+
 |           |                     |     6     | -------------> |     6     |
 |           |                     +-----------+                +-----------+
 |           |                     |     7     | -------------> |     7     |
 |           |                     +-----------+                +-----------+
 |           |
 |           |
 |           |
 +-----------+

The value of page->compound_head is the same for all tail pages. The first
page of page structs (page 0) associated with the HugeTLB page contains the 4
page structs necessary to describe the HugeTLB. The only use of the remaining
pages of page structs (page 1 to page 7) is to point to page->compound_head.
Therefore, we can remap pages 1 to 7 to page 0. Only 1 page of page structs
will be used for each HugeTLB page. This will allow us to free the remaining
7 pages to the buddy allocator.

Here is how things look after remapping::

 HugeTLB                  struct pages(8 pages)         page frame(8 pages)
 +-----------+ ---virt_to_page---> +-----------+   mapping to   +-----------+
 |           |                     |     0     | -------------> |     0     |
 |           |                     +-----------+                +-----------+
 |           |                     |     1     | ---------------^ ^ ^ ^ ^ ^ ^
 |           |                     +-----------+                  | | | | | |
 |           |                     |     2     | -----------------+ | | | | |
 |           |                     +-----------+                    | | | | |
 |           |                     |     3     | -------------------+ | | | |
 |           |                     +-----------+                      | | | |
 |           |                     |     4     | ---------------------+ | | |
 |    PMD    |                     +-----------+                        | | |
 |   level   |                     |     5     | -----------------------+ | |
 |  mapping  |                     +-----------+                          | |
 |           |                     |     6     | -------------------------+ |
 |           |                     +-----------+                            |
 |           |                     |     7     | ---------------------------+
 |           |                     +-----------+
 |           |
 |           |
 |           |
 +-----------+

When a HugeTLB is freed to the buddy system, we should allocate 7 pages for
vmemmap pages and restore the previous mapping relationship.

For the HugeTLB page of the pud level mapping, it is similar to the former.
We also can use this approach to free (PAGE_SIZE - 1) vmemmap pages.
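As a back-of-the-envelope check (my own arithmetic, not part of the patch),
on x86-64 with 4KB base pages this works out to::

   2MB HugeTLB: 7 of 8 vmemmap pages freed -> 28KB saved per huge page,
                i.e. ~14MB (about 1.4%) per 1GB of 2MB HugeTLB pages
   1GB HugeTLB: 4095 of 4096 vmemmap pages freed -> ~16MB saved per huge
                page (about 1.6%)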
Apart from the HugeTLB page of the pmd/pud level mapping, some architectures
(e.g. aarch64) provide a contiguous bit in the translation table entries
that hints to the MMU to indicate that it is one of a contiguous set of
entries that can be cached in a single TLB entry.

The contiguous bit is used to increase the mapping size at the pmd and pte
(last) level. So this type of HugeTLB page can be optimized only when the
size of its struct page structs is greater than 1 page.

Notice: The head vmemmap page is not freed to the buddy allocator and all
tail vmemmap pages are mapped to the head vmemmap page frame. So we can see
more than one struct page struct with PG_head (e.g. 8 per 2 MB HugeTLB page)
associated with each HugeTLB page. The compound_head() can handle this
correctly (for more details, refer to the comment above compound_head()).
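For reference, here is a simplified sketch of what mainline's
compound_head() does (details elided; see include/linux/page-flags.h for the
real helper)::

   static inline struct page *compound_head(struct page *page)
   {
           unsigned long head = READ_ONCE(page->compound_head);

           /* Tail pages store the head pointer with bit 0 set as a tag. */
           if (head & 1)
                   return (struct page *)(head - 1);
           return page;
   }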
Device DAX
==========

The device-dax interface uses the same tail deduplication technique explained
in the previous chapter, except when used with the vmemmap in
the device (altmap).

The following page sizes are supported in DAX: PAGE_SIZE (4K on x86_64),
PMD_SIZE (2M on x86_64) and PUD_SIZE (1G on x86_64).

The differences with HugeTLB are relatively minor.

It only uses 3 page structs for storing all information as opposed
to 4 on HugeTLB pages.

There's no remapping of vmemmap given that device-dax memory is not part of
System RAM ranges initialized at boot. Thus the tail page deduplication
happens at a later stage when we populate the sections. HugeTLB reuses the
head vmemmap page, whereas device-dax reuses the tail vmemmap page. This
results in only half of the savings compared to HugeTLB.

Deduplicated tail pages are not mapped read-only.

Here's how things look on device-dax after the sections are populated::

 +-----------+ ---virt_to_page---> +-----------+   mapping to   +-----------+
 |           |                     |     0     | -------------> |     0     |
 |           |                     +-----------+                +-----------+
 |           |                     |     1     | -------------> |     1     |
 |           |                     +-----------+                +-----------+
 |           |                     |     2     | ----------------^ ^ ^ ^ ^ ^
 |           |                     +-----------+                   | | | | |
 |           |                     |     3     | ------------------+ | | | |
 |           |                     +-----------+                     | | | |
 |           |                     |     4     | --------------------+ | | |
 |    PMD    |                     +-----------+                       | | |
 |   level   |                     |     5     | ----------------------+ | |
 |  mapping  |                     +-----------+                         | |
 |           |                     |     6     | ------------------------+ |
 |           |                     +-----------+                           |
 |           |                     |     7     | --------------------------+
 |           |                     +-----------+
 |           |
 |           |
 |           |
 +-----------+
14
MAINTAINERS
@ -5027,6 +5027,7 @@ F: Documentation/admin-guide/cgroup-v1/
F: Documentation/admin-guide/cgroup-v2.rst
F: include/linux/cgroup*
F: kernel/cgroup/
F: tools/testing/selftests/cgroup/

CONTROL GROUP - BLOCK IO CONTROLLER (BLKIO)
M: Tejun Heo <tj@kernel.org>
@ -5060,6 +5061,8 @@ L: linux-mm@kvack.org
S: Maintained
F: mm/memcontrol.c
F: mm/swap_cgroup.c
F: tools/testing/selftests/cgroup/test_kmem.c
F: tools/testing/selftests/cgroup/test_memcontrol.c

CORETEMP HARDWARE MONITORING DRIVER
M: Fenghua Yu <fenghua.yu@intel.com>
@ -9064,16 +9067,20 @@ S: Orphan
F: Documentation/networking/device_drivers/ethernet/huawei/hinic.rst
F: drivers/net/ethernet/huawei/hinic/

HUGETLB FILESYSTEM
HUGETLB SUBSYSTEM
M: Mike Kravetz <mike.kravetz@oracle.com>
M: Muchun Song <songmuchun@bytedance.com>
L: linux-mm@kvack.org
S: Maintained
F: Documentation/ABI/testing/sysfs-kernel-mm-hugepages
F: Documentation/admin-guide/mm/hugetlbpage.rst
F: Documentation/vm/hugetlbfs_reserv.rst
F: Documentation/vm/vmemmap_dedup.rst
F: fs/hugetlbfs/
F: include/linux/hugetlb.h
F: mm/hugetlb.c
F: mm/hugetlb_vmemmap.c
F: mm/hugetlb_vmemmap.h

HVA ST MEDIA DRIVER
M: Jean-Christophe Trotin <jean-christophe.trotin@foss.st.com>
@ -10830,6 +10837,8 @@ T: git git://github.com/kvm-riscv/linux.git
F: arch/riscv/include/asm/kvm*
F: arch/riscv/include/uapi/asm/kvm*
F: arch/riscv/kvm/
F: tools/testing/selftests/kvm/*/riscv/
F: tools/testing/selftests/kvm/riscv/

KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
M: Christian Borntraeger <borntraeger@linux.ibm.com>
@ -10844,9 +10853,12 @@ F: Documentation/virt/kvm/s390*
F: arch/s390/include/asm/gmap.h
F: arch/s390/include/asm/kvm*
F: arch/s390/include/uapi/asm/kvm*
F: arch/s390/include/uapi/asm/uvdevice.h
F: arch/s390/kernel/uv.c
F: arch/s390/kvm/
F: arch/s390/mm/gmap.c
F: drivers/s390/char/uvdevice.c
F: tools/testing/selftests/drivers/s390x/uvdevice/
F: tools/testing/selftests/kvm/*/s390x/
F: tools/testing/selftests/kvm/s390x/
16
Makefile
@ -454,6 +454,7 @@ else
HOSTCC = gcc
HOSTCXX = g++
endif
HOSTPKG_CONFIG = pkg-config

KBUILD_USERHOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes \
	-O2 -fomit-frame-pointer -std=gnu11 \
@ -551,7 +552,7 @@ KBUILD_LDFLAGS_MODULE :=
KBUILD_LDFLAGS :=
CLANG_FLAGS :=

export ARCH SRCARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC
export ARCH SRCARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC HOSTPKG_CONFIG
export CPP AR NM STRIP OBJCOPY OBJDUMP READELF PAHOLE RESOLVE_BTFIDS LEX YACC AWK INSTALLKERNEL
export PERL PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX
export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD
@ -1331,11 +1332,12 @@ scripts_unifdef: scripts_basic
# Install

# Many distributions have the custom install script, /sbin/installkernel.
# If DKMS is installed, 'make install' will eventually recuses back
# to the this Makefile to build and install external modules.
# If DKMS is installed, 'make install' will eventually recurse back
# to this Makefile to build and install external modules.
# Cancel sub_make_done so that options such as M=, V=, etc. are parsed.

install: sub_make_done :=
quiet_cmd_install = INSTALL $(INSTALL_PATH)
      cmd_install = unset sub_make_done; $(srctree)/scripts/install.sh

# ---------------------------------------------------------------------------
# Tools
@ -1691,6 +1693,7 @@ help:
	@echo '		1: warnings which may be relevant and do not occur too often'
	@echo '		2: warnings which occur quite often but may still be relevant'
	@echo '		3: more obscure warnings, can most likely be ignored'
	@echo '		e: warnings are being treated as errors'
	@echo '		Multiple levels can be combined with W=12 or W=123'
	@echo ''
	@echo 'Execute "make" or "make all" to build all targets marked with [*] '
@ -1835,7 +1838,8 @@ ifdef single-build

# .ko is special because modpost is needed
single-ko := $(sort $(filter %.ko, $(MAKECMDGOALS)))
single-no-ko := $(sort $(patsubst %.ko,%.mod, $(MAKECMDGOALS)))
single-no-ko := $(filter-out $(single-ko), $(MAKECMDGOALS)) \
		$(foreach x, o mod, $(patsubst %.ko, %.$x, $(single-ko)))

$(single-ko): single_modpost
	@:
@ -1891,7 +1895,7 @@ clean: $(clean-dirs)
		-o -name '*.ko.*' \
		-o -name '*.dtb' -o -name '*.dtbo' -o -name '*.dtb.S' -o -name '*.dt.yaml' \
		-o -name '*.dwo' -o -name '*.lst' \
		-o -name '*.su' -o -name '*.mod' \
		-o -name '*.su' -o -name '*.mod' -o -name '*.usyms' \
		-o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \
		-o -name '*.lex.c' -o -name '*.tab.[ch]' \
		-o -name '*.asn1.[ch]' \
1
OWNERS
@ -8,5 +8,6 @@ adelva@google.com
gregkh@google.com
maennich@google.com
saravanak@google.com
smuckle@google.com
surenb@google.com
tkjos@google.com

@ -18,7 +18,7 @@ extern void clear_page(void *page);
#define clear_user_page(page, vaddr, pg)	clear_page(page)

#define alloc_zeroed_user_highpage_movable(vma, vaddr) \
	alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vmaddr)
	alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr)
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE

extern void copy_page(void * _to, void * _from);
@ -312,9 +312,9 @@ $(BOOT_TARGETS): vmlinux
	$(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $(boot)/$@
	@$(kecho) '  Kernel: $(boot)/$@ is ready'

$(INSTALL_TARGETS): KBUILD_IMAGE = $(boot)/$(patsubst %install,%Image,$@)
$(INSTALL_TARGETS):
	$(CONFIG_SHELL) $(srctree)/$(boot)/install.sh "$(KERNELRELEASE)" \
	$(boot)/$(patsubst %install,%Image,$@) System.map "$(INSTALL_PATH)"
	$(call cmd,install)

PHONY += vdso_install
vdso_install:

21
arch/arm/boot/install.sh
Normal file → Executable file
@ -1,7 +1,5 @@
#!/bin/sh
#
# arch/arm/boot/install.sh
#
# This file is subject to the terms and conditions of the GNU General Public
# License.  See the file "COPYING" in the main directory of this archive
# for more details.
@ -18,25 +16,6 @@
#   $2 - kernel image file
#   $3 - kernel map file
#   $4 - default install path (blank if root directory)
#

verify () {
	if [ ! -f "$1" ]; then
		echo ""                                                   1>&2
		echo " *** Missing file: $1"                              1>&2
		echo ' *** You need to run "make" before "make install".' 1>&2
		echo ""                                                   1>&2
		exit 1
	fi
}

# Make sure the files actually exist
verify "$2"
verify "$3"

# User may have a custom install script
if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi

if [ "$(basename $2)" = "zImage" ]; then
# Compressed install
@ -472,11 +472,10 @@ static int __init da830_evm_ui_expander_setup(struct i2c_client *client,
	return 0;
}

static int da830_evm_ui_expander_teardown(struct i2c_client *client, int gpio,
static void da830_evm_ui_expander_teardown(struct i2c_client *client, int gpio,
		unsigned ngpio, void *context)
{
	gpio_free(gpio + 6);
	return 0;
}

static struct pcf857x_platform_data __initdata da830_evm_ui_expander_info = {

@ -365,14 +365,13 @@ evm_led_setup(struct i2c_client *client, int gpio, unsigned ngpio, void *c)
	return status;
}

static int
static void
evm_led_teardown(struct i2c_client *client, int gpio, unsigned ngpio, void *c)
{
	if (evm_led_dev) {
		platform_device_unregister(evm_led_dev);
		evm_led_dev = NULL;
	}
	return 0;
}

static struct pcf857x_platform_data pcf_data_u2 = {
@ -427,7 +426,7 @@ evm_u18_setup(struct i2c_client *client, int gpio, unsigned ngpio, void *c)
	return 0;
}

static int
static void
evm_u18_teardown(struct i2c_client *client, int gpio, unsigned ngpio, void *c)
{
	gpio_free(gpio + 1);
@ -438,7 +437,6 @@ evm_u18_teardown(struct i2c_client *client, int gpio, unsigned ngpio, void *c)
		device_remove_file(&client->dev, &dev_attr_user_sw);
		gpio_free(sw_gpio);
	}
	return 0;
}

static struct pcf857x_platform_data pcf_data_u18 = {
@ -487,7 +485,7 @@ evm_u35_setup(struct i2c_client *client, int gpio, unsigned ngpio, void *c)
	return 0;
}

static int
static void
evm_u35_teardown(struct i2c_client *client, int gpio, unsigned ngpio, void *c)
{
	gpio_free(gpio + 7);
@ -497,7 +495,6 @@ evm_u35_teardown(struct i2c_client *client, int gpio, unsigned ngpio, void *c)
	gpio_free(gpio + 2);
	gpio_free(gpio + 1);
	gpio_free(gpio + 0);
	return 0;
}

static struct pcf857x_platform_data pcf_data_u35 = {

@ -314,15 +314,13 @@ static int evm_pcf_setup(struct i2c_client *client, int gpio,
	return evm_led_setup(client, gpio+4, 4, c);
}

static int evm_pcf_teardown(struct i2c_client *client, int gpio,
static void evm_pcf_teardown(struct i2c_client *client, int gpio,
			unsigned int ngpio, void *c)
{
	BUG_ON(ngpio < 8);

	evm_sw_teardown(client, gpio, 4, c);
	evm_led_teardown(client, gpio+4, 4, c);

	return 0;
}

static struct pcf857x_platform_data pcf_data = {
@ -45,6 +45,7 @@ config ARM64
	select ARCH_HAS_SYSCALL_WRAPPER
	select ARCH_HAS_TEARDOWN_DMA_OPS if IOMMU_SUPPORT
	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
	select ARCH_HAS_VM_GET_PAGE_PROT
	select ARCH_HAS_ZONE_DMA_SET if EXPERT
	select ARCH_HAVE_ELF_PROT
	select ARCH_HAVE_NMI_SAFE_CMPXCHG
@ -91,11 +92,13 @@ config ARM64
	select ARCH_SUPPORTS_ATOMIC_RMW
	select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
	select ARCH_SUPPORTS_NUMA_BALANCING
	select ARCH_SUPPORTS_PAGE_TABLE_CHECK
	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
	select ARCH_WANT_DEFAULT_BPF_JIT
	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
	select ARCH_WANT_FRAME_POINTERS
	select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
	select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
	select ARCH_WANT_LD_ORPHAN_WARN
	select ARCH_WANTS_NO_INSTR
	select ARCH_HAS_UBSAN_SANITIZE_ALL

@ -165,11 +165,9 @@ Image: vmlinux
Image.%: Image
	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@

install: install-image := Image
zinstall: install-image := Image.gz
install: KBUILD_IMAGE := $(boot)/Image
install zinstall:
	$(CONFIG_SHELL) $(srctree)/$(boot)/install.sh $(KERNELRELEASE) \
	$(boot)/$(install-image) System.map "$(INSTALL_PATH)"
	$(call cmd,install)

PHONY += vdso_install
vdso_install:

21
arch/arm64/boot/install.sh
Normal file → Executable file
@ -1,7 +1,5 @@
#!/bin/sh
#
# arch/arm64/boot/install.sh
#
# This file is subject to the terms and conditions of the GNU General Public
# License.  See the file "COPYING" in the main directory of this archive
# for more details.
@ -18,25 +16,6 @@
#   $2 - kernel image file
#   $3 - kernel map file
#   $4 - default install path (blank if root directory)
#

verify () {
	if [ ! -f "$1" ]; then
		echo ""                                                   1>&2
		echo " *** Missing file: $1"                              1>&2
		echo ' *** You need to run "make" before "make install".' 1>&2
		echo ""                                                   1>&2
		exit 1
	fi
}

# Make sure the files actually exist
verify "$2"
verify "$3"

# User may have a custom install script
if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi

if [ "$(basename $2)" = "Image.gz" ]; then
# Compressed install

@ -38,14 +38,9 @@ CONFIG_BOOT_CONFIG=y
# CONFIG_SYSFS_SYSCALL is not set
# CONFIG_FHANDLE is not set
CONFIG_KALLSYMS_ALL=y
CONFIG_USERFAULTFD=y
# CONFIG_RSEQ is not set
CONFIG_EMBEDDED=y
# CONFIG_COMPAT_BRK is not set
# CONFIG_SLAB_MERGE_DEFAULT is not set
CONFIG_SLAB_FREELIST_RANDOM=y
CONFIG_SLAB_FREELIST_HARDENED=y
CONFIG_SHUFFLE_PAGE_ALLOCATOR=y
CONFIG_PROFILING=y
CONFIG_ARCH_SUNXI=y
CONFIG_ARCH_HISI=y
@ -87,8 +82,6 @@ CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
CONFIG_KPROBES=y
CONFIG_JUMP_LABEL=y
CONFIG_SHADOW_CALL_STACK=y
CONFIG_LTO_CLANG_FULL=y
CONFIG_CFI_CLANG=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODVERSIONS=y
@ -102,6 +95,10 @@ CONFIG_BFQ_GROUP_IOSCHED=y
CONFIG_GKI_HACKS_TO_FIX=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_BINFMT_MISC=y
# CONFIG_SLAB_MERGE_DEFAULT is not set
CONFIG_SLAB_FREELIST_RANDOM=y
CONFIG_SLAB_FREELIST_HARDENED=y
CONFIG_SHUFFLE_PAGE_ALLOCATOR=y
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_DEFAULT_MMAP_MIN_ADDR=32768
@ -110,9 +107,9 @@ CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y
CONFIG_CMA=y
CONFIG_CMA_DEBUGFS=y
CONFIG_CMA_AREAS=16
CONFIG_ZSMALLOC=m
# CONFIG_ZONE_DMA is not set
CONFIG_ANON_VMA_NAME=y
CONFIG_USERFAULTFD=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
|
||||
|
||||
#define sev() asm volatile("sev" : : : "memory")
|
||||
#define wfe() asm volatile("wfe" : : : "memory")
|
||||
#define wfet(val) asm volatile("msr s0_3_c1_c0_0, %0" \
|
||||
: : "r" (val) : "memory")
|
||||
#define wfi() asm volatile("wfi" : : : "memory")
|
||||
#define wfit(val) asm volatile("msr s0_3_c1_c0_1, %0" \
|
||||
: : "r" (val) : "memory")
|
||||
|
||||
#define isb() asm volatile("isb" : : : "memory")
|
||||
#define dmb(opt) asm volatile("dmb " #opt : : : "memory")
|
||||
|
@ -6,6 +6,7 @@
|
||||
#define __ASM_CACHE_H
|
||||
|
||||
#include <asm/cputype.h>
|
||||
#include <asm/mte-def.h>
|
||||
|
||||
#define CTR_L1IP_SHIFT 14
|
||||
#define CTR_L1IP_MASK 3
|
||||
@ -49,15 +50,21 @@
|
||||
*/
|
||||
#define ARCH_DMA_MINALIGN (128)
|
||||
|
||||
#ifdef CONFIG_KASAN_SW_TAGS
|
||||
#define ARCH_SLAB_MINALIGN (1ULL << KASAN_SHADOW_SCALE_SHIFT)
|
||||
#elif defined(CONFIG_KASAN_HW_TAGS)
|
||||
#define ARCH_SLAB_MINALIGN MTE_GRANULE_SIZE
|
||||
#endif
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/kasan-enabled.h>
|
||||
|
||||
#ifdef CONFIG_KASAN_SW_TAGS
|
||||
#define ARCH_SLAB_MINALIGN (1ULL << KASAN_SHADOW_SCALE_SHIFT)
|
||||
#elif defined(CONFIG_KASAN_HW_TAGS)
|
||||
static inline unsigned int arch_slab_minalign(void)
|
||||
{
|
||||
return kasan_hw_tags_enabled() ? MTE_GRANULE_SIZE :
|
||||
__alignof__(unsigned long long);
|
||||
}
|
||||
#define arch_slab_minalign() arch_slab_minalign()
|
||||
#endif
|
||||
|
||||
#define ICACHEF_ALIASING 0
|
||||
#define ICACHEF_VPIPT 1
|
||||
|
@ -118,6 +118,10 @@
|
||||
|
||||
#define APPLE_CPU_PART_M1_ICESTORM 0x022
|
||||
#define APPLE_CPU_PART_M1_FIRESTORM 0x023
|
||||
#define APPLE_CPU_PART_M1_ICESTORM_PRO 0x024
|
||||
#define APPLE_CPU_PART_M1_FIRESTORM_PRO 0x025
|
||||
#define APPLE_CPU_PART_M1_ICESTORM_MAX 0x028
|
||||
#define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029
|
||||
|
||||
#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
|
||||
#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
|
||||
@ -164,6 +168,10 @@
|
||||
#define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
|
||||
#define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
|
||||
#define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
|
||||
#define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO)
|
||||
#define MIDR_APPLE_M1_FIRESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_PRO)
|
||||
#define MIDR_APPLE_M1_ICESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX)
|
||||
#define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX)
|
||||
|
||||
/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */
|
||||
#define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX
|
||||
|
@ -135,7 +135,10 @@
|
||||
#define ESR_ELx_CV (UL(1) << 24)
|
||||
#define ESR_ELx_COND_SHIFT (20)
|
||||
#define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT)
|
||||
#define ESR_ELx_WFx_ISS_TI (UL(1) << 0)
|
||||
#define ESR_ELx_WFx_ISS_RN (UL(0x1F) << 5)
|
||||
#define ESR_ELx_WFx_ISS_RV (UL(1) << 2)
|
||||
#define ESR_ELx_WFx_ISS_TI (UL(3) << 0)
|
||||
#define ESR_ELx_WFx_ISS_WFxT (UL(2) << 0)
|
||||
#define ESR_ELx_WFx_ISS_WFI (UL(0) << 0)
|
||||
#define ESR_ELx_WFx_ISS_WFE (UL(1) << 0)
|
||||
#define ESR_ELx_xVC_IMM_MASK ((UL(1) << 16) - 1)
|
||||
@ -148,7 +151,8 @@
|
||||
#define DISR_EL1_ESR_MASK (ESR_ELx_AET | ESR_ELx_EA | ESR_ELx_FSC)
|
||||
|
||||
/* ESR value templates for specific events */
|
||||
#define ESR_ELx_WFx_MASK (ESR_ELx_EC_MASK | ESR_ELx_WFx_ISS_TI)
|
||||
#define ESR_ELx_WFx_MASK (ESR_ELx_EC_MASK | \
|
||||
(ESR_ELx_WFx_ISS_TI & ~ESR_ELx_WFx_ISS_WFxT))
|
||||
#define ESR_ELx_WFx_WFI_VAL ((ESR_ELx_EC_WFx << ESR_ELx_EC_SHIFT) | \
|
||||
ESR_ELx_WFx_ISS_WFI)
|
||||
|
||||
|
@ -39,8 +39,8 @@ extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
|
||||
extern void huge_ptep_set_wrprotect(struct mm_struct *mm,
|
||||
unsigned long addr, pte_t *ptep);
|
||||
#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
|
||||
extern void huge_ptep_clear_flush(struct vm_area_struct *vma,
|
||||
unsigned long addr, pte_t *ptep);
|
||||
extern pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
|
||||
unsigned long addr, pte_t *ptep);
|
||||
#define __HAVE_ARCH_HUGE_PTE_CLEAR
|
||||
extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, unsigned long sz);
|
||||
|
@ -117,6 +117,7 @@
|
||||
#define KERNEL_HWCAP_SME_B16F32 __khwcap2_feature(SME_B16F32)
|
||||
#define KERNEL_HWCAP_SME_F32F32 __khwcap2_feature(SME_F32F32)
|
||||
#define KERNEL_HWCAP_SME_FA64 __khwcap2_feature(SME_FA64)
|
||||
#define KERNEL_HWCAP_WFXT __khwcap2_feature(WFXT)
|
||||
|
||||
/*
|
||||
* This yields a mask that user programs can use to figure out what
|
||||
|
@ -80,11 +80,12 @@
|
||||
* FMO: Override CPSR.F and enable signaling with VF
|
||||
* SWIO: Turn set/way invalidates into set/way clean+invalidate
|
||||
* PTW: Take a stage2 fault if a stage1 walk steps in device memory
|
||||
* TID3: Trap EL1 reads of group 3 ID registers
|
||||
*/
|
||||
#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
|
||||
HCR_BSU_IS | HCR_FB | HCR_TACR | \
|
||||
HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
|
||||
HCR_FMO | HCR_IMO | HCR_PTW )
|
||||
HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 )
|
||||
#define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
|
||||
#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
|
||||
#define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
|
||||
|
@ -169,6 +169,7 @@ struct kvm_nvhe_init_params {
|
||||
unsigned long tcr_el2;
|
||||
unsigned long tpidr_el2;
|
||||
unsigned long stack_hyp_va;
|
||||
unsigned long stack_pa;
|
||||
phys_addr_t pgd_pa;
|
||||
unsigned long hcr_el2;
|
||||
unsigned long vttbr;
|
||||
|
@ -87,13 +87,6 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
|
||||
|
||||
if (vcpu_el1_is_32bit(vcpu))
|
||||
vcpu->arch.hcr_el2 &= ~HCR_RW;
|
||||
else
|
||||
/*
|
||||
* TID3: trap feature register accesses that we virtualise.
|
||||
* For now this is conditional, since no AArch32 feature regs
|
||||
* are currently virtualised.
|
||||
*/
|
||||
vcpu->arch.hcr_el2 |= HCR_TID3;
|
||||
|
||||
if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) ||
|
||||
vcpu_el1_is_32bit(vcpu))
|
||||
|
@ -46,6 +46,7 @@
#define KVM_REQ_RECORD_STEAL	KVM_ARCH_REQ(3)
#define KVM_REQ_RELOAD_GICv4	KVM_ARCH_REQ(4)
#define KVM_REQ_RELOAD_PMU	KVM_ARCH_REQ(5)
#define KVM_REQ_SUSPEND		KVM_ARCH_REQ(6)

#define KVM_DIRTY_LOG_MANUAL_CAPS   (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
				     KVM_DIRTY_LOG_INITIALLY_SET)
@ -101,15 +102,25 @@ struct kvm_s2_mmu {
struct kvm_arch_memory_slot {
};

/**
 * struct kvm_smccc_features: Descriptor of the hypercall services exposed to the guests
 *
 * @std_bmap: Bitmap of standard secure service calls
 * @std_hyp_bmap: Bitmap of standard hypervisor service calls
 * @vendor_hyp_bmap: Bitmap of vendor specific hypervisor service calls
 */
struct kvm_smccc_features {
	unsigned long std_bmap;
	unsigned long std_hyp_bmap;
	unsigned long vendor_hyp_bmap;
};

struct kvm_arch {
	struct kvm_s2_mmu mmu;

	/* VTCR_EL2 value for this VM */
	u64    vtcr;

	/* The maximum number of vCPUs depends on the used GIC model */
	int max_vcpus;

	/* Interrupt controller */
	struct vgic_dist	vgic;

@ -136,6 +147,8 @@ struct kvm_arch {
	 */
#define KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED		3
#define KVM_ARCH_FLAG_EL1_32BIT				4
	/* PSCI SYSTEM_SUSPEND enabled for the guest */
#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED		5

	unsigned long flags;

@ -150,6 +163,9 @@ struct kvm_arch {

	u8 pfr0_csv2;
	u8 pfr0_csv3;

	/* Hypercall features firmware registers' descriptor */
	struct kvm_smccc_features smccc_feat;
};

struct kvm_vcpu_fault_info {
@ -254,14 +270,8 @@ struct kvm_cpu_context {
	struct kvm_vcpu *__hyp_running_vcpu;
};

struct kvm_pmu_events {
	u32 events_host;
	u32 events_guest;
};

struct kvm_host_data {
	struct kvm_cpu_context host_ctxt;
	struct kvm_pmu_events pmu_events;
};

struct kvm_host_psci_config {
@ -368,8 +378,8 @@ struct kvm_vcpu_arch {
		u32 mdscr_el1;
	} guest_debug_preserved;

	/* vcpu power-off state */
	bool power_off;
	/* vcpu power state */
	struct kvm_mp_state mp_state;

	/* Don't run the guest (internal implementation need) */
	bool pause;
@ -455,6 +465,7 @@ struct kvm_vcpu_arch {
#define KVM_ARM64_FP_FOREIGN_FPSTATE	(1 << 14)
#define KVM_ARM64_ON_UNSUPPORTED_CPU	(1 << 15) /* Physical CPU not in supported_cpus */
#define KVM_ARM64_HOST_SME_ENABLED	(1 << 16) /* SME enabled for EL0 */
#define KVM_ARM64_WFIT			(1 << 17) /* WFIT instruction trapped */

#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
				 KVM_GUESTDBG_USE_SW_BP | \
@ -687,10 +698,11 @@ int kvm_handle_cp14_64(struct kvm_vcpu *vcpu);
int kvm_handle_cp15_32(struct kvm_vcpu *vcpu);
int kvm_handle_cp15_64(struct kvm_vcpu *vcpu);
int kvm_handle_sys_reg(struct kvm_vcpu *vcpu);
int kvm_handle_cp10_id(struct kvm_vcpu *vcpu);

void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);

void kvm_sys_reg_table_init(void);
int kvm_sys_reg_table_init(void);

/* MMIO helpers */
void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
@ -799,9 +811,6 @@ void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu);
#ifdef CONFIG_KVM
void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr);
void kvm_clr_pmu_events(u32 clr);

void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu);
void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu);
#else
static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {}
static inline void kvm_clr_pmu_events(u32 clr) {}
@ -833,8 +842,6 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
#define kvm_has_mte(kvm)					\
	(system_supports_mte() &&				\
	 test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &(kvm)->arch.flags))
#define kvm_vcpu_has_pmu(vcpu)				\
	(test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))

int kvm_trng_call(struct kvm_vcpu *vcpu);
#ifdef CONFIG_KVM
@ -845,4 +852,7 @@ void __init kvm_hyp_reserve(void);
static inline void kvm_hyp_reserve(void) { }
#endif

void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu);
bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu);

#endif /* __ARM64_KVM_HOST_H__ */
@ -154,6 +154,9 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v)
int kvm_share_hyp(void *from, void *to);
void kvm_unshare_hyp(void *from, void *to);
int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot);
int __create_hyp_mappings(unsigned long start, unsigned long size,
			  unsigned long phys, enum kvm_pgtable_prot prot);
int hyp_alloc_private_va_range(size_t size, unsigned long *haddr);
int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
			   void __iomem **kaddr,
			   void __iomem **haddr);

@ -35,30 +35,6 @@ static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags)
}
#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags)

static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags)
{
	pteval_t prot = 0;

	if (vm_flags & VM_ARM64_BTI)
		prot |= PTE_GP;

	/*
	 * There are two conditions required for returning a Normal Tagged
	 * memory type: (1) the user requested it via PROT_MTE passed to
	 * mmap() or mprotect() and (2) the corresponding vma supports MTE. We
	 * register (1) as VM_MTE in the vma->vm_flags and (2) as
	 * VM_MTE_ALLOWED. Note that the latter can only be set during the
	 * mmap() call since mprotect() does not accept MAP_* flags.
	 * Checking for VM_MTE only is sufficient since arch_validate_flags()
	 * does not permit (VM_MTE & !VM_MTE_ALLOWED).
	 */
	if (vm_flags & VM_MTE)
		prot |= PTE_ATTRINDX(MT_NORMAL_TAGGED);

	return __pgprot(prot);
}
#define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags)

static inline bool arch_validate_prot(unsigned long prot,
	unsigned long addr __always_unused)
{

@ -6,6 +6,7 @@
#define __ASM_MTE_KASAN_H

#include <asm/compiler.h>
#include <asm/cputype.h>
#include <asm/mte-def.h>

#ifndef __ASSEMBLY__

@ -10,6 +10,7 @@
#include <asm/alternative.h>
#include <asm/cmpxchg.h>
#include <asm/stack_pointer.h>
#include <asm/sysreg.h>

static inline void set_my_cpu_offset(unsigned long off)
{

@ -14,6 +14,7 @@
 * Software defined PTE bits definition.
 */
#define PTE_WRITE		(PTE_DBM)		 /* same as DBM (51) */
#define PTE_SWP_EXCLUSIVE	(_AT(pteval_t, 1) << 2)	 /* only for swp ptes */
#define PTE_DIRTY		(_AT(pteval_t, 1) << 55)
#define PTE_SPECIAL		(_AT(pteval_t, 1) << 56)
#define PTE_DEVMAP		(_AT(pteval_t, 1) << 57)

@ -33,6 +33,7 @@
#include <linux/mmdebug.h>
#include <linux/mm_types.h>
#include <linux/sched.h>
#include <linux/page_table_check.h>

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
@ -96,6 +97,7 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
#define pte_young(pte)		(!!(pte_val(pte) & PTE_AF))
#define pte_special(pte)	(!!(pte_val(pte) & PTE_SPECIAL))
#define pte_write(pte)		(!!(pte_val(pte) & PTE_WRITE))
#define pte_user(pte)		(!!(pte_val(pte) & PTE_USER))
#define pte_user_exec(pte)	(!(pte_val(pte) & PTE_UXN))
#define pte_cont(pte)		(!!(pte_val(pte) & PTE_CONT))
#define pte_devmap(pte)		(!!(pte_val(pte) & PTE_DEVMAP))
@ -312,8 +314,8 @@ static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep,
		     __func__, pte_val(old_pte), pte_val(pte));
}

static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep, pte_t pte)
static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, pte_t pte)
{
	if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
		__sync_icache_dcache(pte);
@ -343,6 +345,13 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
	set_pte(ptep, pte);
}

static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep, pte_t pte)
{
	page_table_check_pte_set(mm, addr, ptep, pte);
	return __set_pte_at(mm, addr, ptep, pte);
}

/*
 * Huge pte definitions.
 */
@ -402,6 +411,22 @@ static inline pgprot_t mk_pmd_sect_prot(pgprot_t prot)
	return __pgprot((pgprot_val(prot) & ~PMD_TABLE_BIT) | PMD_TYPE_SECT);
}

#define __HAVE_ARCH_PTE_SWP_EXCLUSIVE
static inline pte_t pte_swp_mkexclusive(pte_t pte)
{
	return set_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
}

static inline int pte_swp_exclusive(pte_t pte)
{
	return pte_val(pte) & PTE_SWP_EXCLUSIVE;
}

static inline pte_t pte_swp_clear_exclusive(pte_t pte)
{
	return clear_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
}

#ifdef CONFIG_NUMA_BALANCING
/*
 * See the comment in include/linux/pgtable.h
@ -438,6 +463,8 @@ static inline int pmd_trans_huge(pmd_t pmd)
#define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
#define pmd_young(pmd)		pte_young(pmd_pte(pmd))
#define pmd_valid(pmd)		pte_valid(pmd_pte(pmd))
#define pmd_user(pmd)		pte_user(pmd_pte(pmd))
#define pmd_user_exec(pmd)	pte_user_exec(pmd_pte(pmd))
#define pmd_cont(pmd)		pte_cont(pmd_pte(pmd))
#define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
#define pmd_mkold(pmd)		pte_pmd(pte_mkold(pmd_pte(pmd)))
@ -485,8 +512,19 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
#define pud_pfn(pud)		((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
#define pfn_pud(pfn,prot)	__pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))

#define set_pmd_at(mm, addr, pmdp, pmd)	set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd))
#define set_pud_at(mm, addr, pudp, pud)	set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud))
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
			      pmd_t *pmdp, pmd_t pmd)
{
	page_table_check_pmd_set(mm, addr, pmdp, pmd);
	return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd));
}

static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
			      pud_t *pudp, pud_t pud)
{
	page_table_check_pud_set(mm, addr, pudp, pud);
	return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud));
}

#define __p4d_to_phys(p4d)	__pte_to_phys(p4d_pte(p4d))
#define __phys_to_p4d_val(phys)	__phys_to_pte_val(phys)
@ -627,6 +665,8 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
#define pud_present(pud)	pte_present(pud_pte(pud))
#define pud_leaf(pud)		(pud_present(pud) && !pud_table(pud))
#define pud_valid(pud)		pte_valid(pud_pte(pud))
#define pud_user(pud)		pte_user(pud_pte(pud))


static inline void set_pud(pud_t *pudp, pud_t pud)
{
@ -799,6 +839,23 @@ static inline int pgd_devmap(pgd_t pgd)
}
#endif

#ifdef CONFIG_PAGE_TABLE_CHECK
static inline bool pte_user_accessible_page(pte_t pte)
{
	return pte_present(pte) && (pte_user(pte) || pte_user_exec(pte));
}

static inline bool pmd_user_accessible_page(pmd_t pmd)
{
	return pmd_present(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
}

static inline bool pud_user_accessible_page(pud_t pud)
{
	return pud_present(pud) && pud_user(pud);
}
#endif

/*
 * Atomic pte/pmd modifications.
 */
@ -860,7 +917,11 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
				       unsigned long address, pte_t *ptep)
{
	return __pte(xchg_relaxed(&pte_val(*ptep), 0));
	pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0));

	page_table_check_pte_clear(mm, address, pte);

	return pte;
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@ -868,7 +929,11 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
					    unsigned long address, pmd_t *pmdp)
{
	return pte_pmd(ptep_get_and_clear(mm, address, (pte_t *)pmdp));
	pmd_t pmd = __pmd(xchg_relaxed(&pmd_val(*pmdp), 0));

	page_table_check_pmd_clear(mm, address, pmd);

	return pmd;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

@ -902,6 +967,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
	page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd);
	return __pmd(xchg_relaxed(&pmd_val(*pmdp), pmd_val(pmd)));
}
#endif
@ -909,12 +975,13 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
/*
 * Encode and decode a swap entry:
 *	bits 0-1:	present (must be zero)
 *	bits 2-7:	swap type
 *	bits 2:		remember PG_anon_exclusive
 *	bits 3-7:	swap type
 *	bits 8-57:	swap offset
 *	bit  58:	PTE_PROT_NONE (must be zero)
 */
#define __SWP_TYPE_SHIFT	2
#define __SWP_TYPE_BITS		6
#define __SWP_TYPE_SHIFT	3
#define __SWP_TYPE_BITS		5
#define __SWP_OFFSET_BITS	50
#define __SWP_TYPE_MASK		((1 << __SWP_TYPE_BITS) - 1)
#define __SWP_OFFSET_SHIFT	(__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
@ -964,10 +1031,10 @@ static inline void arch_swap_invalidate_area(int type)
}

#define __HAVE_ARCH_SWAP_RESTORE
static inline void arch_swap_restore(swp_entry_t entry, struct page *page)
static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
{
	if (system_supports_mte() && mte_restore_tags(entry, page))
		set_bit(PG_mte_tagged, &page->flags);
	if (system_supports_mte() && mte_restore_tags(entry, &folio->page))
		set_bit(PG_mte_tagged, &folio->flags);
}

#endif /* CONFIG_ARM64_MTE */
@ -87,5 +87,6 @@
#define HWCAP2_SME_B16F32	(1 << 28)
#define HWCAP2_SME_F32F32	(1 << 29)
#define HWCAP2_SME_FA64		(1 << 30)
#define HWCAP2_WFXT		(1UL << 31)

#endif /* _UAPI__ASM_HWCAP_H */

@ -334,6 +334,40 @@ struct kvm_arm_copy_mte_tags {
#define KVM_ARM64_SVE_VLS_WORDS	\
	((KVM_ARM64_SVE_VQ_MAX - KVM_ARM64_SVE_VQ_MIN) / 64 + 1)

/* Bitmap feature firmware registers */
#define KVM_REG_ARM_FW_FEAT_BMAP		(0x0016 << KVM_REG_ARM_COPROC_SHIFT)
#define KVM_REG_ARM_FW_FEAT_BMAP_REG(r)		(KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
						KVM_REG_ARM_FW_FEAT_BMAP |	   \
						((r) & 0xffff))

#define KVM_REG_ARM_STD_BMAP			KVM_REG_ARM_FW_FEAT_BMAP_REG(0)

enum {
	KVM_REG_ARM_STD_BIT_TRNG_V1_0	= 0,
#ifdef __KERNEL__
	KVM_REG_ARM_STD_BMAP_BIT_COUNT,
#endif
};

#define KVM_REG_ARM_STD_HYP_BMAP		KVM_REG_ARM_FW_FEAT_BMAP_REG(1)

enum {
	KVM_REG_ARM_STD_HYP_BIT_PV_TIME	= 0,
#ifdef __KERNEL__
	KVM_REG_ARM_STD_HYP_BMAP_BIT_COUNT,
#endif
};

#define KVM_REG_ARM_VENDOR_HYP_BMAP		KVM_REG_ARM_FW_FEAT_BMAP_REG(2)

enum {
	KVM_REG_ARM_VENDOR_HYP_BIT_FUNC_FEAT	= 0,
	KVM_REG_ARM_VENDOR_HYP_BIT_PTP		= 1,
#ifdef __KERNEL__
	KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_COUNT,
#endif
};

/* Device Control API: ARM VGIC */
#define KVM_DEV_ARM_VGIC_GRP_ADDR	0
#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS	1

@ -237,6 +237,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar2[] = {
	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_GPA3_SHIFT, 4, 0),
	ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_RPRES_SHIFT, 4, 0),
	ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_WFXT_SHIFT, 4, 0),
	ARM64_FTR_END,
};

@ -2517,6 +2518,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
		.cpu_enable = fa64_kernel_enable,
	},
#endif /* CONFIG_ARM64_SME */
	{
		.desc = "WFx with timeout",
		.capability = ARM64_HAS_WFXT,
		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
		.sys_reg = SYS_ID_AA64ISAR2_EL1,
		.sign = FTR_UNSIGNED,
		.field_pos = ID_AA64ISAR2_WFXT_SHIFT,
		.field_width = 4,
		.matches = has_cpuid_feature,
		.min_field_value = ID_AA64ISAR2_WFXT_SUPPORTED,
	},
	{},
};

@ -2650,6 +2662,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
	HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV),
	HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP),
	HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES),
	HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_WFXT_SHIFT, 4, FTR_UNSIGNED, ID_AA64ISAR2_WFXT_SUPPORTED, CAP_HWCAP, KERNEL_HWCAP_WFXT),
#ifdef CONFIG_ARM64_SME
	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SME_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SME, CAP_HWCAP, KERNEL_HWCAP_SME),
	HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_FA64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_FA64, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),

@ -106,6 +106,7 @@ static const char *const hwcap_str[] = {
	[KERNEL_HWCAP_SME_B16F32]	= "smeb16f32",
	[KERNEL_HWCAP_SME_F32F32]	= "smef32f32",
	[KERNEL_HWCAP_SME_FA64]		= "smefa64",
	[KERNEL_HWCAP_WFXT]		= "wfxt",
};

#ifdef CONFIG_COMPAT
@ -13,7 +13,7 @@ obj-$(CONFIG_KVM) += hyp/
|
||||
kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
|
||||
inject_fault.o va_layout.o handle_exit.o \
|
||||
guest.o debug.o reset.o sys_regs.o \
|
||||
vgic-sys-reg-v3.o fpsimd.o pmu.o pkvm.o \
|
||||
vgic-sys-reg-v3.o fpsimd.o pkvm.o \
|
||||
arch_timer.o trng.o vmid.o \
|
||||
vgic/vgic.o vgic/vgic-init.o \
|
||||
vgic/vgic-irqfd.o vgic/vgic-v2.o \
|
||||
@ -22,7 +22,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
|
||||
vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \
|
||||
vgic/vgic-its.o vgic/vgic-debug.o
|
||||
|
||||
kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o
|
||||
kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o
|
||||
|
||||
always-y := hyp_constants.h hyp-constants.s
|
||||
|
||||
|
@ -208,18 +208,16 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
|
||||
static u64 kvm_counter_compute_delta(struct arch_timer_context *timer_ctx,
|
||||
u64 val)
|
||||
{
|
||||
u64 cval, now;
|
||||
u64 now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);
|
||||
|
||||
cval = timer_get_cval(timer_ctx);
|
||||
now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);
|
||||
|
||||
if (now < cval) {
|
||||
if (now < val) {
|
||||
u64 ns;
|
||||
|
||||
ns = cyclecounter_cyc2ns(timecounter->cc,
|
||||
cval - now,
|
||||
val - now,
|
||||
timecounter->mask,
|
||||
&timecounter->frac);
|
||||
return ns;
|
||||
@ -228,6 +226,11 @@ static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
|
||||
{
|
||||
return kvm_counter_compute_delta(timer_ctx, timer_get_cval(timer_ctx));
|
||||
}
|
||||
|
||||
static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
|
||||
{
|
||||
WARN_ON(timer_ctx && timer_ctx->loaded);
|
||||
@ -236,6 +239,20 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
|
||||
(ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE)) == ARCH_TIMER_CTRL_ENABLE);
|
||||
}
|
||||
|
||||
static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return (cpus_have_final_cap(ARM64_HAS_WFXT) &&
|
||||
(vcpu->arch.flags & KVM_ARM64_WFIT));
|
||||
}
|
||||
|
||||
static u64 wfit_delay_ns(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct arch_timer_context *ctx = vcpu_vtimer(vcpu);
|
||||
u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
|
||||
|
||||
return kvm_counter_compute_delta(ctx, val);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the earliest expiration time in ns among guest timers.
|
||||
* Note that it will return 0 if none of timers can fire.
|
||||
@ -253,6 +270,9 @@ static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
|
||||
min_delta = min(min_delta, kvm_timer_compute_delta(ctx));
|
||||
}
|
||||
|
||||
if (vcpu_has_wfit_active(vcpu))
|
||||
min_delta = min(min_delta, wfit_delay_ns(vcpu));
|
||||
|
||||
/* If none of timers can fire, then return 0 */
|
||||
if (min_delta == ULLONG_MAX)
|
||||
return 0;
|
||||
@ -350,15 +370,9 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
|
||||
return cval <= now;
|
||||
}
|
||||
|
||||
bool kvm_timer_is_pending(struct kvm_vcpu *vcpu)
|
||||
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct timer_map map;
|
||||
|
||||
get_timer_map(vcpu, &map);
|
||||
|
||||
return kvm_timer_should_fire(map.direct_vtimer) ||
|
||||
kvm_timer_should_fire(map.direct_ptimer) ||
|
||||
kvm_timer_should_fire(map.emul_ptimer);
|
||||
return vcpu_has_wfit_active(vcpu) && wfit_delay_ns(vcpu) == 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -484,7 +498,8 @@ static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
|
||||
*/
|
||||
if (!kvm_timer_irq_can_fire(map.direct_vtimer) &&
|
||||
!kvm_timer_irq_can_fire(map.direct_ptimer) &&
|
||||
!kvm_timer_irq_can_fire(map.emul_ptimer))
|
||||
!kvm_timer_irq_can_fire(map.emul_ptimer) &&
|
||||
!vcpu_has_wfit_active(vcpu))
|
||||
return;
|
||||
|
||||
/*
|
||||
|
@ -97,6 +97,10 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
|
||||
}
|
||||
mutex_unlock(&kvm->lock);
|
||||
break;
|
||||
case KVM_CAP_ARM_SYSTEM_SUSPEND:
|
||||
r = 0;
|
||||
set_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags);
|
||||
break;
|
||||
default:
|
||||
r = -EINVAL;
|
||||
break;
|
||||
@ -153,9 +157,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
||||
kvm_vgic_early_init(kvm);
|
||||
|
||||
/* The maximum number of VCPUs is limited by the host's GIC model */
|
||||
kvm->arch.max_vcpus = kvm_arm_default_max_vcpus();
|
||||
kvm->max_vcpus = kvm_arm_default_max_vcpus();
|
||||
|
||||
set_default_spectre(kvm);
|
||||
kvm_arm_init_hypercalls(kvm);
|
||||
|
||||
return ret;
|
||||
out_free_stage2_pgd:
|
||||
@ -210,6 +215,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
case KVM_CAP_SET_GUEST_DEBUG:
|
||||
case KVM_CAP_VCPU_ATTRIBUTES:
|
||||
case KVM_CAP_PTP_KVM:
|
||||
case KVM_CAP_ARM_SYSTEM_SUSPEND:
|
||||
r = 1;
|
||||
break;
|
||||
case KVM_CAP_SET_GUEST_DEBUG2:
|
||||
@ -230,7 +236,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
case KVM_CAP_MAX_VCPUS:
|
||||
case KVM_CAP_MAX_VCPU_ID:
|
||||
if (kvm)
|
||||
r = kvm->arch.max_vcpus;
|
||||
r = kvm->max_vcpus;
|
||||
else
|
||||
r = kvm_arm_default_max_vcpus();
|
||||
break;
|
||||
@ -306,7 +312,7 @@ int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
|
||||
if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
|
||||
return -EBUSY;
|
||||
|
||||
if (id >= kvm->arch.max_vcpus)
|
||||
if (id >= kvm->max_vcpus)
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
@ -356,11 +362,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
|
||||
kvm_arm_vcpu_destroy(vcpu);
|
||||
}
|
||||
|
||||
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return kvm_timer_is_pending(vcpu);
|
||||
}
|
||||
|
||||
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
|
||||
@ -432,20 +433,34 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
vcpu->cpu = -1;
|
||||
}
|
||||
|
||||
static void vcpu_power_off(struct kvm_vcpu *vcpu)
|
||||
void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.power_off = true;
|
||||
vcpu->arch.mp_state.mp_state = KVM_MP_STATE_STOPPED;
|
||||
kvm_make_request(KVM_REQ_SLEEP, vcpu);
|
||||
kvm_vcpu_kick(vcpu);
|
||||
}
|
||||
|
||||
bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_STOPPED;
|
||||
}
|
||||
|
||||
static void kvm_arm_vcpu_suspend(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.mp_state.mp_state = KVM_MP_STATE_SUSPENDED;
|
||||
kvm_make_request(KVM_REQ_SUSPEND, vcpu);
|
||||
kvm_vcpu_kick(vcpu);
|
||||
}
|
||||
|
||||
static bool kvm_arm_vcpu_suspended(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_SUSPENDED;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
|
||||
struct kvm_mp_state *mp_state)
|
||||
{
|
||||
if (vcpu->arch.power_off)
|
||||
mp_state->mp_state = KVM_MP_STATE_STOPPED;
|
||||
else
|
||||
mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
|
||||
*mp_state = vcpu->arch.mp_state;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -457,10 +472,13 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
|
||||
|
||||
switch (mp_state->mp_state) {
|
||||
case KVM_MP_STATE_RUNNABLE:
|
||||
vcpu->arch.power_off = false;
|
||||
vcpu->arch.mp_state = *mp_state;
|
||||
break;
|
||||
case KVM_MP_STATE_STOPPED:
|
||||
vcpu_power_off(vcpu);
|
||||
kvm_arm_vcpu_power_off(vcpu);
|
||||
break;
|
||||
case KVM_MP_STATE_SUSPENDED:
|
||||
kvm_arm_vcpu_suspend(vcpu);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
@@ -480,7 +498,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
	bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF);
	return ((irq_lines || kvm_vgic_vcpu_pending_irq(v))
		&& !v->arch.power_off && !v->arch.pause);
		&& !kvm_arm_vcpu_stopped(v) && !v->arch.pause);
}

bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
@@ -592,15 +610,15 @@ void kvm_arm_resume_guest(struct kvm *kvm)
	}
}

static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
static void kvm_vcpu_sleep(struct kvm_vcpu *vcpu)
{
	struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);

	rcuwait_wait_event(wait,
			   (!vcpu->arch.power_off) &&(!vcpu->arch.pause),
			   (!kvm_arm_vcpu_stopped(vcpu)) && (!vcpu->arch.pause),
			   TASK_INTERRUPTIBLE);

	if (vcpu->arch.power_off || vcpu->arch.pause) {
	if (kvm_arm_vcpu_stopped(vcpu) || vcpu->arch.pause) {
		/* Awaken to handle a signal, request we sleep again later. */
		kvm_make_request(KVM_REQ_SLEEP, vcpu);
	}
@@ -639,6 +657,7 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
	preempt_enable();

	kvm_vcpu_halt(vcpu);
	vcpu->arch.flags &= ~KVM_ARM64_WFIT;
	kvm_clear_request(KVM_REQ_UNHALT, vcpu);

	preempt_disable();
@@ -646,11 +665,53 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
	preempt_enable();
}

static void check_vcpu_requests(struct kvm_vcpu *vcpu)
static int kvm_vcpu_suspend(struct kvm_vcpu *vcpu)
{
	if (!kvm_arm_vcpu_suspended(vcpu))
		return 1;

	kvm_vcpu_wfi(vcpu);

	/*
	 * The suspend state is sticky; we do not leave it until userspace
	 * explicitly marks the vCPU as runnable. Request that we suspend again
	 * later.
	 */
	kvm_make_request(KVM_REQ_SUSPEND, vcpu);

	/*
	 * Check to make sure the vCPU is actually runnable. If so, exit to
	 * userspace informing it of the wakeup condition.
	 */
	if (kvm_arch_vcpu_runnable(vcpu)) {
		memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
		vcpu->run->system_event.type = KVM_SYSTEM_EVENT_WAKEUP;
		vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
		return 0;
	}

	/*
	 * Otherwise, we were unblocked to process a different event, such as a
	 * pending signal. Return 1 and allow kvm_arch_vcpu_ioctl_run() to
	 * process the event.
	 */
	return 1;
}

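A minimal VMM-side sketch (hedged: the loop structure and helper name are illustrative, not from this patch) of how the WAKEUP exit produced above is consumed; the vCPU stays in KVM_MP_STATE_SUSPENDED until userspace sets it back to RUNNABLE:

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int handle_wakeup(int vcpu_fd, struct kvm_run *run)
{
	struct kvm_mp_state st = { .mp_state = KVM_MP_STATE_RUNNABLE };

	if (run->exit_reason == KVM_EXIT_SYSTEM_EVENT &&
	    run->system_event.type == KVM_SYSTEM_EVENT_WAKEUP)
		return ioctl(vcpu_fd, KVM_SET_MP_STATE, &st);

	return 0;
}
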
/**
 * check_vcpu_requests - check and handle pending vCPU requests
 * @vcpu:	the VCPU pointer
 *
 * Return: 1 if we should enter the guest
 *	   0 if we should exit to userspace
 *	   < 0 if we should exit to userspace, where the return value indicates
 *	   an error
 */
static int check_vcpu_requests(struct kvm_vcpu *vcpu)
{
	if (kvm_request_pending(vcpu)) {
		if (kvm_check_request(KVM_REQ_SLEEP, vcpu))
			vcpu_req_sleep(vcpu);
			kvm_vcpu_sleep(vcpu);

		if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
			kvm_reset_vcpu(vcpu);
@@ -675,7 +736,12 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu)
		if (kvm_check_request(KVM_REQ_RELOAD_PMU, vcpu))
			kvm_pmu_handle_pmcr(vcpu,
					    __vcpu_sys_reg(vcpu, PMCR_EL0));

		if (kvm_check_request(KVM_REQ_SUSPEND, vcpu))
			return kvm_vcpu_suspend(vcpu);
	}

	return 1;
}

static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu)
@@ -792,7 +858,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
		if (!ret)
			ret = 1;

		check_vcpu_requests(vcpu);
		if (ret > 0)
			ret = check_vcpu_requests(vcpu);

		/*
		 * Preparing the interrupts to be injected also
@@ -816,6 +883,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)

		kvm_vgic_flush_hwstate(vcpu);

		kvm_pmu_update_vcpu_events(vcpu);

		/*
		 * Ensure we set mode to IN_GUEST_MODE after we disable
		 * interrupts and before the final VCPU requests check.
@@ -1125,9 +1194,9 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
	 * Handle the "start in power-off" case.
	 */
	if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
		vcpu_power_off(vcpu);
		kvm_arm_vcpu_power_off(vcpu);
	else
		vcpu->arch.power_off = false;
		vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE;

	return 0;
}
@@ -1485,7 +1554,6 @@ static void cpu_prepare_hyp_mode(int cpu)
	tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET;
	params->tcr_el2 = tcr;

	params->stack_hyp_va = kern_hyp_va(per_cpu(kvm_arm_hyp_stack_page, cpu) + PAGE_SIZE);
	params->pgd_pa = kvm_mmu_get_httbr();
	if (is_protected_kvm_enabled())
		params->hcr_el2 = HCR_HOST_NVHE_PROTECTED_FLAGS;
@@ -1763,8 +1831,6 @@ static int init_subsystems(void)

	kvm_register_perf_callbacks(NULL);

	kvm_sys_reg_table_init();

out:
	if (err || !is_protected_kvm_enabled())
		on_each_cpu(_kvm_arch_hardware_disable, NULL, 1);
@@ -1935,14 +2001,46 @@ static int init_hyp_mode(void)
	 * Map the Hyp stack pages
	 */
	for_each_possible_cpu(cpu) {
		struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
		char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
		err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE,
					  PAGE_HYP);
		unsigned long hyp_addr;

		/*
		 * Allocate a contiguous HYP private VA range for the stack
		 * and guard page. The allocation is also aligned based on
		 * the order of its size.
		 */
		err = hyp_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
		if (err) {
			kvm_err("Cannot allocate hyp stack guard page\n");
			goto out_err;
		}

		/*
		 * Since the stack grows downwards, map the stack to the page
		 * at the higher address and leave the lower guard page
		 * unbacked.
		 *
		 * Any valid stack address now has the PAGE_SHIFT bit as 1
		 * and addresses corresponding to the guard page have the
		 * PAGE_SHIFT bit as 0 - this is used for overflow detection.
		 */
		err = __create_hyp_mappings(hyp_addr + PAGE_SIZE, PAGE_SIZE,
					    __pa(stack_page), PAGE_HYP);
		if (err) {
			kvm_err("Cannot map hyp stack\n");
			goto out_err;
		}

		/*
		 * Save the stack PA in nvhe_init_params. This will be needed
		 * to recreate the stack mapping in protected nVHE mode.
		 * __hyp_pa() won't do the right thing there, since the stack
		 * has been mapped in the flexible private VA space.
		 */
		params->stack_pa = __pa(stack_page);

		params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
	}

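A standalone sketch (assuming 4KiB pages, so PAGE_SHIFT == 12) of the overflow-detection property this layout buys: the private range is two pages, aligned to its own size, with the stack in the upper page. Every valid stack address therefore has bit PAGE_SHIFT set, and any address that has slipped down into the guard page has it clear:

#define EXAMPLE_PAGE_SHIFT	12

/* Bit PAGE_SHIFT clear means SP now points into the guard page. */
static int hyp_stack_overflowed(unsigned long sp)
{
	return !(sp & (1UL << EXAMPLE_PAGE_SHIFT));
}
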
	for_each_possible_cpu(cpu) {
@@ -2091,6 +2189,12 @@ int kvm_arch_init(void *opaque)
		return -ENODEV;
	}

	err = kvm_sys_reg_table_init();
	if (err) {
		kvm_info("Error initializing system register tables");
		return err;
	}

	in_hyp_mode = is_kernel_in_hyp_mode();

	if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) ||

@@ -18,7 +18,7 @@
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
#include <kvm/arm_psci.h>
#include <kvm/arm_hypercalls.h>
#include <asm/cputype.h>
#include <linux/uaccess.h>
#include <asm/fpsimd.h>
@@ -756,7 +756,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)

	switch (reg->id & KVM_REG_ARM_COPROC_MASK) {
	case KVM_REG_ARM_CORE:	return get_core_reg(vcpu, reg);
	case KVM_REG_ARM_FW:	return kvm_arm_get_fw_reg(vcpu, reg);
	case KVM_REG_ARM_FW:
	case KVM_REG_ARM_FW_FEAT_BMAP:
		return kvm_arm_get_fw_reg(vcpu, reg);
	case KVM_REG_ARM64_SVE:	return get_sve_reg(vcpu, reg);
	}

@@ -774,7 +776,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)

	switch (reg->id & KVM_REG_ARM_COPROC_MASK) {
	case KVM_REG_ARM_CORE:	return set_core_reg(vcpu, reg);
	case KVM_REG_ARM_FW:	return kvm_arm_set_fw_reg(vcpu, reg);
	case KVM_REG_ARM_FW:
	case KVM_REG_ARM_FW_FEAT_BMAP:
		return kvm_arm_set_fw_reg(vcpu, reg);
	case KVM_REG_ARM64_SVE:	return set_sve_reg(vcpu, reg);
	}

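A hedged userspace sketch of reading one of the new bitmapped firmware registers through the generic ONE_REG interface that the switch above now routes to kvm_arm_get_fw_reg(); vcpu_fd is assumed to be an open vCPU file descriptor, and KVM_REG_ARM_STD_BMAP comes from this series' uapi additions:

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdint.h>

static int read_std_bmap(int vcpu_fd, uint64_t *bmap)
{
	struct kvm_one_reg reg = {
		.id = KVM_REG_ARM_STD_BMAP,
		.addr = (uint64_t)bmap,
	};

	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}
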
|
@ -80,24 +80,51 @@ static int handle_no_fpsimd(struct kvm_vcpu *vcpu)
|
||||
*
|
||||
* @vcpu: the vcpu pointer
|
||||
*
|
||||
* WFE: Yield the CPU and come back to this vcpu when the scheduler
|
||||
* WFE[T]: Yield the CPU and come back to this vcpu when the scheduler
|
||||
* decides to.
|
||||
* WFI: Simply call kvm_vcpu_halt(), which will halt execution of
|
||||
* world-switches and schedule other host processes until there is an
|
||||
* incoming IRQ or FIQ to the VM.
|
||||
* WFIT: Same as WFI, with a timed wakeup implemented as a background timer
|
||||
*
|
||||
* WF{I,E}T can immediately return if the deadline has already expired.
|
||||
*/
|
||||
static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_WFx_ISS_WFE) {
|
||||
u64 esr = kvm_vcpu_get_esr(vcpu);
|
||||
|
||||
if (esr & ESR_ELx_WFx_ISS_WFE) {
|
||||
trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true);
|
||||
vcpu->stat.wfe_exit_stat++;
|
||||
kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
|
||||
} else {
|
||||
trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
|
||||
vcpu->stat.wfi_exit_stat++;
|
||||
kvm_vcpu_wfi(vcpu);
|
||||
}
|
||||
|
||||
if (esr & ESR_ELx_WFx_ISS_WFxT) {
|
||||
if (esr & ESR_ELx_WFx_ISS_RV) {
|
||||
u64 val, now;
|
||||
|
||||
now = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_TIMER_CNT);
|
||||
val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
|
||||
|
||||
if (now >= val)
|
||||
goto out;
|
||||
} else {
|
||||
/* Treat WFxT as WFx if RN is invalid */
|
||||
esr &= ~ESR_ELx_WFx_ISS_WFxT;
|
||||
}
|
||||
}
|
||||
|
||||
if (esr & ESR_ELx_WFx_ISS_WFE) {
|
||||
kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
|
||||
} else {
|
||||
if (esr & ESR_ELx_WFx_ISS_WFxT)
|
||||
vcpu->arch.flags |= KVM_ARM64_WFIT;
|
||||
|
||||
kvm_vcpu_wfi(vcpu);
|
||||
}
|
||||
out:
|
||||
kvm_incr_pc(vcpu);
|
||||
|
||||
return 1;
|
||||
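A guest-side sketch (not part of the patch; it assumes a toolchain that accepts the WFET encoding from FEAT_WFxT) of what the handler above has to cope with: WFET/WFIT take an absolute CNTVCT_EL0 deadline in a register, so a guest converts a relative timeout into an absolute one, and the instruction returns immediately if the deadline has already passed:

static inline unsigned long read_cntvct(void)
{
	unsigned long cval;

	asm volatile("mrs %0, cntvct_el0" : "=r" (cval));
	return cval;
}

static void wait_for_event_timeout(unsigned long ticks)
{
	unsigned long deadline = read_cntvct() + ticks;

	asm volatile("wfet %0" : : "r" (deadline));
}
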
@@ -169,6 +196,7 @@ static exit_handle_fn arm_exit_handlers[] = {
	[ESR_ELx_EC_CP15_64]	= kvm_handle_cp15_64,
	[ESR_ELx_EC_CP14_MR]	= kvm_handle_cp14_32,
	[ESR_ELx_EC_CP14_LS]	= kvm_handle_cp14_load_store,
	[ESR_ELx_EC_CP10_ID]	= kvm_handle_cp10_id,
	[ESR_ELx_EC_CP14_64]	= kvm_handle_cp14_64,
	[ESR_ELx_EC_HVC32]	= handle_hvc,
	[ESR_ELx_EC_SMC32]	= handle_smc,
@@ -297,13 +325,8 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
	u64 elr_in_kimg = __phys_to_kimg(elr_phys);
	u64 hyp_offset = elr_in_kimg - kaslr_offset() - elr_virt;
	u64 mode = spsr & PSR_MODE_MASK;
	u64 panic_addr = elr_virt + hyp_offset;

	/*
	 * The nVHE hyp symbols are not included by kallsyms to avoid issues
	 * with aliasing. That means that the symbols cannot be printed with the
	 * "%pS" format specifier, so fall back to the vmlinux address if
	 * there's no better option.
	 */
	if (mode != PSR_MODE_EL2t && mode != PSR_MODE_EL2h) {
		kvm_err("Invalid host exception to nVHE hyp!\n");
	} else if (ESR_ELx_EC(esr) == ESR_ELx_EC_BRK64 &&
@@ -323,9 +346,11 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
		if (file)
			kvm_err("nVHE hyp BUG at: %s:%u!\n", file, line);
		else
			kvm_err("nVHE hyp BUG at: %016llx!\n", elr_virt + hyp_offset);
			kvm_err("nVHE hyp BUG at: [<%016llx>] %pB!\n", panic_addr,
				(void *)panic_addr);
	} else {
		kvm_err("nVHE hyp panic at: %016llx!\n", elr_virt + hyp_offset);
		kvm_err("nVHE hyp panic at: [<%016llx>] %pB!\n", panic_addr,
			(void *)panic_addr);
	}

	/*

|
||||
int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot);
|
||||
int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot);
|
||||
int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot);
|
||||
unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
|
||||
enum kvm_pgtable_prot prot);
|
||||
int __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
|
||||
enum kvm_pgtable_prot prot,
|
||||
unsigned long *haddr);
|
||||
int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr);
|
||||
|
||||
static inline void hyp_vmemmap_range(phys_addr_t phys, unsigned long size,
|
||||
unsigned long *start, unsigned long *end)
|
||||
|
@@ -80,7 +80,7 @@ SYM_FUNC_START(__hyp_do_panic)
	mov	lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
		      PSR_MODE_EL1h)
	msr	spsr_el2, lr
	ldr	lr, =nvhe_hyp_panic_handler
	adr_l	lr, nvhe_hyp_panic_handler
	hyp_kimg_va lr, x6
	msr	elr_el2, lr

@@ -125,13 +125,11 @@ alternative_else_nop_endif
	add	sp, sp, #16
	/*
	 * Compute the idmap address of __kvm_handle_stub_hvc and
	 * jump there. Since we use kimage_voffset, do not use the
	 * HYP VA for __kvm_handle_stub_hvc, but the kernel VA instead
	 * (by loading it from the constant pool).
	 * jump there.
	 *
	 * Preserve x0-x4, which may contain stub parameters.
	 */
	ldr	x5, =__kvm_handle_stub_hvc
	adr_l	x5, __kvm_handle_stub_hvc
	hyp_pa	x5, x6
	br	x5
SYM_FUNC_END(__host_hvc)
@@ -153,6 +151,18 @@ SYM_FUNC_END(__host_hvc)

.macro invalid_host_el2_vect
	.align 7

	/*
	 * Test whether the SP has overflowed, without corrupting a GPR.
	 * nVHE hypervisor stacks are aligned so that the PAGE_SHIFT bit
	 * of SP should always be 1.
	 */
	add	sp, sp, x0		// sp' = sp + x0
	sub	x0, sp, x0		// x0' = sp' - x0 = (sp + x0) - x0 = sp
	tbz	x0, #PAGE_SHIFT, .L__hyp_sp_overflow\@
	sub	x0, sp, x0		// x0'' = sp' - x0' = (sp + x0) - sp = x0
	sub	sp, sp, x0		// sp'' = sp' - x0 = (sp + x0) - x0 = sp

	/* If a guest is loaded, panic out of it. */
	stp	x0, x1, [sp, #-16]!
	get_loaded_vcpu x0, x1
@@ -165,6 +175,18 @@ SYM_FUNC_END(__host_hvc)
	 * been partially clobbered by __host_enter.
	 */
	b	hyp_panic

.L__hyp_sp_overflow\@:
	/*
	 * Reset SP to the top of the stack, to allow handling the hyp_panic.
	 * This corrupts the stack but is ok, since we won't be attempting
	 * any unwinding here.
	 */
	ldr_this_cpu x0, kvm_init_params + NVHE_INIT_STACK_HYP_VA, x1
	mov	sp, x0

	b	hyp_panic_bad_stack
	ASM_BUG()
.endm
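A C model (illustrative only, not from the patch) of the register-free SP check above: the three add/sub steps are the classic arithmetic swap, done between SP and x0 so the test needs no scratch register, and both values end up restored:

static void sp_overflow_check_model(unsigned long *sp, unsigned long *x0)
{
	*sp = *sp + *x0;	/* sp' = sp + x0 */
	*x0 = *sp - *x0;	/* x0' = original sp, free to test bit PAGE_SHIFT */
	/* ... if bit PAGE_SHIFT of *x0 is clear, take the overflow path ... */
	*x0 = *sp - *x0;	/* x0'' = original x0 */
	*sp = *sp - *x0;	/* sp'' = original sp */
}
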

.macro invalid_host_el1_vect

@@ -160,7 +160,23 @@ static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ctxt)
	DECLARE_REG(size_t, size, host_ctxt, 2);
	DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 3);

	cpu_reg(host_ctxt, 1) = __pkvm_create_private_mapping(phys, size, prot);
	/*
	 * __pkvm_create_private_mapping() populates a pointer with the
	 * hypervisor start address of the allocation.
	 *
	 * However, the handle___pkvm_create_private_mapping() hypercall crosses
	 * the EL1/EL2 boundary, so the pointer would not be valid in this
	 * context.
	 *
	 * Instead pass the allocation address as the return value (or return
	 * ERR_PTR() on failure).
	 */
	unsigned long haddr;
	int err = __pkvm_create_private_mapping(phys, size, prot, &haddr);

	if (err)
		haddr = (unsigned long)ERR_PTR(err);

	cpu_reg(host_ctxt, 1) = haddr;
}
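A minimal host-side sketch (the helper is hypothetical) of how a caller can tell a valid hyp VA from an encoded error once the address comes back in a plain register: ERR_PTR() values all live in the top page of the address space, so IS_ERR_VALUE() recovers the error without a real pointer ever crossing the EL1/EL2 boundary:

static int decode_private_mapping_ret(unsigned long ret, unsigned long *haddr)
{
	if (IS_ERR_VALUE(ret))
		return PTR_ERR((void *)ret);

	*haddr = ret;
	return 0;
}
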

static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt)

@@ -37,36 +37,60 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size,
	return err;
}

unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
					    enum kvm_pgtable_prot prot)
/**
 * pkvm_alloc_private_va_range - Allocates a private VA range.
 * @size:	The size of the VA range to reserve.
 * @haddr:	The hypervisor virtual start address of the allocation.
 *
 * The private virtual address (VA) range is allocated above __io_map_base
 * and aligned based on the order of @size.
 *
 * Return: 0 on success or negative error code on failure.
 */
int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr)
{
	unsigned long base, addr;
	int ret = 0;

	hyp_spin_lock(&pkvm_pgd_lock);

	/* Align the allocation based on the order of its size */
	addr = ALIGN(__io_map_base, PAGE_SIZE << get_order(size));

	/* The allocated size is always a multiple of PAGE_SIZE */
	base = addr + PAGE_ALIGN(size);

	/* Are we overflowing on the vmemmap ? */
	if (!addr || base > __hyp_vmemmap)
		ret = -ENOMEM;
	else {
		__io_map_base = base;
		*haddr = addr;
	}

	hyp_spin_unlock(&pkvm_pgd_lock);

	return ret;
}

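A worked check (standalone, with hypothetical values) of the alignment rule above: get_order() rounds a size up to a power-of-two number of pages, and the range base is aligned to that same power of two. For a 3-page (12KiB) request with 4KiB pages the order is 2, so the alignment is 16KiB; this is also what makes the two-page stack-plus-guard allocation land on a 2 * PAGE_SIZE boundary:

#include <assert.h>

#define EX_PAGE_SHIFT	12
#define EX_PAGE_SIZE	(1UL << EX_PAGE_SHIFT)
#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long align = EX_PAGE_SIZE << 2;	/* get_order(0x3000) == 2 */

	assert(ALIGN_UP(0x12345000UL, align) == 0x12348000UL);
	return 0;
}
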
int __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
				  enum kvm_pgtable_prot prot,
				  unsigned long *haddr)
{
	unsigned long addr;
	int err;

	hyp_spin_lock(&pkvm_pgd_lock);

	size = PAGE_ALIGN(size + offset_in_page(phys));
	addr = __io_map_base;
	__io_map_base += size;
	err = pkvm_alloc_private_va_range(size, &addr);
	if (err)
		return err;

	/* Are we overflowing on the vmemmap ? */
	if (__io_map_base > __hyp_vmemmap) {
		__io_map_base -= size;
		addr = (unsigned long)ERR_PTR(-ENOMEM);
		goto out;
	}
	err = __pkvm_create_mappings(addr, size, phys, prot);
	if (err)
		return err;

	err = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, size, phys, prot);
	if (err) {
		addr = (unsigned long)ERR_PTR(err);
		goto out;
	}

	addr = addr + offset_in_page(phys);
out:
	hyp_spin_unlock(&pkvm_pgd_lock);

	return addr;
	*haddr = addr + offset_in_page(phys);
	return err;
}

int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot)
@@ -146,7 +170,8 @@ int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot)
int hyp_map_vectors(void)
{
	phys_addr_t phys;
	void *bp_base;
	unsigned long bp_base;
	int ret;

	if (!kvm_system_needs_idmapped_vectors()) {
		__hyp_bp_vect_base = __bp_harden_hyp_vecs;
@@ -154,13 +179,12 @@ int hyp_map_vectors(void)
	}

	phys = __hyp_pa(__bp_harden_hyp_vecs);
	bp_base = (void *)__pkvm_create_private_mapping(phys,
							__BP_HARDEN_HYP_VECS_SZ,
							PAGE_HYP_EXEC);
	if (IS_ERR_OR_NULL(bp_base))
		return PTR_ERR(bp_base);
	ret = __pkvm_create_private_mapping(phys, __BP_HARDEN_HYP_VECS_SZ,
					    PAGE_HYP_EXEC, &bp_base);
	if (ret)
		return ret;

	__hyp_bp_vect_base = bp_base;
	__hyp_bp_vect_base = (void *)bp_base;

	return 0;
}

@@ -99,17 +99,42 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
		return ret;

	for (i = 0; i < hyp_nr_cpus; i++) {
		struct kvm_nvhe_init_params *params = per_cpu_ptr(&kvm_init_params, i);
		unsigned long hyp_addr;

		start = (void *)kern_hyp_va(per_cpu_base[i]);
		end = start + PAGE_ALIGN(hyp_percpu_size);
		ret = pkvm_create_mappings(start, end, PAGE_HYP);
		if (ret)
			return ret;

		end = (void *)per_cpu_ptr(&kvm_init_params, i)->stack_hyp_va;
		start = end - PAGE_SIZE;
		ret = pkvm_create_mappings(start, end, PAGE_HYP);
		/*
		 * Allocate a contiguous HYP private VA range for the stack
		 * and guard page. The allocation is also aligned based on
		 * the order of its size.
		 */
		ret = pkvm_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
		if (ret)
			return ret;

		/*
		 * Since the stack grows downwards, map the stack to the page
		 * at the higher address and leave the lower guard page
		 * unbacked.
		 *
		 * Any valid stack address now has the PAGE_SHIFT bit as 1
		 * and addresses corresponding to the guard page have the
		 * PAGE_SHIFT bit as 0 - this is used for overflow detection.
		 */
		hyp_spin_lock(&pkvm_pgd_lock);
		ret = kvm_pgtable_hyp_map(&pkvm_pgtable, hyp_addr + PAGE_SIZE,
					  PAGE_SIZE, params->stack_pa, PAGE_HYP);
		hyp_spin_unlock(&pkvm_pgd_lock);
		if (ret)
			return ret;

		/* Update stack_hyp_va to end of the stack's private VA range */
		params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
	}

	/*

@@ -150,16 +150,13 @@ static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
	}
}

/**
/*
 * Disable host events, enable guest events
 */
static bool __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt)
#ifdef CONFIG_HW_PERF_EVENTS
static bool __pmu_switch_to_guest(struct kvm_vcpu *vcpu)
{
	struct kvm_host_data *host;
	struct kvm_pmu_events *pmu;

	host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
	pmu = &host->pmu_events;
	struct kvm_pmu_events *pmu = &vcpu->arch.pmu.events;

	if (pmu->events_host)
		write_sysreg(pmu->events_host, pmcntenclr_el0);
@@ -170,16 +167,12 @@ static bool __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt)
	return (pmu->events_host || pmu->events_guest);
}

/**
/*
 * Disable guest events, enable host events
 */
static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt)
static void __pmu_switch_to_host(struct kvm_vcpu *vcpu)
{
	struct kvm_host_data *host;
	struct kvm_pmu_events *pmu;

	host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
	pmu = &host->pmu_events;
	struct kvm_pmu_events *pmu = &vcpu->arch.pmu.events;

	if (pmu->events_guest)
		write_sysreg(pmu->events_guest, pmcntenclr_el0);
@@ -187,8 +180,12 @@ static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt)
	if (pmu->events_host)
		write_sysreg(pmu->events_host, pmcntenset_el0);
}
#else
#define __pmu_switch_to_guest(v)	({ false; })
#define __pmu_switch_to_host(v)		do {} while (0)
#endif

/**
/*
 * Handler for protected VM MSR, MRS or System instruction execution in AArch64.
 *
 * Returns true if the hypervisor has handled the exit, and control should go
@@ -205,23 +202,6 @@ static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code)
			 kvm_handle_pvm_sysreg(vcpu, exit_code));
}

/**
 * Handler for protected floating-point and Advanced SIMD accesses.
 *
 * Returns true if the hypervisor has handled the exit, and control should go
 * back to the guest, or false if it hasn't.
 */
static bool kvm_handle_pvm_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
{
	/* Linux guests assume support for floating-point and Advanced SIMD. */
	BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_FP),
				PVM_ID_AA64PFR0_ALLOW));
	BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD),
				PVM_ID_AA64PFR0_ALLOW));

	return kvm_hyp_handle_fpsimd(vcpu, exit_code);
}

static const exit_handler_fn hyp_exit_handlers[] = {
	[0 ... ESR_ELx_EC_MAX]		= NULL,
	[ESR_ELx_EC_CP15_32]		= kvm_hyp_handle_cp15_32,
@@ -237,7 +217,7 @@ static const exit_handler_fn pvm_exit_handlers[] = {
	[0 ... ESR_ELx_EC_MAX]		= NULL,
	[ESR_ELx_EC_SYS64]		= kvm_handle_pvm_sys64,
	[ESR_ELx_EC_SVE]		= kvm_handle_pvm_restricted,
	[ESR_ELx_EC_FP_ASIMD]		= kvm_handle_pvm_fpsimd,
	[ESR_ELx_EC_FP_ASIMD]		= kvm_hyp_handle_fpsimd,
	[ESR_ELx_EC_IABT_LOW]		= kvm_hyp_handle_iabt_low,
	[ESR_ELx_EC_DABT_LOW]		= kvm_hyp_handle_dabt_low,
	[ESR_ELx_EC_PAC]		= kvm_hyp_handle_ptrauth,
@@ -304,7 +284,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
	host_ctxt->__hyp_running_vcpu = vcpu;
	guest_ctxt = &vcpu->arch.ctxt;

	pmu_switch_needed = __pmu_switch_to_guest(host_ctxt);
	pmu_switch_needed = __pmu_switch_to_guest(vcpu);

	__sysreg_save_state_nvhe(host_ctxt);
	/*
@@ -366,7 +346,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
	__debug_restore_host_buffers_nvhe(vcpu);

	if (pmu_switch_needed)
		__pmu_switch_to_host(host_ctxt);
		__pmu_switch_to_host(vcpu);

	/* Returning to host will clear PSR.I, remask PMR if needed */
	if (system_uses_irq_prio_masking())
@@ -377,7 +357,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
	return exit_code;
}

void __noreturn hyp_panic(void)
asmlinkage void __noreturn hyp_panic(void)
{
	u64 spsr = read_sysreg_el2(SYS_SPSR);
	u64 elr = read_sysreg_el2(SYS_ELR);
@@ -399,6 +379,11 @@ void __noreturn hyp_panic(void)
	unreachable();
}

asmlinkage void __noreturn hyp_panic_bad_stack(void)
{
	hyp_panic();
}

asmlinkage void kvm_unexpected_el2_exception(void)
{
	return __kvm_unexpected_el2_exception();

@@ -90,9 +90,6 @@ static u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu)
	u64 set_mask = 0;
	u64 allow_mask = PVM_ID_AA64PFR0_ALLOW;

	if (!vcpu_has_sve(vcpu))
		allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_SVE);

	set_mask |= get_restricted_features_unsigned(id_aa64pfr0_el1_sys_val,
		PVM_ID_AA64PFR0_RESTRICT_UNSIGNED);

@@ -9,6 +9,13 @@
#include <kvm/arm_hypercalls.h>
#include <kvm/arm_psci.h>

#define KVM_ARM_SMCCC_STD_FEATURES				\
	GENMASK(KVM_REG_ARM_STD_BMAP_BIT_COUNT - 1, 0)
#define KVM_ARM_SMCCC_STD_HYP_FEATURES				\
	GENMASK(KVM_REG_ARM_STD_HYP_BMAP_BIT_COUNT - 1, 0)
#define KVM_ARM_SMCCC_VENDOR_HYP_FEATURES			\
	GENMASK(KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_COUNT - 1, 0)

static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val)
{
	struct system_time_snapshot systime_snapshot;
@@ -58,13 +65,73 @@ static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val)
	val[3] = lower_32_bits(cycles);
}

static bool kvm_hvc_call_default_allowed(u32 func_id)
{
	switch (func_id) {
	/*
	 * List of function-ids that are not gated with the bitmapped
	 * feature firmware registers, and are to be allowed for
	 * servicing the call by default.
	 */
	case ARM_SMCCC_VERSION_FUNC_ID:
	case ARM_SMCCC_ARCH_FEATURES_FUNC_ID:
		return true;
	default:
		/* PSCI 0.2 and up is in the 0:0x1f range */
		if (ARM_SMCCC_OWNER_NUM(func_id) == ARM_SMCCC_OWNER_STANDARD &&
		    ARM_SMCCC_FUNC_NUM(func_id) <= 0x1f)
			return true;

		/*
		 * KVM's PSCI 0.1 doesn't comply with SMCCC, and has
		 * its own function-id base and range
		 */
		if (func_id >= KVM_PSCI_FN(0) && func_id <= KVM_PSCI_FN(3))
			return true;

		return false;
	}
}

static bool kvm_hvc_call_allowed(struct kvm_vcpu *vcpu, u32 func_id)
{
	struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat;

	switch (func_id) {
	case ARM_SMCCC_TRNG_VERSION:
	case ARM_SMCCC_TRNG_FEATURES:
	case ARM_SMCCC_TRNG_GET_UUID:
	case ARM_SMCCC_TRNG_RND32:
	case ARM_SMCCC_TRNG_RND64:
		return test_bit(KVM_REG_ARM_STD_BIT_TRNG_V1_0,
				&smccc_feat->std_bmap);
	case ARM_SMCCC_HV_PV_TIME_FEATURES:
	case ARM_SMCCC_HV_PV_TIME_ST:
		return test_bit(KVM_REG_ARM_STD_HYP_BIT_PV_TIME,
				&smccc_feat->std_hyp_bmap);
	case ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID:
	case ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID:
		return test_bit(KVM_REG_ARM_VENDOR_HYP_BIT_FUNC_FEAT,
				&smccc_feat->vendor_hyp_bmap);
	case ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID:
		return test_bit(KVM_REG_ARM_VENDOR_HYP_BIT_PTP,
				&smccc_feat->vendor_hyp_bmap);
	default:
		return kvm_hvc_call_default_allowed(func_id);
	}
}

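A small illustration (not in the patch) of how the gating above behaves: with a bit count of 1, the GENMASK() default expands to 0x1, so only bit 0 (TRNG v1.0 in this series) exists, and the per-call check reduces to a plain test_bit() on the per-VM bitmap:

#include <linux/bitops.h>

static bool vm_allows_trng(unsigned long std_bmap)
{
	/* Bit 0 models KVM_REG_ARM_STD_BIT_TRNG_V1_0. */
	return test_bit(0, &std_bmap);
}
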
int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
{
	struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat;
	u32 func_id = smccc_get_function(vcpu);
	u64 val[4] = {SMCCC_RET_NOT_SUPPORTED};
	u32 feature;
	gpa_t gpa;

	if (!kvm_hvc_call_allowed(vcpu, func_id))
		goto out;

	switch (func_id) {
	case ARM_SMCCC_VERSION_FUNC_ID:
		val[0] = ARM_SMCCC_VERSION_1_1;
@@ -120,7 +187,9 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
			}
			break;
		case ARM_SMCCC_HV_PV_TIME_FEATURES:
			val[0] = SMCCC_RET_SUCCESS;
			if (test_bit(KVM_REG_ARM_STD_HYP_BIT_PV_TIME,
				     &smccc_feat->std_hyp_bmap))
				val[0] = SMCCC_RET_SUCCESS;
			break;
		}
		break;
@@ -139,8 +208,7 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
		val[3] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3;
		break;
	case ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID:
		val[0] = BIT(ARM_SMCCC_KVM_FUNC_FEATURES);
		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_PTP);
		val[0] = smccc_feat->vendor_hyp_bmap;
		break;
	case ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID:
		kvm_ptp_get_time(vcpu, val);
@@ -155,6 +223,259 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
		return kvm_psci_call(vcpu);
	}

out:
	smccc_set_retval(vcpu, val[0], val[1], val[2], val[3]);
	return 1;
}

static const u64 kvm_arm_fw_reg_ids[] = {
	KVM_REG_ARM_PSCI_VERSION,
	KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1,
	KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2,
	KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3,
	KVM_REG_ARM_STD_BMAP,
	KVM_REG_ARM_STD_HYP_BMAP,
	KVM_REG_ARM_VENDOR_HYP_BMAP,
};

void kvm_arm_init_hypercalls(struct kvm *kvm)
{
	struct kvm_smccc_features *smccc_feat = &kvm->arch.smccc_feat;

	smccc_feat->std_bmap = KVM_ARM_SMCCC_STD_FEATURES;
	smccc_feat->std_hyp_bmap = KVM_ARM_SMCCC_STD_HYP_FEATURES;
	smccc_feat->vendor_hyp_bmap = KVM_ARM_SMCCC_VENDOR_HYP_FEATURES;
}

int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
{
	return ARRAY_SIZE(kvm_arm_fw_reg_ids);
}

int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(kvm_arm_fw_reg_ids); i++) {
		if (put_user(kvm_arm_fw_reg_ids[i], uindices++))
			return -EFAULT;
	}

	return 0;
}

#define KVM_REG_FEATURE_LEVEL_MASK	GENMASK(3, 0)

/*
 * Convert the workaround level into an easy-to-compare number, where higher
 * values mean better protection.
 */
static int get_kernel_wa_level(u64 regid)
{
	switch (regid) {
	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
		switch (arm64_get_spectre_v2_state()) {
		case SPECTRE_VULNERABLE:
			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;
		case SPECTRE_MITIGATED:
			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL;
		case SPECTRE_UNAFFECTED:
			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED;
		}
		return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;
	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
		switch (arm64_get_spectre_v4_state()) {
		case SPECTRE_MITIGATED:
			/*
			 * As for the hypercall discovery, we pretend we
			 * don't have any FW mitigation if SSBS is there at
			 * all times.
			 */
			if (cpus_have_final_cap(ARM64_SSBS))
				return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
			fallthrough;
		case SPECTRE_UNAFFECTED:
			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED;
		case SPECTRE_VULNERABLE:
			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
		}
		break;
	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
		switch (arm64_get_spectre_bhb_state()) {
		case SPECTRE_VULNERABLE:
			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL;
		case SPECTRE_MITIGATED:
			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL;
		case SPECTRE_UNAFFECTED:
			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED;
		}
		return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL;
	}

	return -EINVAL;
}

int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
	struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat;
	void __user *uaddr = (void __user *)(long)reg->addr;
	u64 val;

	switch (reg->id) {
	case KVM_REG_ARM_PSCI_VERSION:
		val = kvm_psci_version(vcpu);
		break;
	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
		val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
		break;
	case KVM_REG_ARM_STD_BMAP:
		val = READ_ONCE(smccc_feat->std_bmap);
		break;
	case KVM_REG_ARM_STD_HYP_BMAP:
		val = READ_ONCE(smccc_feat->std_hyp_bmap);
		break;
	case KVM_REG_ARM_VENDOR_HYP_BMAP:
		val = READ_ONCE(smccc_feat->vendor_hyp_bmap);
		break;
	default:
		return -ENOENT;
	}

	if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)))
		return -EFAULT;

	return 0;
}

static int kvm_arm_set_fw_reg_bmap(struct kvm_vcpu *vcpu, u64 reg_id, u64 val)
{
	int ret = 0;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_smccc_features *smccc_feat = &kvm->arch.smccc_feat;
	unsigned long *fw_reg_bmap, fw_reg_features;

	switch (reg_id) {
	case KVM_REG_ARM_STD_BMAP:
		fw_reg_bmap = &smccc_feat->std_bmap;
		fw_reg_features = KVM_ARM_SMCCC_STD_FEATURES;
		break;
	case KVM_REG_ARM_STD_HYP_BMAP:
		fw_reg_bmap = &smccc_feat->std_hyp_bmap;
		fw_reg_features = KVM_ARM_SMCCC_STD_HYP_FEATURES;
		break;
	case KVM_REG_ARM_VENDOR_HYP_BMAP:
		fw_reg_bmap = &smccc_feat->vendor_hyp_bmap;
		fw_reg_features = KVM_ARM_SMCCC_VENDOR_HYP_FEATURES;
		break;
	default:
		return -ENOENT;
	}

	/* Check for unsupported bit */
	if (val & ~fw_reg_features)
		return -EINVAL;

	mutex_lock(&kvm->lock);

	if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) &&
	    val != *fw_reg_bmap) {
		ret = -EBUSY;
		goto out;
	}

	WRITE_ONCE(*fw_reg_bmap, val);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}
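
A hedged userspace sketch of driving kvm_arm_set_fw_reg_bmap(): writing 0 to the standard-features bitmap de-advertises TRNG before the first KVM_RUN; after any vCPU has run, a changed value is rejected with -EBUSY as enforced above. The register ID is from this series' uapi additions:

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdint.h>

static int disable_std_features(int vcpu_fd)
{
	uint64_t bmap = 0;	/* clears KVM_REG_ARM_STD_BIT_TRNG_V1_0 */
	struct kvm_one_reg reg = {
		.id = KVM_REG_ARM_STD_BMAP,
		.addr = (uint64_t)&bmap,
	};

	return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}
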
int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
	void __user *uaddr = (void __user *)(long)reg->addr;
	u64 val;
	int wa_level;

	if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
		return -EFAULT;

	switch (reg->id) {
	case KVM_REG_ARM_PSCI_VERSION:
	{
		bool wants_02;

		wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features);

		switch (val) {
		case KVM_ARM_PSCI_0_1:
			if (wants_02)
				return -EINVAL;
			vcpu->kvm->arch.psci_version = val;
			return 0;
		case KVM_ARM_PSCI_0_2:
		case KVM_ARM_PSCI_1_0:
		case KVM_ARM_PSCI_1_1:
			if (!wants_02)
				return -EINVAL;
			vcpu->kvm->arch.psci_version = val;
			return 0;
		}
		break;
	}

	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
		if (val & ~KVM_REG_FEATURE_LEVEL_MASK)
			return -EINVAL;

		if (get_kernel_wa_level(reg->id) < val)
			return -EINVAL;

		return 0;

	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
		if (val & ~(KVM_REG_FEATURE_LEVEL_MASK |
			    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED))
			return -EINVAL;

		/* The enabled bit must not be set unless the level is AVAIL. */
		if ((val & KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED) &&
		    (val & KVM_REG_FEATURE_LEVEL_MASK) != KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL)
			return -EINVAL;

		/*
		 * Map all the possible incoming states to the only two we
		 * really want to deal with.
		 */
		switch (val & KVM_REG_FEATURE_LEVEL_MASK) {
		case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL:
		case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN:
			wa_level = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
			break;
		case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
		case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED:
			wa_level = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED;
			break;
		default:
			return -EINVAL;
		}

		/*
		 * We can deal with NOT_AVAIL on NOT_REQUIRED, but not the
		 * other way around.
		 */
		if (get_kernel_wa_level(reg->id) < wa_level)
			return -EINVAL;

		return 0;
	case KVM_REG_ARM_STD_BMAP:
	case KVM_REG_ARM_STD_HYP_BMAP:
	case KVM_REG_ARM_VENDOR_HYP_BMAP:
		return kvm_arm_set_fw_reg_bmap(vcpu, reg->id, val);
	default:
		return -ENOENT;
	}

	return -EINVAL;
}

@@ -258,8 +258,8 @@ static bool kvm_host_owns_hyp_mappings(void)
	return true;
}

static int __create_hyp_mappings(unsigned long start, unsigned long size,
				 unsigned long phys, enum kvm_pgtable_prot prot)
int __create_hyp_mappings(unsigned long start, unsigned long size,
			  unsigned long phys, enum kvm_pgtable_prot prot)
{
	int err;

@@ -457,23 +457,22 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
	return 0;
}

static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
					unsigned long *haddr,
					enum kvm_pgtable_prot prot)

/**
 * hyp_alloc_private_va_range - Allocates a private VA range.
 * @size:	The size of the VA range to reserve.
 * @haddr:	The hypervisor virtual start address of the allocation.
 *
 * The private virtual address (VA) range is allocated below io_map_base
 * and aligned based on the order of @size.
 *
 * Return: 0 on success or negative error code on failure.
 */
int hyp_alloc_private_va_range(size_t size, unsigned long *haddr)
{
	unsigned long base;
	int ret = 0;

	if (!kvm_host_owns_hyp_mappings()) {
		base = kvm_call_hyp_nvhe(__pkvm_create_private_mapping,
					 phys_addr, size, prot);
		if (IS_ERR_OR_NULL((void *)base))
			return PTR_ERR((void *)base);
		*haddr = base;

		return 0;
	}

	mutex_lock(&kvm_hyp_pgd_mutex);

	/*
@@ -484,8 +483,10 @@ static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
	 *
	 * The allocated size is always a multiple of PAGE_SIZE.
	 */
	size = PAGE_ALIGN(size + offset_in_page(phys_addr));
	base = io_map_base - size;
	base = io_map_base - PAGE_ALIGN(size);

	/* Align the allocation based on the order of its size */
	base = ALIGN_DOWN(base, PAGE_SIZE << get_order(size));

	/*
	 * Verify that BIT(VA_BITS - 1) hasn't been flipped by
@@ -495,19 +496,40 @@ static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
	if ((base ^ io_map_base) & BIT(VA_BITS - 1))
		ret = -ENOMEM;
	else
		io_map_base = base;
		*haddr = io_map_base = base;

	mutex_unlock(&kvm_hyp_pgd_mutex);

	if (ret)
		goto out;
	return ret;
}

	ret = __create_hyp_mappings(base, size, phys_addr, prot);
	if (ret)
		goto out;
static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
					unsigned long *haddr,
					enum kvm_pgtable_prot prot)
{
	unsigned long addr;
	int ret = 0;

	*haddr = base + offset_in_page(phys_addr);
out:
	if (!kvm_host_owns_hyp_mappings()) {
		addr = kvm_call_hyp_nvhe(__pkvm_create_private_mapping,
					 phys_addr, size, prot);
		if (IS_ERR_VALUE(addr))
			return addr;
		*haddr = addr;

		return 0;
	}

	size = PAGE_ALIGN(size + offset_in_page(phys_addr));
	ret = hyp_alloc_private_va_range(size, &addr);
	if (ret)
		return ret;

	ret = __create_hyp_mappings(addr, size, phys_addr, prot);
	if (ret)
		return ret;

	*haddr = addr + offset_in_page(phys_addr);
	return ret;
}

@@ -774,8 +774,7 @@ void kvm_host_pmu_init(struct arm_pmu *pmu)
{
	struct arm_pmu_entry *entry;

	if (pmu->pmuver == 0 || pmu->pmuver == ID_AA64DFR0_PMUVER_IMP_DEF ||
	    is_protected_kvm_enabled())
	if (pmu->pmuver == 0 || pmu->pmuver == ID_AA64DFR0_PMUVER_IMP_DEF)
		return;

	mutex_lock(&arm_pmus_lock);

@@ -5,7 +5,8 @@
 */
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include <asm/kvm_hyp.h>

static DEFINE_PER_CPU(struct kvm_pmu_events, kvm_pmu_events);

/*
 * Given the perf event attributes and system type, determine
@@ -25,21 +26,26 @@ static bool kvm_pmu_switch_needed(struct perf_event_attr *attr)
	return (attr->exclude_host != attr->exclude_guest);
}

struct kvm_pmu_events *kvm_get_pmu_events(void)
{
	return this_cpu_ptr(&kvm_pmu_events);
}

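A condensed sketch (the struct and helper names below are hypothetical stand-ins) of the pattern this hunk adopts: the host/guest event masks move out of kvm_host_data into a plain per-CPU variable, so VHE and nVHE paths can share one accessor instead of going through this_cpu_ptr_hyp_sym():

#include <linux/percpu.h>

struct pmu_events_example {
	u32 events_host;
	u32 events_guest;
};

static DEFINE_PER_CPU(struct pmu_events_example, example_events);

/* Mirrors kvm_get_pmu_events(): fetch this CPU's switch state. */
static struct pmu_events_example *get_events(void)
{
	return this_cpu_ptr(&example_events);
}
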
/*
 * Add events to track that we may want to switch at guest entry/exit
 * time.
 */
void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr)
{
	struct kvm_host_data *ctx = this_cpu_ptr_hyp_sym(kvm_host_data);
	struct kvm_pmu_events *pmu = kvm_get_pmu_events();

	if (!kvm_arm_support_pmu_v3() || !ctx || !kvm_pmu_switch_needed(attr))
	if (!kvm_arm_support_pmu_v3() || !pmu || !kvm_pmu_switch_needed(attr))
		return;

	if (!attr->exclude_host)
		ctx->pmu_events.events_host |= set;
		pmu->events_host |= set;
	if (!attr->exclude_guest)
		ctx->pmu_events.events_guest |= set;
		pmu->events_guest |= set;
}

/*
@@ -47,13 +53,13 @@ void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr)
 */
void kvm_clr_pmu_events(u32 clr)
{
	struct kvm_host_data *ctx = this_cpu_ptr_hyp_sym(kvm_host_data);
	struct kvm_pmu_events *pmu = kvm_get_pmu_events();

	if (!kvm_arm_support_pmu_v3() || !ctx)
	if (!kvm_arm_support_pmu_v3() || !pmu)
		return;

	ctx->pmu_events.events_host &= ~clr;
	ctx->pmu_events.events_guest &= ~clr;
	pmu->events_host &= ~clr;
	pmu->events_guest &= ~clr;
}

#define PMEVTYPER_READ_CASE(idx)				\
@@ -169,16 +175,16 @@ static void kvm_vcpu_pmu_disable_el0(unsigned long events)
 */
void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu)
{
	struct kvm_host_data *host;
	struct kvm_pmu_events *pmu;
	u32 events_guest, events_host;

	if (!kvm_arm_support_pmu_v3() || !has_vhe())
		return;

	preempt_disable();
	host = this_cpu_ptr_hyp_sym(kvm_host_data);
	events_guest = host->pmu_events.events_guest;
	events_host = host->pmu_events.events_host;
	pmu = kvm_get_pmu_events();
	events_guest = pmu->events_guest;
	events_host = pmu->events_host;

	kvm_vcpu_pmu_enable_el0(events_guest);
	kvm_vcpu_pmu_disable_el0(events_host);
@@ -190,15 +196,15 @@ void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu)
 */
void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu)
{
	struct kvm_host_data *host;
	struct kvm_pmu_events *pmu;
	u32 events_guest, events_host;

	if (!kvm_arm_support_pmu_v3() || !has_vhe())
		return;

	host = this_cpu_ptr_hyp_sym(kvm_host_data);
	events_guest = host->pmu_events.events_guest;
	events_host = host->pmu_events.events_host;
	pmu = kvm_get_pmu_events();
	events_guest = pmu->events_guest;
	events_host = pmu->events_host;

	kvm_vcpu_pmu_enable_el0(events_host);
	kvm_vcpu_pmu_disable_el0(events_guest);

|
||||
return PSCI_RET_SUCCESS;
|
||||
}
|
||||
|
||||
static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.power_off = true;
|
||||
kvm_make_request(KVM_REQ_SLEEP, vcpu);
|
||||
kvm_vcpu_kick(vcpu);
|
||||
}
|
||||
|
||||
static inline bool kvm_psci_valid_affinity(struct kvm_vcpu *vcpu,
|
||||
unsigned long affinity)
|
||||
{
|
||||
@ -83,7 +76,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
|
||||
*/
|
||||
if (!vcpu)
|
||||
return PSCI_RET_INVALID_PARAMS;
|
||||
if (!vcpu->arch.power_off) {
|
||||
if (!kvm_arm_vcpu_stopped(vcpu)) {
|
||||
if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1)
|
||||
return PSCI_RET_ALREADY_ON;
|
||||
else
|
||||
@ -107,12 +100,12 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
|
||||
kvm_make_request(KVM_REQ_VCPU_RESET, vcpu);
|
||||
|
||||
/*
|
||||
* Make sure the reset request is observed if the change to
|
||||
* power_off is observed.
|
||||
* Make sure the reset request is observed if the RUNNABLE mp_state is
|
||||
* observed.
|
||||
*/
|
||||
smp_wmb();
|
||||
|
||||
vcpu->arch.power_off = false;
|
||||
vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE;
|
||||
kvm_vcpu_wake_up(vcpu);
|
||||
|
||||
return PSCI_RET_SUCCESS;
|
||||
@ -150,7 +143,7 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
|
||||
mpidr = kvm_vcpu_get_mpidr_aff(tmp);
|
||||
if ((mpidr & target_affinity_mask) == target_affinity) {
|
||||
matching_cpus++;
|
||||
if (!tmp->arch.power_off)
|
||||
if (!kvm_arm_vcpu_stopped(tmp))
|
||||
return PSCI_0_2_AFFINITY_LEVEL_ON;
|
||||
}
|
||||
}
|
||||
@ -176,7 +169,7 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type, u64 flags)
|
||||
* re-initialized.
|
||||
*/
|
||||
kvm_for_each_vcpu(i, tmp, vcpu->kvm)
|
||||
tmp->arch.power_off = true;
|
||||
tmp->arch.mp_state.mp_state = KVM_MP_STATE_STOPPED;
|
||||
kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
|
||||
|
||||
memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
|
||||
@ -202,6 +195,15 @@ static void kvm_psci_system_reset2(struct kvm_vcpu *vcpu)
|
||||
KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2);
|
||||
}
|
||||
|
||||
static void kvm_psci_system_suspend(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_run *run = vcpu->run;
|
||||
|
||||
memset(&run->system_event, 0, sizeof(vcpu->run->system_event));
|
||||
run->system_event.type = KVM_SYSTEM_EVENT_SUSPEND;
|
||||
run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
|
||||
}
|
||||
|
||||
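A hedged userspace sketch of the contract this creates: when KVM_CAP_ARM_SYSTEM_SUSPEND is enabled and the guest calls PSCI SYSTEM_SUSPEND, KVM_RUN returns with the event below and the vCPU registers still hold the SMCCC parameters. The resume helper is hypothetical:

#include <linux/kvm.h>

/* Hypothetical VMM helper: model wakeup per PSCI SYSTEM_SUSPEND rules. */
static void resume_from_suspend(void) { }

static void handle_system_event(struct kvm_run *run)
{
	if (run->exit_reason == KVM_EXIT_SYSTEM_EVENT &&
	    run->system_event.type == KVM_SYSTEM_EVENT_SUSPEND)
		resume_from_suspend();
}
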
static void kvm_psci_narrow_to_32bit(struct kvm_vcpu *vcpu)
{
	int i;
@@ -245,7 +247,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
		val = kvm_psci_vcpu_suspend(vcpu);
		break;
	case PSCI_0_2_FN_CPU_OFF:
		kvm_psci_vcpu_off(vcpu);
		kvm_arm_vcpu_power_off(vcpu);
		val = PSCI_RET_SUCCESS;
		break;
	case PSCI_0_2_FN_CPU_ON:
@@ -305,9 +307,10 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)

static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor)
{
	unsigned long val = PSCI_RET_NOT_SUPPORTED;
	u32 psci_fn = smccc_get_function(vcpu);
	struct kvm *kvm = vcpu->kvm;
	u32 arg;
	unsigned long val;
	int ret = 1;

	switch(psci_fn) {
@@ -320,6 +323,8 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor)
		if (val)
			break;

		val = PSCI_RET_NOT_SUPPORTED;

		switch(arg) {
		case PSCI_0_2_FN_PSCI_VERSION:
		case PSCI_0_2_FN_CPU_SUSPEND:
@@ -336,18 +341,32 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor)
		case ARM_SMCCC_VERSION_FUNC_ID:
			val = 0;
			break;
		case PSCI_1_0_FN_SYSTEM_SUSPEND:
		case PSCI_1_0_FN64_SYSTEM_SUSPEND:
			if (test_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags))
				val = 0;
			break;
		case PSCI_1_1_FN_SYSTEM_RESET2:
		case PSCI_1_1_FN64_SYSTEM_RESET2:
			if (minor >= 1) {
			if (minor >= 1)
				val = 0;
			break;
			}
			fallthrough;
		default:
			val = PSCI_RET_NOT_SUPPORTED;
			break;
		}
		break;
	case PSCI_1_0_FN_SYSTEM_SUSPEND:
		kvm_psci_narrow_to_32bit(vcpu);
		fallthrough;
	case PSCI_1_0_FN64_SYSTEM_SUSPEND:
		/*
		 * Return directly to userspace without changing the vCPU's
		 * registers. Userspace depends on reading the SMCCC parameters
		 * to implement SYSTEM_SUSPEND.
		 */
		if (test_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags)) {
			kvm_psci_system_suspend(vcpu);
			return 0;
		}
		break;
	case PSCI_1_1_FN_SYSTEM_RESET2:
		kvm_psci_narrow_to_32bit(vcpu);
		fallthrough;
@@ -365,7 +384,7 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor)
			val = PSCI_RET_INVALID_PARAMS;
			break;
		}
		fallthrough;
		break;
	default:
		return kvm_psci_0_2_call(vcpu);
	}
@@ -382,7 +401,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)

	switch (psci_fn) {
	case KVM_PSCI_FN_CPU_OFF:
		kvm_psci_vcpu_off(vcpu);
		kvm_arm_vcpu_power_off(vcpu);
		val = PSCI_RET_SUCCESS;
		break;
	case KVM_PSCI_FN_CPU_ON:
@@ -437,186 +456,3 @@ int kvm_psci_call(struct kvm_vcpu *vcpu)
		return -EINVAL;
	}
}

int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
{
	return 4;		/* PSCI version and three workaround registers */
}

int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
{
	if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices++))
		return -EFAULT;

	if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1, uindices++))
		return -EFAULT;

	if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, uindices++))
		return -EFAULT;

	if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3, uindices++))
		return -EFAULT;

	return 0;
}

#define KVM_REG_FEATURE_LEVEL_WIDTH	4
#define KVM_REG_FEATURE_LEVEL_MASK	(BIT(KVM_REG_FEATURE_LEVEL_WIDTH) - 1)

/*
 * Convert the workaround level into an easy-to-compare number, where higher
 * values mean better protection.
 */
static int get_kernel_wa_level(u64 regid)
{
	switch (regid) {
	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
		switch (arm64_get_spectre_v2_state()) {
		case SPECTRE_VULNERABLE:
			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;
		case SPECTRE_MITIGATED:
			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL;
		case SPECTRE_UNAFFECTED:
			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED;
		}
		return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;
	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
		switch (arm64_get_spectre_v4_state()) {
		case SPECTRE_MITIGATED:
			/*
			 * As for the hypercall discovery, we pretend we
			 * don't have any FW mitigation if SSBS is there at
			 * all times.
			 */
			if (cpus_have_final_cap(ARM64_SSBS))
				return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
			fallthrough;
		case SPECTRE_UNAFFECTED:
			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED;
		case SPECTRE_VULNERABLE:
			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
		}
		break;
	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
		switch (arm64_get_spectre_bhb_state()) {
		case SPECTRE_VULNERABLE:
			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL;
		case SPECTRE_MITIGATED:
			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL;
		case SPECTRE_UNAFFECTED:
			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED;
		}
		return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL;
	}

	return -EINVAL;
}

int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
	void __user *uaddr = (void __user *)(long)reg->addr;
	u64 val;

	switch (reg->id) {
	case KVM_REG_ARM_PSCI_VERSION:
		val = kvm_psci_version(vcpu);
		break;
	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
		val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
		break;
	default:
		return -ENOENT;
	}

	if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)))
		return -EFAULT;

	return 0;
}

int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
	void __user *uaddr = (void __user *)(long)reg->addr;
	u64 val;
	int wa_level;

	if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
		return -EFAULT;

	switch (reg->id) {
	case KVM_REG_ARM_PSCI_VERSION:
	{
		bool wants_02;

		wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features);

		switch (val) {
		case KVM_ARM_PSCI_0_1:
			if (wants_02)
				return -EINVAL;
			vcpu->kvm->arch.psci_version = val;
			return 0;
		case KVM_ARM_PSCI_0_2:
		case KVM_ARM_PSCI_1_0:
		case KVM_ARM_PSCI_1_1:
			if (!wants_02)
				return -EINVAL;
			vcpu->kvm->arch.psci_version = val;
			return 0;
		}
		break;
	}

	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
		if (val & ~KVM_REG_FEATURE_LEVEL_MASK)
			return -EINVAL;

		if (get_kernel_wa_level(reg->id) < val)
			return -EINVAL;

		return 0;

	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
		if (val & ~(KVM_REG_FEATURE_LEVEL_MASK |
			    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED))
			return -EINVAL;

		/* The enabled bit must not be set unless the level is AVAIL. */
		if ((val & KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED) &&
		    (val & KVM_REG_FEATURE_LEVEL_MASK) != KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL)
			return -EINVAL;

		/*
		 * Map all the possible incoming states to the only two we
		 * really want to deal with.
		 */
		switch (val & KVM_REG_FEATURE_LEVEL_MASK) {
		case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL:
		case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN:
			wa_level = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
			break;
		case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
		case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED:
			wa_level = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED;
			break;
		default:
			return -EINVAL;
		}

		/*
		 * We can deal with NOT_AVAIL on NOT_REQUIRED, but not the
		 * other way around.
		 */
		if (get_kernel_wa_level(reg->id) < wa_level)
			return -EINVAL;

		return 0;
	default:
		return -ENOENT;
	}

	return -EINVAL;
}

@ -1145,6 +1145,8 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
		if (!vcpu_has_ptrauth(vcpu))
			val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) |
				 ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3));
		if (!cpus_have_final_cap(ARM64_HAS_WFXT))
			val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_WFXT);
		break;
	case SYS_ID_AA64DFR0_EL1:
		/* Limit debug to ARMv8.0 */
@ -2020,20 +2022,22 @@ static const struct sys_reg_desc cp14_64_regs[] = {
	{ Op1( 0), CRm( 2), .access = trap_raz_wi },
};

#define CP15_PMU_SYS_REG(_map, _Op1, _CRn, _CRm, _Op2) \
	AA32(_map), \
	Op1(_Op1), CRn(_CRn), CRm(_CRm), Op2(_Op2), \
	.visibility = pmu_visibility

/* Macro to expand the PMEVCNTRn register */
#define PMU_PMEVCNTR(n) \
	/* PMEVCNTRn */ \
	{ Op1(0), CRn(0b1110), \
	  CRm((0b1000 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \
	  access_pmu_evcntr }
	{ CP15_PMU_SYS_REG(DIRECT, 0, 0b1110, \
	  (0b1000 | (((n) >> 3) & 0x3)), ((n) & 0x7)), \
	  .access = access_pmu_evcntr }

/* Macro to expand the PMEVTYPERn register */
#define PMU_PMEVTYPER(n) \
	/* PMEVTYPERn */ \
	{ Op1(0), CRn(0b1110), \
	  CRm((0b1100 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \
	  access_pmu_evtyper }

	{ CP15_PMU_SYS_REG(DIRECT, 0, 0b1110, \
	  (0b1100 | (((n) >> 3) & 0x3)), ((n) & 0x7)), \
	  .access = access_pmu_evtyper }
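As a quick sanity check of the CRm/Op2 arithmetic packed into these macros (an illustration, not part of the patch): the event-counter index n is split so that CRm carries the two high bits and Op2 the three low bits.

        /* Prints the cp15 encoding of every PMEVCNTRn; e.g. PMEVCNTR13
         * resolves to CRn=0b1110, CRm=0b1001, Op2=5. */
        #include <stdio.h>

        int main(void)
        {
                for (int n = 0; n <= 30; n++)
                        printf("PMEVCNTR%d -> CRm=0x%x Op2=%d\n",
                               n, 0x8 | ((n >> 3) & 0x3), n & 0x7);
                return 0;
        }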
/*
 * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding,
 * depending on the way they are accessed (as a 32bit or a 64bit
@ -2073,25 +2077,25 @@ static const struct sys_reg_desc cp15_regs[] = {
	{ Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw },

	/* PMU */
	{ Op1( 0), CRn( 9), CRm(12), Op2( 0), access_pmcr },
	{ Op1( 0), CRn( 9), CRm(12), Op2( 1), access_pmcnten },
	{ Op1( 0), CRn( 9), CRm(12), Op2( 2), access_pmcnten },
	{ Op1( 0), CRn( 9), CRm(12), Op2( 3), access_pmovs },
	{ Op1( 0), CRn( 9), CRm(12), Op2( 4), access_pmswinc },
	{ Op1( 0), CRn( 9), CRm(12), Op2( 5), access_pmselr },
	{ AA32(LO), Op1( 0), CRn( 9), CRm(12), Op2( 6), access_pmceid },
	{ AA32(LO), Op1( 0), CRn( 9), CRm(12), Op2( 7), access_pmceid },
	{ Op1( 0), CRn( 9), CRm(13), Op2( 0), access_pmu_evcntr },
	{ Op1( 0), CRn( 9), CRm(13), Op2( 1), access_pmu_evtyper },
	{ Op1( 0), CRn( 9), CRm(13), Op2( 2), access_pmu_evcntr },
	{ Op1( 0), CRn( 9), CRm(14), Op2( 0), access_pmuserenr },
	{ Op1( 0), CRn( 9), CRm(14), Op2( 1), access_pminten },
	{ Op1( 0), CRn( 9), CRm(14), Op2( 2), access_pminten },
	{ Op1( 0), CRn( 9), CRm(14), Op2( 3), access_pmovs },
	{ AA32(HI), Op1( 0), CRn( 9), CRm(14), Op2( 4), access_pmceid },
	{ AA32(HI), Op1( 0), CRn( 9), CRm(14), Op2( 5), access_pmceid },
	{ CP15_PMU_SYS_REG(DIRECT, 0, 9, 12, 0), .access = access_pmcr },
	{ CP15_PMU_SYS_REG(DIRECT, 0, 9, 12, 1), .access = access_pmcnten },
	{ CP15_PMU_SYS_REG(DIRECT, 0, 9, 12, 2), .access = access_pmcnten },
	{ CP15_PMU_SYS_REG(DIRECT, 0, 9, 12, 3), .access = access_pmovs },
	{ CP15_PMU_SYS_REG(DIRECT, 0, 9, 12, 4), .access = access_pmswinc },
	{ CP15_PMU_SYS_REG(DIRECT, 0, 9, 12, 5), .access = access_pmselr },
	{ CP15_PMU_SYS_REG(LO, 0, 9, 12, 6), .access = access_pmceid },
	{ CP15_PMU_SYS_REG(LO, 0, 9, 12, 7), .access = access_pmceid },
	{ CP15_PMU_SYS_REG(DIRECT, 0, 9, 13, 0), .access = access_pmu_evcntr },
	{ CP15_PMU_SYS_REG(DIRECT, 0, 9, 13, 1), .access = access_pmu_evtyper },
	{ CP15_PMU_SYS_REG(DIRECT, 0, 9, 13, 2), .access = access_pmu_evcntr },
	{ CP15_PMU_SYS_REG(DIRECT, 0, 9, 14, 0), .access = access_pmuserenr },
	{ CP15_PMU_SYS_REG(DIRECT, 0, 9, 14, 1), .access = access_pminten },
	{ CP15_PMU_SYS_REG(DIRECT, 0, 9, 14, 2), .access = access_pminten },
	{ CP15_PMU_SYS_REG(DIRECT, 0, 9, 14, 3), .access = access_pmovs },
	{ CP15_PMU_SYS_REG(HI, 0, 9, 14, 4), .access = access_pmceid },
	{ CP15_PMU_SYS_REG(HI, 0, 9, 14, 5), .access = access_pmceid },
	/* PMMIR */
	{ Op1( 0), CRn( 9), CRm(14), Op2( 6), trap_raz_wi },
	{ CP15_PMU_SYS_REG(DIRECT, 0, 9, 14, 6), .access = trap_raz_wi },

	/* PRRR/MAIR0 */
	{ AA32(LO), Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, MAIR_EL1 },
@ -2176,7 +2180,7 @@ static const struct sys_reg_desc cp15_regs[] = {
	PMU_PMEVTYPER(29),
	PMU_PMEVTYPER(30),
	/* PMCCFILTR */
	{ Op1(0), CRn(14), CRm(15), Op2(7), access_pmu_evtyper },
	{ CP15_PMU_SYS_REG(DIRECT, 0, 14, 15, 7), .access = access_pmu_evtyper },

	{ Op1(1), CRn( 0), CRm( 0), Op2(0), access_ccsidr },
	{ Op1(1), CRn( 0), CRm( 0), Op2(1), access_clidr },
@ -2185,7 +2189,7 @@ static const struct sys_reg_desc cp15_regs[] = {

static const struct sys_reg_desc cp15_64_regs[] = {
	{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR0_EL1 },
	{ Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr },
	{ CP15_PMU_SYS_REG(DIRECT, 0, 0, 9, 0), .access = access_pmu_evcntr },
	{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI1R */
	{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR1_EL1 },
	{ Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */
@ -2193,25 +2197,24 @@ static const struct sys_reg_desc cp15_64_regs[] = {
	{ SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer },
};

static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n,
			      bool is_32)
static bool check_sysreg_table(const struct sys_reg_desc *table, unsigned int n,
			       bool is_32)
{
	unsigned int i;

	for (i = 0; i < n; i++) {
		if (!is_32 && table[i].reg && !table[i].reset) {
			kvm_err("sys_reg table %p entry %d has lacks reset\n",
				table, i);
			return 1;
			kvm_err("sys_reg table %pS entry %d lacks reset\n", &table[i], i);
			return false;
		}

		if (i && cmp_sys_reg(&table[i-1], &table[i]) >= 0) {
			kvm_err("sys_reg table %p out of order (%d)\n", table, i - 1);
			return 1;
			kvm_err("sys_reg table %pS entry %d out of order\n", &table[i - 1], i - 1);
			return false;
		}
	}

	return 0;
	return true;
}
int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu)
@ -2252,27 +2255,27 @@ static void perform_access(struct kvm_vcpu *vcpu,
 * @table: array of trap descriptors
 * @num: size of the trap descriptor array
 *
 * Return 0 if the access has been handled, and -1 if not.
 * Return true if the access has been handled, false if not.
 */
static int emulate_cp(struct kvm_vcpu *vcpu,
		      struct sys_reg_params *params,
		      const struct sys_reg_desc *table,
		      size_t num)
static bool emulate_cp(struct kvm_vcpu *vcpu,
		       struct sys_reg_params *params,
		       const struct sys_reg_desc *table,
		       size_t num)
{
	const struct sys_reg_desc *r;

	if (!table)
		return -1;	/* Not handled */
		return false;	/* Not handled */

	r = find_reg(params, table, num);

	if (r) {
		perform_access(vcpu, params, r);
		return 0;
		return true;
	}

	/* Not handled */
	return -1;
	return false;
}

static void unhandled_cp_access(struct kvm_vcpu *vcpu,
@ -2336,7 +2339,7 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
	 * potential register operation in the case of a read and return
	 * with success.
	 */
	if (!emulate_cp(vcpu, &params, global, nr_global)) {
	if (emulate_cp(vcpu, &params, global, nr_global)) {
		/* Split up the value between registers for the read side */
		if (!params.is_write) {
			vcpu_set_reg(vcpu, Rt, lower_32_bits(params.regval));
@ -2350,34 +2353,144 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
	return 1;
}

static bool emulate_sys_reg(struct kvm_vcpu *vcpu, struct sys_reg_params *params);

/*
 * The CP10 ID registers are architecturally mapped to AArch64 feature
 * registers. Abuse that fact so we can rely on the AArch64 handler for accesses
 * from AArch32.
 */
static bool kvm_esr_cp10_id_to_sys64(u64 esr, struct sys_reg_params *params)
{
	u8 reg_id = (esr >> 10) & 0xf;
	bool valid;

	params->is_write = ((esr & 1) == 0);
	params->Op0 = 3;
	params->Op1 = 0;
	params->CRn = 0;
	params->CRm = 3;

	/* CP10 ID registers are read-only */
	valid = !params->is_write;

	switch (reg_id) {
	/* MVFR0 */
	case 0b0111:
		params->Op2 = 0;
		break;
	/* MVFR1 */
	case 0b0110:
		params->Op2 = 1;
		break;
	/* MVFR2 */
	case 0b0101:
		params->Op2 = 2;
		break;
	default:
		valid = false;
	}

	if (valid)
		return true;

	kvm_pr_unimpl("Unhandled cp10 register %s: %u\n",
		      params->is_write ? "write" : "read", reg_id);
	return false;
}

/**
 * kvm_handle_cp10_id() - Handles a VMRS trap on guest access to a 'Media and
 *			  VFP Register' from AArch32.
 * @vcpu: The vCPU pointer
 *
 * MVFR{0-2} are architecturally mapped to the AArch64 MVFR{0-2}_EL1 registers.
 * Work out the correct AArch64 system register encoding and reroute to the
 * AArch64 system register emulation.
 */
int kvm_handle_cp10_id(struct kvm_vcpu *vcpu)
{
	int Rt = kvm_vcpu_sys_get_rt(vcpu);
	u64 esr = kvm_vcpu_get_esr(vcpu);
	struct sys_reg_params params;

	/* UNDEF on any unhandled register access */
	if (!kvm_esr_cp10_id_to_sys64(esr, &params)) {
		kvm_inject_undefined(vcpu);
		return 1;
	}

	if (emulate_sys_reg(vcpu, &params))
		vcpu_set_reg(vcpu, Rt, params.regval);

	return 1;
}
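To make the rerouting concrete (an illustration based on the decoding above, not part of the diff): a guest VMRS of MVFR1 traps with reg_id 0b0110 and is replayed as the AArch64 register MVFR1_EL1, i.e. Op0=3, Op1=0, CRn=0, CRm=3, Op2=1. The mapping in isolation:

        /* Standalone sketch of the reg_id -> MVFRx_EL1 Op2 mapping;
         * returns -1 for encodings the code above would treat as UNDEF. */
        static int cp10_reg_id_to_op2(unsigned int reg_id)
        {
                switch (reg_id) {
                case 0b0111: return 0;  /* MVFR0 -> MVFR0_EL1 */
                case 0b0110: return 1;  /* MVFR1 -> MVFR1_EL1 */
                case 0b0101: return 2;  /* MVFR2 -> MVFR2_EL1 */
                default:     return -1; /* anything else UNDEFs */
                }
        }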

/**
 * kvm_emulate_cp15_id_reg() - Handles an MRC trap on a guest CP15 access where
 *			       CRn=0, which corresponds to the AArch32 feature
 *			       registers.
 * @vcpu: the vCPU pointer
 * @params: the system register access parameters.
 *
 * Our cp15 system register tables do not enumerate the AArch32 feature
 * registers. Conveniently, our AArch64 table does, and the AArch32 system
 * register encoding can be trivially remapped into the AArch64 for the feature
 * registers: Append op0=3, leaving op1, CRn, CRm, and op2 the same.
 *
 * According to DDI0487G.b G7.3.1, paragraph "Behavior of VMSAv8-32 32-bit
 * System registers with (coproc=0b1111, CRn==c0)", read accesses from this
 * range are either UNKNOWN or RES0. Rerouting remains architectural as we
 * treat undefined registers in this range as RAZ.
 */
static int kvm_emulate_cp15_id_reg(struct kvm_vcpu *vcpu,
				   struct sys_reg_params *params)
{
	int Rt = kvm_vcpu_sys_get_rt(vcpu);

	/* Treat impossible writes to RO registers as UNDEFINED */
	if (params->is_write) {
		unhandled_cp_access(vcpu, params);
		return 1;
	}

	params->Op0 = 3;

	/*
	 * All registers where CRm > 3 are known to be UNKNOWN/RAZ from AArch32.
	 * Avoid conflicting with future expansion of AArch64 feature registers
	 * and simply treat them as RAZ here.
	 */
	if (params->CRm > 3)
		params->regval = 0;
	else if (!emulate_sys_reg(vcpu, params))
		return 1;

	vcpu_set_reg(vcpu, Rt, params->regval);
	return 1;
}
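A worked example of the remap (illustrative, not from the patch): an AArch32 read such as MRC p15, 0, r0, c0, c1, 0 arrives decoded as Op1=0, CRn=0, CRm=1, Op2=0 (ID_PFR0), and setting Op0=3 yields the AArch64 encoding of ID_PFR0_EL1, which the existing AArch64 table already handles.

        /* Sketch: the whole remap is one assignment on the decoded params.
         * ID_PFR0 (p15, 0, c0, c1, 0)  -->  ID_PFR0_EL1 (3, 0, 0, 1, 0). */
        params->Op0 = 3;	/* Op1, CRn, CRm and Op2 stay as decoded */
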
/**
 * kvm_handle_cp_32 -- handles a mrc/mcr trap on a guest CP14/CP15 access
 * @vcpu: The VCPU pointer
 * @run: The kvm_run struct
 */
static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
			    struct sys_reg_params *params,
			    const struct sys_reg_desc *global,
			    size_t nr_global)
{
	struct sys_reg_params params;
	u64 esr = kvm_vcpu_get_esr(vcpu);
	int Rt = kvm_vcpu_sys_get_rt(vcpu);

	params.CRm = (esr >> 1) & 0xf;
	params.regval = vcpu_get_reg(vcpu, Rt);
	params.is_write = ((esr & 1) == 0);
	params.CRn = (esr >> 10) & 0xf;
	params.Op0 = 0;
	params.Op1 = (esr >> 14) & 0x7;
	params.Op2 = (esr >> 17) & 0x7;
	params->regval = vcpu_get_reg(vcpu, Rt);

	if (!emulate_cp(vcpu, &params, global, nr_global)) {
		if (!params.is_write)
			vcpu_set_reg(vcpu, Rt, params.regval);
	if (emulate_cp(vcpu, params, global, nr_global)) {
		if (!params->is_write)
			vcpu_set_reg(vcpu, Rt, params->regval);
		return 1;
	}

	unhandled_cp_access(vcpu, &params);
	unhandled_cp_access(vcpu, params);
	return 1;
}
@ -2388,7 +2501,20 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu)

int kvm_handle_cp15_32(struct kvm_vcpu *vcpu)
{
	return kvm_handle_cp_32(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs));
	struct sys_reg_params params;

	params = esr_cp1x_32_to_params(kvm_vcpu_get_esr(vcpu));

	/*
	 * Certain AArch32 ID registers are handled by rerouting to the AArch64
	 * system register table. Registers in the ID range where CRm=0 are
	 * excluded from this scheme as they do not trivially map into AArch64
	 * system register encodings.
	 */
	if (params.Op1 == 0 && params.CRn == 0 && params.CRm)
		return kvm_emulate_cp15_id_reg(vcpu, &params);

	return kvm_handle_cp_32(vcpu, &params, cp15_regs, ARRAY_SIZE(cp15_regs));
}

int kvm_handle_cp14_64(struct kvm_vcpu *vcpu)
@ -2398,7 +2524,11 @@ int kvm_handle_cp14_64(struct kvm_vcpu *vcpu)

int kvm_handle_cp14_32(struct kvm_vcpu *vcpu)
{
	return kvm_handle_cp_32(vcpu, cp14_regs, ARRAY_SIZE(cp14_regs));
	struct sys_reg_params params;

	params = esr_cp1x_32_to_params(kvm_vcpu_get_esr(vcpu));

	return kvm_handle_cp_32(vcpu, &params, cp14_regs, ARRAY_SIZE(cp14_regs));
}
static bool is_imp_def_sys_reg(struct sys_reg_params *params)
@ -2407,7 +2537,14 @@ static bool is_imp_def_sys_reg(struct sys_reg_params *params)
	return params->Op0 == 3 && (params->CRn & 0b1011) == 0b1011;
}

static int emulate_sys_reg(struct kvm_vcpu *vcpu,
/**
 * emulate_sys_reg - Emulate a guest access to an AArch64 system register
 * @vcpu: The VCPU pointer
 * @params: Decoded system register parameters
 *
 * Return: true if the system register access was successful, false otherwise.
 */
static bool emulate_sys_reg(struct kvm_vcpu *vcpu,
			    struct sys_reg_params *params)
{
	const struct sys_reg_desc *r;
@ -2416,7 +2553,10 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu,

	if (likely(r)) {
		perform_access(vcpu, params, r);
	} else if (is_imp_def_sys_reg(params)) {
		return true;
	}

	if (is_imp_def_sys_reg(params)) {
		kvm_inject_undefined(vcpu);
	} else {
		print_sys_reg_msg(params,
@ -2424,7 +2564,7 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu,
				  *vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
		kvm_inject_undefined(vcpu);
	}
	return 1;
	return false;
}
/**
@ -2452,18 +2592,18 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu)
	struct sys_reg_params params;
	unsigned long esr = kvm_vcpu_get_esr(vcpu);
	int Rt = kvm_vcpu_sys_get_rt(vcpu);
	int ret;

	trace_kvm_handle_sys_reg(esr);

	params = esr_sys64_to_params(esr);
	params.regval = vcpu_get_reg(vcpu, Rt);

	ret = emulate_sys_reg(vcpu, &params);
	if (!emulate_sys_reg(vcpu, &params))
		return 1;

	if (!params.is_write)
		vcpu_set_reg(vcpu, Rt, params.regval);
	return ret;
	return 1;
}
/******************************************************************************
@ -2866,18 +3006,22 @@ int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
	return write_demux_regids(uindices);
}

void kvm_sys_reg_table_init(void)
int kvm_sys_reg_table_init(void)
{
	bool valid = true;
	unsigned int i;
	struct sys_reg_desc clidr;

	/* Make sure tables are unique and in order. */
	BUG_ON(check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), false));
	BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs), true));
	BUG_ON(check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs), true));
	BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs), true));
	BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs), true));
	BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs), false));
	valid &= check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), false);
	valid &= check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs), true);
	valid &= check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs), true);
	valid &= check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs), true);
	valid &= check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs), true);
	valid &= check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs), false);

	if (!valid)
		return -EINVAL;

	/* We abuse the reset function to overwrite the table itself. */
	for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++)
@ -2900,4 +3044,6 @@ void kvm_sys_reg_table_init(void)
		break;
	/* Clear all higher bits. */
	cache_levels &= (1 << (i*3))-1;

	return 0;
}
@ -35,12 +35,19 @@ struct sys_reg_params {
			   .Op2 = ((esr) >> 17) & 0x7, \
			   .is_write = !((esr) & 1) })

#define esr_cp1x_32_to_params(esr) \
	((struct sys_reg_params){ .Op1 = ((esr) >> 14) & 0x7, \
				  .CRn = ((esr) >> 10) & 0xf, \
				  .CRm = ((esr) >> 1) & 0xf, \
				  .Op2 = ((esr) >> 17) & 0x7, \
				  .is_write = !((esr) & 1) })
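A standalone sketch of the decoding this macro performs (the field positions are taken from the macro itself; the caller and esr value are assumptions for the example):

        #include <stdbool.h>
        #include <stdint.h>

        struct cp_params { unsigned int op1, crn, crm, op2; bool is_write; };

        /* Mirrors esr_cp1x_32_to_params(); in the kernel, esr would come
         * from kvm_vcpu_get_esr(). */
        static struct cp_params decode_cp1x_32(uint64_t esr)
        {
                return (struct cp_params){
                        .op1      = (esr >> 14) & 0x7,
                        .crn      = (esr >> 10) & 0xf,
                        .crm      = (esr >> 1)  & 0xf,
                        .op2      = (esr >> 17) & 0x7,
                        .is_write = !(esr & 1),
                };
        }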
struct sys_reg_desc {
	/* Sysreg string for debug */
	const char *name;

	enum {
		AA32_ZEROHIGH,
		AA32_DIRECT,
		AA32_LO,
		AA32_HI,
	} aarch32_map;
@ -98,11 +98,11 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
|
||||
ret = 0;
|
||||
|
||||
if (type == KVM_DEV_TYPE_ARM_VGIC_V2)
|
||||
kvm->arch.max_vcpus = VGIC_V2_MAX_CPUS;
|
||||
kvm->max_vcpus = VGIC_V2_MAX_CPUS;
|
||||
else
|
||||
kvm->arch.max_vcpus = VGIC_V3_MAX_CPUS;
|
||||
kvm->max_vcpus = VGIC_V3_MAX_CPUS;
|
||||
|
||||
if (atomic_read(&kvm->online_vcpus) > kvm->arch.max_vcpus) {
|
||||
if (atomic_read(&kvm->online_vcpus) > kvm->max_vcpus) {
|
||||
ret = -E2BIG;
|
||||
goto out_unlock;
|
||||
}
|
||||
@ -319,7 +319,12 @@ int vgic_init(struct kvm *kvm)
|
||||
|
||||
vgic_debug_init(kvm);
|
||||
|
||||
dist->implementation_rev = 2;
|
||||
/*
|
||||
* If userspace didn't set the GIC implementation revision,
|
||||
* default to the latest and greatest. You know want it.
|
||||
*/
|
||||
if (!dist->implementation_rev)
|
||||
dist->implementation_rev = KVM_VGIC_IMP_REV_LATEST;
|
||||
dist->initialized = true;
|
||||
|
||||
out:
|
||||
|
@ -683,7 +683,7 @@ int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
	if (!vcpu)
		return E_ITS_INT_UNMAPPED_INTERRUPT;

	if (!vcpu->arch.vgic_cpu.lpis_enabled)
	if (!vgic_lpis_enabled(vcpu))
		return -EBUSY;

	vgic_its_cache_translation(kvm, its, devid, eventid, ite->irq);
@ -894,6 +894,18 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its,
	return update_affinity(ite->irq, vcpu);
}

static bool __is_visible_gfn_locked(struct vgic_its *its, gpa_t gpa)
{
	gfn_t gfn = gpa >> PAGE_SHIFT;
	int idx;
	bool ret;

	idx = srcu_read_lock(&its->dev->kvm->srcu);
	ret = kvm_is_visible_gfn(its->dev->kvm, gfn);
	srcu_read_unlock(&its->dev->kvm->srcu, idx);
	return ret;
}
/*
 * Check whether an ID can be stored into the corresponding guest table.
 * For a direct table this is pretty easy, but gets a bit nasty for
@ -908,9 +920,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
	u64 indirect_ptr, type = GITS_BASER_TYPE(baser);
	phys_addr_t base = GITS_BASER_ADDR_48_to_52(baser);
	int esz = GITS_BASER_ENTRY_SIZE(baser);
	int index, idx;
	gfn_t gfn;
	bool ret;
	int index;

	switch (type) {
	case GITS_BASER_TYPE_DEVICE:
@ -933,12 +943,11 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
			return false;

		addr = base + id * esz;
		gfn = addr >> PAGE_SHIFT;

		if (eaddr)
			*eaddr = addr;

		goto out;
		return __is_visible_gfn_locked(its, addr);
	}

	/* calculate and check the index into the 1st level */
@ -964,27 +973,42 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
	/* Find the address of the actual entry */
	index = id % (SZ_64K / esz);
	indirect_ptr += index * esz;
	gfn = indirect_ptr >> PAGE_SHIFT;

	if (eaddr)
		*eaddr = indirect_ptr;

out:
	idx = srcu_read_lock(&its->dev->kvm->srcu);
	ret = kvm_is_visible_gfn(its->dev->kvm, gfn);
	srcu_read_unlock(&its->dev->kvm->srcu, idx);
	return ret;
	return __is_visible_gfn_locked(its, indirect_ptr);
}

/*
 * Check whether an event ID can be stored in the corresponding Interrupt
 * Translation Table, which starts at device->itt_addr.
 */
static bool vgic_its_check_event_id(struct vgic_its *its, struct its_device *device,
				    u32 event_id)
{
	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
	int ite_esz = abi->ite_esz;
	gpa_t gpa;

	/* max table size is: BIT_ULL(device->num_eventid_bits) * ite_esz */
	if (event_id >= BIT_ULL(device->num_eventid_bits))
		return false;

	gpa = device->itt_addr + event_id * ite_esz;
	return __is_visible_gfn_locked(its, gpa);
}
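A worked example of the bound check (all numbers invented): with num_eventid_bits = 5 and 8-byte ITEs, event IDs 0..31 are in range, and event 13's entry sits at itt_addr + 13 * 8.

        #include <stdint.h>

        /* Illustration only; itt_addr is a made-up guest physical address. */
        uint64_t itt_addr = 0x80000000;
        unsigned int num_eventid_bits = 5, ite_esz = 8, event_id = 13;

        if (event_id < (1ULL << num_eventid_bits)) {
                uint64_t gpa = itt_addr + (uint64_t)event_id * ite_esz; /* 0x80000068 */
                /* ...which __is_visible_gfn_locked() must then accept. */
        }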

/*
 * Add a new collection into the ITS collection table.
 * Returns 0 on success, and a negative error value for generic errors.
 */
static int vgic_its_alloc_collection(struct vgic_its *its,
				     struct its_collection **colp,
				     u32 coll_id)
{
	struct its_collection *collection;

	if (!vgic_its_check_id(its, its->baser_coll_table, coll_id, NULL))
		return E_ITS_MAPC_COLLECTION_OOR;

	collection = kzalloc(sizeof(*collection), GFP_KERNEL_ACCOUNT);
	if (!collection)
		return -ENOMEM;
@ -1061,7 +1085,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
	if (!device)
		return E_ITS_MAPTI_UNMAPPED_DEVICE;

	if (event_id >= BIT_ULL(device->num_eventid_bits))
	if (!vgic_its_check_event_id(its, device, event_id))
		return E_ITS_MAPTI_ID_OOR;

	if (its_cmd_get_command(its_cmd) == GITS_CMD_MAPTI)
@ -1078,7 +1102,12 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,

	collection = find_collection(its, coll_id);
	if (!collection) {
		int ret = vgic_its_alloc_collection(its, &collection, coll_id);
		int ret;

		if (!vgic_its_check_id(its, its->baser_coll_table, coll_id, NULL))
			return E_ITS_MAPC_COLLECTION_OOR;

		ret = vgic_its_alloc_collection(its, &collection, coll_id);
		if (ret)
			return ret;
		new_coll = collection;
@ -1233,6 +1262,10 @@ static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its,
	if (!collection) {
		int ret;

		if (!vgic_its_check_id(its, its->baser_coll_table,
					coll_id, NULL))
			return E_ITS_MAPC_COLLECTION_OOR;

		ret = vgic_its_alloc_collection(its, &collection,
						coll_id);
		if (ret)
@ -1272,6 +1305,11 @@ static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its,
	return 0;
}
int vgic_its_inv_lpi(struct kvm *kvm, struct vgic_irq *irq)
{
	return update_lpi_config(kvm, irq, NULL, true);
}

/*
 * The INV command syncs the configuration bits from the memory table.
 * Must be called with the its_lock mutex held.
@ -1288,7 +1326,41 @@ static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its,
	if (!ite)
		return E_ITS_INV_UNMAPPED_INTERRUPT;

	return update_lpi_config(kvm, ite->irq, NULL, true);
	return vgic_its_inv_lpi(kvm, ite->irq);
}

/**
 * vgic_its_invall - invalidate all LPIs targeting a given vcpu
 * @vcpu: the vcpu for which the RD is targeted by an invalidation
 *
 * Contrary to the INVALL command, this targets a RD instead of a
 * collection, and we don't need to hold the its_lock, since no ITS is
 * involved here.
 */
int vgic_its_invall(struct kvm_vcpu *vcpu)
{
	struct kvm *kvm = vcpu->kvm;
	int irq_count, i = 0;
	u32 *intids;

	irq_count = vgic_copy_lpi_list(kvm, vcpu, &intids);
	if (irq_count < 0)
		return irq_count;

	for (i = 0; i < irq_count; i++) {
		struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intids[i]);
		if (!irq)
			continue;
		update_lpi_config(kvm, irq, vcpu, false);
		vgic_put_irq(kvm, irq);
	}

	kfree(intids);

	if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.its_vm)
		its_invall_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe);

	return 0;
}
/*
@ -1305,32 +1377,13 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,
	u32 coll_id = its_cmd_get_collection(its_cmd);
	struct its_collection *collection;
	struct kvm_vcpu *vcpu;
	struct vgic_irq *irq;
	u32 *intids;
	int irq_count, i;

	collection = find_collection(its, coll_id);
	if (!its_is_collection_mapped(collection))
		return E_ITS_INVALL_UNMAPPED_COLLECTION;

	vcpu = kvm_get_vcpu(kvm, collection->target_addr);

	irq_count = vgic_copy_lpi_list(kvm, vcpu, &intids);
	if (irq_count < 0)
		return irq_count;

	for (i = 0; i < irq_count; i++) {
		irq = vgic_get_irq(kvm, NULL, intids[i]);
		if (!irq)
			continue;
		update_lpi_config(kvm, irq, vcpu, false);
		vgic_put_irq(kvm, irq);
	}

	kfree(intids);

	if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.its_vm)
		its_invall_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe);
	vgic_its_invall(vcpu);

	return 0;
}
|
||||
if (!collection)
|
||||
return -EINVAL;
|
||||
|
||||
if (!vgic_its_check_event_id(its, dev, event_id))
|
||||
return -EINVAL;
|
||||
|
||||
ite = vgic_its_alloc_ite(dev, collection, event_id);
|
||||
if (IS_ERR(ite))
|
||||
return PTR_ERR(ite);
|
||||
@ -2183,8 +2239,10 @@ static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id,
|
||||
vcpu = kvm_get_vcpu(kvm, collection->target_addr);
|
||||
|
||||
irq = vgic_add_lpi(kvm, lpi_id, vcpu);
|
||||
if (IS_ERR(irq))
|
||||
if (IS_ERR(irq)) {
|
||||
its_free_ite(kvm, ite);
|
||||
return PTR_ERR(irq);
|
||||
}
|
||||
ite->irq = irq;
|
||||
|
||||
return offset;
|
||||
@ -2296,6 +2354,7 @@ static int vgic_its_restore_dte(struct vgic_its *its, u32 id,
|
||||
void *ptr, void *opaque)
|
||||
{
|
||||
struct its_device *dev;
|
||||
u64 baser = its->baser_device_table;
|
||||
gpa_t itt_addr;
|
||||
u8 num_eventid_bits;
|
||||
u64 entry = *(u64 *)ptr;
|
||||
@ -2316,6 +2375,9 @@ static int vgic_its_restore_dte(struct vgic_its *its, u32 id,
|
||||
/* dte entry is valid */
|
||||
offset = (entry & KVM_ITS_DTE_NEXT_MASK) >> KVM_ITS_DTE_NEXT_SHIFT;
|
||||
|
||||
if (!vgic_its_check_id(its, baser, id, NULL))
|
||||
return -EINVAL;
|
||||
|
||||
dev = vgic_its_alloc_device(its, id, itt_addr, num_eventid_bits);
|
||||
if (IS_ERR(dev))
|
||||
return PTR_ERR(dev);
|
||||
@ -2445,6 +2507,9 @@ static int vgic_its_restore_device_tables(struct vgic_its *its)
|
||||
if (ret > 0)
|
||||
ret = 0;
|
||||
|
||||
if (ret < 0)
|
||||
vgic_its_free_device_list(its->dev->kvm, its);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -2461,6 +2526,11 @@ static int vgic_its_save_cte(struct vgic_its *its,
|
||||
return kvm_write_guest_lock(its->dev->kvm, gpa, &val, esz);
|
||||
}
|
||||
|
||||
/*
|
||||
* Restore a collection entry into the ITS collection table.
|
||||
* Return +1 on success, 0 if the entry was invalid (which should be
|
||||
* interpreted as end-of-table), and a negative error value for generic errors.
|
||||
*/
|
||||
static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz)
|
||||
{
|
||||
struct its_collection *collection;
|
||||
@ -2487,6 +2557,10 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz)
|
||||
collection = find_collection(its, coll_id);
|
||||
if (collection)
|
||||
return -EEXIST;
|
||||
|
||||
if (!vgic_its_check_id(its, its->baser_coll_table, coll_id, NULL))
|
||||
return -EINVAL;
|
||||
|
||||
ret = vgic_its_alloc_collection(its, &collection, coll_id);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -2566,6 +2640,9 @@ static int vgic_its_restore_collection_table(struct vgic_its *its)
|
||||
if (ret > 0)
|
||||
return 0;
|
||||
|
||||
if (ret < 0)
|
||||
vgic_its_free_collection_list(its->dev->kvm, its);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -2597,7 +2674,10 @@ static int vgic_its_restore_tables_v0(struct vgic_its *its)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return vgic_its_restore_device_tables(its);
|
||||
ret = vgic_its_restore_device_tables(its);
|
||||
if (ret)
|
||||
vgic_its_free_collection_list(its->dev->kvm, its);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int vgic_its_commit_v0(struct vgic_its *its)
|
||||
|
@ -73,9 +73,13 @@ static int vgic_mmio_uaccess_write_v2_misc(struct kvm_vcpu *vcpu,
					   gpa_t addr, unsigned int len,
					   unsigned long val)
{
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
	u32 reg;

	switch (addr & 0x0c) {
	case GIC_DIST_IIDR:
		if (val != vgic_mmio_read_v2_misc(vcpu, addr, len))
		reg = vgic_mmio_read_v2_misc(vcpu, addr, len);
		if ((reg ^ val) & ~GICD_IIDR_REVISION_MASK)
			return -EINVAL;

		/*
@ -87,8 +91,16 @@ static int vgic_mmio_uaccess_write_v2_misc(struct kvm_vcpu *vcpu,
		 * migration from old kernels to new kernels with legacy
		 * userspace.
		 */
		vcpu->kvm->arch.vgic.v2_groups_user_writable = true;
		return 0;
		reg = FIELD_GET(GICD_IIDR_REVISION_MASK, val);
		switch (reg) {
		case KVM_VGIC_IMP_REV_2:
		case KVM_VGIC_IMP_REV_3:
			vcpu->kvm->arch.vgic.v2_groups_user_writable = true;
			dist->implementation_rev = reg;
			return 0;
		default:
			return -EINVAL;
		}
	}

	vgic_mmio_write_v2_misc(vcpu, addr, len, val);
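For reference (commentary, not part of the diff): FIELD_GET() from linux/bitfield.h locates a field purely from its mask, so no explicit shift is needed. Assuming the Revision field sits at bits [15:12], as the kernel's GICD_IIDR_REVISION_MASK defines it:

        #include <linux/bitfield.h>

        /* Sketch with an invented IIDR value: bits [15:12] hold 0x2,
         * so FIELD_GET() yields KVM_VGIC_IMP_REV_2 here. */
        u32 iidr = 0x243b;
        u32 rev  = FIELD_GET(GICD_IIDR_REVISION_MASK, iidr); /* == 2 */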
@ -155,13 +155,27 @@ static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu,
					   unsigned long val)
{
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
	u32 reg;

	switch (addr & 0x0c) {
	case GICD_TYPER2:
	case GICD_IIDR:
		if (val != vgic_mmio_read_v3_misc(vcpu, addr, len))
			return -EINVAL;
		return 0;
	case GICD_IIDR:
		reg = vgic_mmio_read_v3_misc(vcpu, addr, len);
		if ((reg ^ val) & ~GICD_IIDR_REVISION_MASK)
			return -EINVAL;

		reg = FIELD_GET(GICD_IIDR_REVISION_MASK, val);
		switch (reg) {
		case KVM_VGIC_IMP_REV_2:
		case KVM_VGIC_IMP_REV_3:
			dist->implementation_rev = reg;
			return 0;
		default:
			return -EINVAL;
		}
	case GICD_CTLR:
		/* Not a GICv4.1? No HW SGIs */
		if (!kvm_vgic_global_state.has_gicv4_1)
@ -221,34 +235,58 @@ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
	vgic_put_irq(vcpu->kvm, irq);
}
bool vgic_lpis_enabled(struct kvm_vcpu *vcpu)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;

	return atomic_read(&vgic_cpu->ctlr) == GICR_CTLR_ENABLE_LPIS;
}

static unsigned long vgic_mmio_read_v3r_ctlr(struct kvm_vcpu *vcpu,
					     gpa_t addr, unsigned int len)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	unsigned long val;

	return vgic_cpu->lpis_enabled ? GICR_CTLR_ENABLE_LPIS : 0;
	val = atomic_read(&vgic_cpu->ctlr);
	if (vgic_get_implementation_rev(vcpu) >= KVM_VGIC_IMP_REV_3)
		val |= GICR_CTLR_IR | GICR_CTLR_CES;

	return val;
}

static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu *vcpu,
				     gpa_t addr, unsigned int len,
				     unsigned long val)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	bool was_enabled = vgic_cpu->lpis_enabled;
	u32 ctlr;

	if (!vgic_has_its(vcpu->kvm))
		return;

	vgic_cpu->lpis_enabled = val & GICR_CTLR_ENABLE_LPIS;
	if (!(val & GICR_CTLR_ENABLE_LPIS)) {
		/*
		 * Don't disable if RWP is set, as there is already an
		 * ongoing disable. Funky guest...
		 */
		ctlr = atomic_cmpxchg_acquire(&vgic_cpu->ctlr,
					      GICR_CTLR_ENABLE_LPIS,
					      GICR_CTLR_RWP);
		if (ctlr != GICR_CTLR_ENABLE_LPIS)
			return;

	if (was_enabled && !vgic_cpu->lpis_enabled) {
		vgic_flush_pending_lpis(vcpu);
		vgic_its_invalidate_cache(vcpu->kvm);
	}
		atomic_set_release(&vgic_cpu->ctlr, 0);
	} else {
		ctlr = atomic_cmpxchg_acquire(&vgic_cpu->ctlr, 0,
					      GICR_CTLR_ENABLE_LPIS);
		if (ctlr != 0)
			return;

	if (!was_enabled && vgic_cpu->lpis_enabled)
		vgic_enable_lpis(vcpu);
	}
}
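The CTLR handling above is a small three-state machine (idle, LPIs enabled, RWP/busy disabling) driven by acquire/release atomics so that only one writer performs each transition. The same pattern in standalone C11, with invented names:

        #include <stdatomic.h>
        #include <stdbool.h>

        enum { IDLE = 0, ENABLED = 1, BUSY_DISABLING = 2 };

        static bool try_disable(atomic_int *state)
        {
                int old = ENABLED;

                /* ENABLED -> BUSY_DISABLING; concurrent writers see the
                 * failure and back off, mirroring atomic_cmpxchg_acquire()
                 * above. */
                if (!atomic_compare_exchange_strong(state, &old, BUSY_DISABLING))
                        return false;

                /* ... flush pending state while marked busy ... */

                atomic_store_explicit(state, IDLE, memory_order_release);
                return true;
        }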
static bool vgic_mmio_vcpu_rdist_is_last(struct kvm_vcpu *vcpu)
@ -478,11 +516,10 @@ static void vgic_mmio_write_propbase(struct kvm_vcpu *vcpu,
				     unsigned long val)
{
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	u64 old_propbaser, propbaser;

	/* Storing a value with LPIs already enabled is undefined */
	if (vgic_cpu->lpis_enabled)
	if (vgic_lpis_enabled(vcpu))
		return;

	do {
@ -513,7 +550,7 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu,
	u64 old_pendbaser, pendbaser;

	/* Storing a value with LPIs already enabled is undefined */
	if (vgic_cpu->lpis_enabled)
	if (vgic_lpis_enabled(vcpu))
		return;

	do {
@ -525,6 +562,63 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu,
		    pendbaser) != old_pendbaser);
}

static unsigned long vgic_mmio_read_sync(struct kvm_vcpu *vcpu,
					 gpa_t addr, unsigned int len)
{
	return !!atomic_read(&vcpu->arch.vgic_cpu.syncr_busy);
}

static void vgic_set_rdist_busy(struct kvm_vcpu *vcpu, bool busy)
{
	if (busy) {
		atomic_inc(&vcpu->arch.vgic_cpu.syncr_busy);
		smp_mb__after_atomic();
	} else {
		smp_mb__before_atomic();
		atomic_dec(&vcpu->arch.vgic_cpu.syncr_busy);
	}
}
static void vgic_mmio_write_invlpi(struct kvm_vcpu *vcpu,
				   gpa_t addr, unsigned int len,
				   unsigned long val)
{
	struct vgic_irq *irq;

	/*
	 * If the guest wrote only to the upper 32bit part of the
	 * register, drop the write on the floor, as it is only for
	 * vPEs (which we don't support for obvious reasons).
	 *
	 * Also discard the access if LPIs are not enabled.
	 */
	if ((addr & 4) || !vgic_lpis_enabled(vcpu))
		return;

	vgic_set_rdist_busy(vcpu, true);

	irq = vgic_get_irq(vcpu->kvm, NULL, lower_32_bits(val));
	if (irq) {
		vgic_its_inv_lpi(vcpu->kvm, irq);
		vgic_put_irq(vcpu->kvm, irq);
	}

	vgic_set_rdist_busy(vcpu, false);
}

static void vgic_mmio_write_invall(struct kvm_vcpu *vcpu,
				   gpa_t addr, unsigned int len,
				   unsigned long val)
{
	/* See vgic_mmio_write_invlpi() for the early return rationale */
	if ((addr & 4) || !vgic_lpis_enabled(vcpu))
		return;

	vgic_set_rdist_busy(vcpu, true);
	vgic_its_invall(vcpu);
	vgic_set_rdist_busy(vcpu, false);
}

/*
 * The GICv3 per-IRQ registers are split to control PPIs and SGIs in the
 * redistributors, while SPIs are covered by registers in the distributor
@ -630,6 +724,15 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = {
	REGISTER_DESC_WITH_LENGTH(GICR_PENDBASER,
		vgic_mmio_read_pendbase, vgic_mmio_write_pendbase, 8,
		VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
	REGISTER_DESC_WITH_LENGTH(GICR_INVLPIR,
		vgic_mmio_read_raz, vgic_mmio_write_invlpi, 8,
		VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
	REGISTER_DESC_WITH_LENGTH(GICR_INVALLR,
		vgic_mmio_read_raz, vgic_mmio_write_invall, 8,
		VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
	REGISTER_DESC_WITH_LENGTH(GICR_SYNCR,
		vgic_mmio_read_sync, vgic_mmio_write_wi, 4,
		VGIC_ACCESS_32bit),
	REGISTER_DESC_WITH_LENGTH(GICR_IDREGS,
		vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48,
		VGIC_ACCESS_32bit),
@ -612,6 +612,10 @@ early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable);
static const struct midr_range broken_seis[] = {
	MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM),
	MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM),
	MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_PRO),
	MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO),
	MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX),
	MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX),
	{},
};
|
@ -98,6 +98,11 @@
|
||||
#define DEBUG_SPINLOCK_BUG_ON(p)
|
||||
#endif
|
||||
|
||||
static inline u32 vgic_get_implementation_rev(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->kvm->arch.vgic.implementation_rev;
|
||||
}
|
||||
|
||||
/* Requires the irq_lock to be held by the caller. */
|
||||
static inline bool irq_is_pending(struct vgic_irq *irq)
|
||||
{
|
||||
@ -308,6 +313,7 @@ static inline bool vgic_dist_overlap(struct kvm *kvm, gpa_t base, size_t size)
|
||||
(base < d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE);
|
||||
}
|
||||
|
||||
bool vgic_lpis_enabled(struct kvm_vcpu *vcpu);
|
||||
int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr);
|
||||
int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
|
||||
u32 devid, u32 eventid, struct vgic_irq **irq);
|
||||
@ -317,6 +323,10 @@ void vgic_lpi_translation_cache_init(struct kvm *kvm);
|
||||
void vgic_lpi_translation_cache_destroy(struct kvm *kvm);
|
||||
void vgic_its_invalidate_cache(struct kvm *kvm);
|
||||
|
||||
/* GICv4.1 MMIO interface */
|
||||
int vgic_its_inv_lpi(struct kvm *kvm, struct vgic_irq *irq);
|
||||
int vgic_its_invall(struct kvm_vcpu *vcpu);
|
||||
|
||||
bool vgic_supports_direct_msis(struct kvm *kvm);
|
||||
int vgic_v4_init(struct kvm *kvm);
|
||||
void vgic_v4_teardown(struct kvm *kvm);
|
||||
|
@ -27,7 +27,17 @@ void __delay(unsigned long cycles)
{
	cycles_t start = get_cycles();

	if (arch_timer_evtstrm_available()) {
	if (cpus_have_const_cap(ARM64_HAS_WFXT)) {
		u64 end = start + cycles;

		/*
		 * Start with WFIT. If an interrupt makes us resume
		 * early, use a WFET loop to complete the delay.
		 */
		wfit(end);
		while ((get_cycles() - start) < cycles)
			wfet(end);
	} else if (arch_timer_evtstrm_available()) {
		const cycles_t timer_evt_period =
			USECS_TO_CYCLES(ARCH_TIMER_EVT_STREAM_PERIOD_US);
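Two details worth noting here (commentary, not part of the diff): WFET wakes on an event or once the counter passes the programmed deadline, so the loop terminates even if no interrupt ever arrives; and the elapsed-time test is written as a subtraction so it stays correct across counter wrap, the usual unsigned-arithmetic idiom:

        #include <stdbool.h>
        #include <stdint.h>

        /* Wrap-safe check (sketch): unsigned subtraction gives the elapsed
         * count modulo 2^64, so a wrapped counter is handled for free. */
        static inline bool still_waiting(uint64_t start, uint64_t now,
                                         uint64_t cycles)
        {
                return (now - start) < cycles;
        }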
@ -75,6 +75,20 @@ EXPORT_SYMBOL_GPL(__sync_icache_dcache);
 */
void flush_dcache_page(struct page *page)
{
	/*
	 * Only the head page's flags of HugeTLB can be cleared since the tail
	 * vmemmap pages associated with each HugeTLB page are mapped
	 * read-only when CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP is enabled (for
	 * details, see vmemmap_remap_pte()). Although
	 * __sync_icache_dcache() only sets the PG_dcache_clean flag on the
	 * head page struct, there is more than one page struct with
	 * PG_dcache_clean associated with the HugeTLB page since the head
	 * vmemmap page frame is reused (for details, see the comments above
	 * page_fixed_fake_head()).
	 */
	if (hugetlb_optimize_vmemmap_enabled() && PageHuge(page))
		page = compound_head(page);

	if (test_bit(PG_dcache_clean, &page->flags))
		clear_bit(PG_dcache_clean, &page->flags);
}
@ -502,19 +502,20 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
	set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
}

void huge_ptep_clear_flush(struct vm_area_struct *vma,
			   unsigned long addr, pte_t *ptep)
pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
			    unsigned long addr, pte_t *ptep)
{
	size_t pgsize;
	int ncontig;
	pte_t orig_pte;

	if (!pte_cont(READ_ONCE(*ptep))) {
		ptep_clear_flush(vma, addr, ptep);
		return;
	}
	if (!pte_cont(READ_ONCE(*ptep)))
		return ptep_clear_flush(vma, addr, ptep);

	ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
	clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
	orig_pte = get_clear_contig(vma->vm_mm, addr, ptep, pgsize, ncontig);
	flush_tlb_range(vma, addr, addr + pgsize * ncontig);
	return orig_pte;
}

static int __init hugetlbpage_init(void)
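Context for the signature change (commentary, not from the diff): returning the original PTE lets a caller that clears a contiguous huge mapping re-install exactly what was there if a later step fails. A hedged caller-side sketch, where do_migration_step() is invented for the example:

        pte_t orig = huge_ptep_clear_flush(vma, addr, ptep);

        if (do_migration_step(vma, addr) < 0)
                set_huge_pte_at(vma->vm_mm, addr, ptep, orig); /* roll back */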