Merge 27bc50fc90 ("Merge tag 'mm-stable-2022-10-08' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm") into android-mainline

Steps on the way to 6.1-rc1

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: I3a6855cb0e3a54f605ccb92af058088f280e0349

commit de07798cf0
@@ -299,7 +299,7 @@ Per-node-per-memcgroup LRU (cgroup's private LRU) is guarded by
 lruvec->lru_lock; PG_lru bit of page->flags is cleared before
 isolating a page from its LRU under lruvec->lru_lock.

-2.7 Kernel Memory Extension (CONFIG_MEMCG_KMEM)
+2.7 Kernel Memory Extension
 -----------------------------------------------

 With the Kernel memory extension, the Memory Controller is able to limit
@@ -386,8 +386,6 @@ U != 0, K >= U:

 a. Enable CONFIG_CGROUPS
 b. Enable CONFIG_MEMCG
-c. Enable CONFIG_MEMCG_SWAP (to use swap extension)
-d. Enable CONFIG_MEMCG_KMEM (to use kmem extension)

 3.1. Prepare the cgroups (see cgroups.txt, Why are cgroups needed?)
 -------------------------------------------------------------------
@@ -6057,12 +6057,6 @@
 			This parameter controls use of the Protected
 			Execution Facility on pSeries.

-	swapaccount=	[KNL]
-			Format: [0|1]
-			Enable accounting of swap in memory resource
-			controller if no parameter or 1 is given or disable
-			it if 0 is given (See Documentation/admin-guide/cgroup-v1/memory.rst)
-
 	swiotlb=	[ARM,IA-64,PPC,MIPS,X86]
 			Format: { <int> [,<int>] | force | noforce }
 			<int> -- Number of I/O TLB slabs
@@ -1,8 +1,8 @@
 .. SPDX-License-Identifier: GPL-2.0

-========================
-Monitoring Data Accesses
-========================
+==========================
+DAMON: Data Access MONitor
+==========================

 :doc:`DAMON </mm/damon/index>` allows light-weight data access monitoring.
 Using DAMON, users can analyze the memory access patterns of their systems and
@@ -29,16 +29,9 @@ called DAMON Operator (DAMO). It is available at
 https://github.com/awslabs/damo. The examples below assume that ``damo`` is on
 your ``$PATH``. It's not mandatory, though.

-Because DAMO is using the debugfs interface (refer to :doc:`usage` for the
-detail) of DAMON, you should ensure debugfs is mounted. Mount it manually as
-below::
-
-    # mount -t debugfs none /sys/kernel/debug/
-
-or append the following line to your ``/etc/fstab`` file so that your system
-can automatically mount debugfs upon booting::
-
-    debugfs /sys/kernel/debug debugfs defaults 0 0
+Because DAMO is using the sysfs interface (refer to :doc:`usage` for the
+detail) of DAMON, you should ensure :doc:`sysfs </filesystems/sysfs>` is
+mounted.


 Recording Data Access Patterns
@@ -393,6 +393,11 @@ the files as above. Above is only for an example.
 debugfs Interface
 =================

+.. note::
+
+  DAMON debugfs interface will be removed after next LTS kernel is released, so
+  users should move to the :ref:`sysfs interface <sysfs_interface>`.
+
 DAMON exports eight files, ``attrs``, ``target_ids``, ``init_regions``,
 ``schemes``, ``monitor_on``, ``kdamond_pid``, ``mk_contexts`` and
 ``rm_contexts`` under its debugfs directory, ``<debugfs>/damon/``.
@@ -184,6 +184,42 @@ The maximum possible ``pages_sharing/pages_shared`` ratio is limited by the
 ``max_page_sharing`` tunable. To increase the ratio ``max_page_sharing`` must
 be increased accordingly.

+Monitoring KSM profit
+=====================
+
+KSM can save memory by merging identical pages, but also can consume
+additional memory, because it needs to generate a number of rmap_items to
+save each scanned page's brief rmap information. Some of these pages may
+be merged, but some may not be abled to be merged after being checked
+several times, which are unprofitable memory consumed.
+
+1) How to determine whether KSM save memory or consume memory in system-wide
+   range? Here is a simple approximate calculation for reference::
+
+	general_profit =~ pages_sharing * sizeof(page) - (all_rmap_items) *
+			  sizeof(rmap_item);
+
+   where all_rmap_items can be easily obtained by summing ``pages_sharing``,
+   ``pages_shared``, ``pages_unshared`` and ``pages_volatile``.
+
+2) The KSM profit inner a single process can be similarly obtained by the
+   following approximate calculation::
+
+	process_profit =~ ksm_merging_pages * sizeof(page) -
+			  ksm_rmap_items * sizeof(rmap_item).
+
+   where ksm_merging_pages is shown under the directory ``/proc/<pid>/``,
+   and ksm_rmap_items is shown in ``/proc/<pid>/ksm_stat``.
+
+From the perspective of application, a high ratio of ``ksm_rmap_items`` to
+``ksm_merging_pages`` means a bad madvise-applied policy, so developers or
+administrators have to rethink how to change madvise policy. Giving an example
+for reference, a page's size is usually 4K, and the rmap_item's size is
+separately 32B on 32-bit CPU architecture and 64B on 64-bit CPU architecture.
+so if the ``ksm_rmap_items/ksm_merging_pages`` ratio exceeds 64 on 64-bit CPU
+or exceeds 128 on 32-bit CPU, then the app's madvise policy should be dropped,
+because the ksm profit is approximately zero or negative.
+
 Monitoring KSM events
 =====================

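The profit formulas in the hunk above lend themselves to a quick userspace cross-check. A minimal sketch, not part of the patch — it assumes the usual ``/sys/kernel/mm/ksm`` counters and the 64-byte 64-bit ``rmap_item`` size quoted in the text; the program itself is illustrative only::

    /* ksm_profit.c - rough userspace estimate of the KSM "general profit". */
    #include <stdio.h>
    #include <unistd.h>

    static long ksm_read(const char *name)
    {
        char path[128];
        long val = 0;
        FILE *f;

        snprintf(path, sizeof(path), "/sys/kernel/mm/ksm/%s", name);
        f = fopen(path, "r");
        if (f) {
            if (fscanf(f, "%ld", &val) != 1)
                val = 0;
            fclose(f);
        }
        return val;
    }

    int main(void)
    {
        long page = sysconf(_SC_PAGESIZE);  /* usually 4096 */
        long rmap_item = 64;                /* assumed: 64-bit rmap_item size per the text */
        long all_rmap_items = ksm_read("pages_sharing") + ksm_read("pages_shared") +
                              ksm_read("pages_unshared") + ksm_read("pages_volatile");

        printf("approximate KSM general profit: %ld bytes\n",
               ksm_read("pages_sharing") * page - all_rmap_items * rmap_item);
        return 0;
    }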
@@ -191,7 +191,14 @@ allocation failure to throttle the next allocation attempt::

 /sys/kernel/mm/transparent_hugepage/khugepaged/alloc_sleep_millisecs

-The khugepaged progress can be seen in the number of pages collapsed::
+The khugepaged progress can be seen in the number of pages collapsed (note
+that this counter may not be an exact count of the number of pages
+collapsed, since "collapsed" could mean multiple things: (1) A PTE mapping
+being replaced by a PMD mapping, or (2) All 4K physical pages replaced by
+one 2M hugepage. Each may happen independently, or together, depending on
+the type of memory and the failures that occur. As such, this value should
+be interpreted roughly as a sign of progress, and counters in /proc/vmstat
+consulted for more accurate accounting)::

 /sys/kernel/mm/transparent_hugepage/khugepaged/pages_collapsed

@@ -366,10 +373,9 @@ thp_split_pmd
 	page table entry.

 thp_zero_page_alloc
-	is incremented every time a huge zero page is
-	successfully allocated. It includes allocations which where
-	dropped due race with other allocation. Note, it doesn't count
-	every map of the huge zero page, only its allocation.
+	is incremented every time a huge zero page used for thp is
+	successfully allocated. Note, it doesn't count every map of
+	the huge zero page, only its allocation.

 thp_zero_page_alloc_failed
 	is incremented if kernel fails to allocate
@@ -19,9 +19,6 @@ User Space Memory Access
 Memory Allocation Controls
 ==========================

-.. kernel-doc:: include/linux/gfp.h
-   :internal:
-
 .. kernel-doc:: include/linux/gfp_types.h
    :doc: Page mobility and placement hints

@@ -24,6 +24,7 @@ Documentation/dev-tools/testing-overview.rst
   kcov
   gcov
   kasan
+  kmsan
   ubsan
   kmemleak
   kcsan
@@ -111,9 +111,17 @@ parameter can be used to control panic and reporting behaviour:
   report or also panic the kernel (default: ``report``). The panic happens even
   if ``kasan_multi_shot`` is enabled.

-Hardware Tag-Based KASAN mode (see the section about various modes below) is
-intended for use in production as a security mitigation. Therefore, it supports
-additional boot parameters that allow disabling KASAN or controlling features:
+Software and Hardware Tag-Based KASAN modes (see the section about various
+modes below) support altering stack trace collection behavior:
+
+- ``kasan.stacktrace=off`` or ``=on`` disables or enables alloc and free stack
+  traces collection (default: ``on``).
+- ``kasan.stack_ring_size=<number of entries>`` specifies the number of entries
+  in the stack ring (default: ``32768``).
+
+Hardware Tag-Based KASAN mode is intended for use in production as a security
+mitigation. Therefore, it supports additional boot parameters that allow
+disabling KASAN altogether or controlling its features:

 - ``kasan=off`` or ``=on`` controls whether KASAN is enabled (default: ``on``).

@@ -132,9 +140,6 @@ additional boot parameters that allow disabling KASAN or controlling features:
 - ``kasan.vmalloc=off`` or ``=on`` disables or enables tagging of vmalloc
   allocations (default: ``on``).

-- ``kasan.stacktrace=off`` or ``=on`` disables or enables alloc and free stack
-  traces collection (default: ``on``).
-
 Error reports
 ~~~~~~~~~~~~~

Documentation/dev-tools/kmsan.rst (new file, 427 lines)
@@ -0,0 +1,427 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. Copyright (C) 2022, Google LLC.
+
+===================================
+The Kernel Memory Sanitizer (KMSAN)
+===================================
+
+KMSAN is a dynamic error detector aimed at finding uses of uninitialized
+values. It is based on compiler instrumentation, and is quite similar to the
+userspace `MemorySanitizer tool`_.
+
+An important note is that KMSAN is not intended for production use, because it
+drastically increases kernel memory footprint and slows the whole system down.
+
+Usage
+=====
+
+Building the kernel
+-------------------
+
+In order to build a kernel with KMSAN you will need a fresh Clang (14.0.6+).
+Please refer to `LLVM documentation`_ for the instructions on how to build Clang.
+
+Now configure and build the kernel with CONFIG_KMSAN enabled.
+
+Example report
+--------------
+
+Here is an example of a KMSAN report::
+
+  =====================================================
+  BUG: KMSAN: uninit-value in test_uninit_kmsan_check_memory+0x1be/0x380 [kmsan_test]
+   test_uninit_kmsan_check_memory+0x1be/0x380 mm/kmsan/kmsan_test.c:273
+   kunit_run_case_internal lib/kunit/test.c:333
+   kunit_try_run_case+0x206/0x420 lib/kunit/test.c:374
+   kunit_generic_run_threadfn_adapter+0x6d/0xc0 lib/kunit/try-catch.c:28
+   kthread+0x721/0x850 kernel/kthread.c:327
+   ret_from_fork+0x1f/0x30 ??:?
+
+  Uninit was stored to memory at:
+   do_uninit_local_array+0xfa/0x110 mm/kmsan/kmsan_test.c:260
+   test_uninit_kmsan_check_memory+0x1a2/0x380 mm/kmsan/kmsan_test.c:271
+   kunit_run_case_internal lib/kunit/test.c:333
+   kunit_try_run_case+0x206/0x420 lib/kunit/test.c:374
+   kunit_generic_run_threadfn_adapter+0x6d/0xc0 lib/kunit/try-catch.c:28
+   kthread+0x721/0x850 kernel/kthread.c:327
+   ret_from_fork+0x1f/0x30 ??:?
+
+  Local variable uninit created at:
+   do_uninit_local_array+0x4a/0x110 mm/kmsan/kmsan_test.c:256
+   test_uninit_kmsan_check_memory+0x1a2/0x380 mm/kmsan/kmsan_test.c:271
+
+  Bytes 4-7 of 8 are uninitialized
+  Memory access of size 8 starts at ffff888083fe3da0
+
+  CPU: 0 PID: 6731 Comm: kunit_try_catch Tainted: G B E 5.16.0-rc3+ #104
+  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
+  =====================================================
+
+The report says that the local variable ``uninit`` was created uninitialized in
+``do_uninit_local_array()``. The third stack trace corresponds to the place
+where this variable was created.
+
+The first stack trace shows where the uninit value was used (in
+``test_uninit_kmsan_check_memory()``). The tool shows the bytes which were left
+uninitialized in the local variable, as well as the stack where the value was
+copied to another memory location before use.
+
+A use of uninitialized value ``v`` is reported by KMSAN in the following cases:
+
+- in a condition, e.g. ``if (v) { ... }``;
+- in an indexing or pointer dereferencing, e.g. ``array[v]`` or ``*v``;
+- when it is copied to userspace or hardware, e.g. ``copy_to_user(..., &v, ...)``;
+- when it is passed as an argument to a function, and
+  ``CONFIG_KMSAN_CHECK_PARAM_RETVAL`` is enabled (see below).
+
+The mentioned cases (apart from copying data to userspace or hardware, which is
+a security issue) are considered undefined behavior from the C11 Standard point
+of view.
+
+Disabling the instrumentation
+-----------------------------
+
+A function can be marked with ``__no_kmsan_checks``. Doing so makes KMSAN
+ignore uninitialized values in that function and mark its output as initialized.
+As a result, the user will not get KMSAN reports related to that function.
+
+Another function attribute supported by KMSAN is ``__no_sanitize_memory``.
+Applying this attribute to a function will result in KMSAN not instrumenting
+it, which can be helpful if we do not want the compiler to interfere with some
+low-level code (e.g. that marked with ``noinstr`` which implicitly adds
+``__no_sanitize_memory``).
+
+This however comes at a cost: stack allocations from such functions will have
+incorrect shadow/origin values, likely leading to false positives. Functions
+called from non-instrumented code may also receive incorrect metadata for their
+parameters.
+
+As a rule of thumb, avoid using ``__no_sanitize_memory`` explicitly.
+
+It is also possible to disable KMSAN for a single file (e.g. main.o)::
+
+  KMSAN_SANITIZE_main.o := n
+
+or for the whole directory::
+
+  KMSAN_SANITIZE := n
+
+in the Makefile. Think of this as applying ``__no_sanitize_memory`` to every
+function in the file or directory. Most users won't need KMSAN_SANITIZE, unless
+their code gets broken by KMSAN (e.g. runs at early boot time).
+
+Support
+=======
+
+In order for KMSAN to work the kernel must be built with Clang, which so far is
+the only compiler that has KMSAN support. The kernel instrumentation pass is
+based on the userspace `MemorySanitizer tool`_.
+
+The runtime library only supports x86_64 at the moment.
+
+How KMSAN works
+===============
+
+KMSAN shadow memory
+-------------------
+
+KMSAN associates a metadata byte (also called shadow byte) with every byte of
+kernel memory. A bit in the shadow byte is set iff the corresponding bit of the
+kernel memory byte is uninitialized. Marking the memory uninitialized (i.e.
+setting its shadow bytes to ``0xff``) is called poisoning, marking it
+initialized (setting the shadow bytes to ``0x00``) is called unpoisoning.
+
+When a new variable is allocated on the stack, it is poisoned by default by
+instrumentation code inserted by the compiler (unless it is a stack variable
+that is immediately initialized). Any new heap allocation done without
+``__GFP_ZERO`` is also poisoned.
+
+Compiler instrumentation also tracks the shadow values as they are used along
+the code. When needed, instrumentation code invokes the runtime library in
+``mm/kmsan/`` to persist shadow values.
+
+The shadow value of a basic or compound type is an array of bytes of the same
+length. When a constant value is written into memory, that memory is unpoisoned.
+When a value is read from memory, its shadow memory is also obtained and
+propagated into all the operations which use that value. For every instruction
+that takes one or more values the compiler generates code that calculates the
+shadow of the result depending on those values and their shadows.
+
+Example::
+
+  int a = 0xff;  // i.e. 0x000000ff
+  int b;
+  int c = a | b;
+
+In this case the shadow of ``a`` is ``0``, shadow of ``b`` is ``0xffffffff``,
+shadow of ``c`` is ``0xffffff00``. This means that the upper three bytes of
+``c`` are uninitialized, while the lower byte is initialized.
+
+Origin tracking
+---------------
+
+Every four bytes of kernel memory also have a so-called origin mapped to them.
+This origin describes the point in program execution at which the uninitialized
+value was created. Every origin is associated with either the full allocation
+stack (for heap-allocated memory), or the function containing the uninitialized
+variable (for locals).
+
+When an uninitialized variable is allocated on stack or heap, a new origin
+value is created, and that variable's origin is filled with that value. When a
+value is read from memory, its origin is also read and kept together with the
+shadow. For every instruction that takes one or more values, the origin of the
+result is one of the origins corresponding to any of the uninitialized inputs.
+If a poisoned value is written into memory, its origin is written to the
+corresponding storage as well.
+
+Example 1::
+
+  int a = 42;
+  int b;
+  int c = a + b;
+
+In this case the origin of ``b`` is generated upon function entry, and is
+stored to the origin of ``c`` right before the addition result is written into
+memory.
+
+Several variables may share the same origin address, if they are stored in the
+same four-byte chunk. In this case every write to either variable updates the
+origin for all of them. We have to sacrifice precision in this case, because
+storing origins for individual bits (and even bytes) would be too costly.
+
+Example 2::
+
+  int combine(short a, short b) {
+    union ret_t {
+      int i;
+      short s[2];
+    } ret;
+    ret.s[0] = a;
+    ret.s[1] = b;
+    return ret.i;
+  }
+
+If ``a`` is initialized and ``b`` is not, the shadow of the result would be
+0xffff0000, and the origin of the result would be the origin of ``b``.
+``ret.s[0]`` would have the same origin, but it will never be used, because
+that variable is initialized.
+
+If both function arguments are uninitialized, only the origin of the second
+argument is preserved.
+
+Origin chaining
+~~~~~~~~~~~~~~~
+
+To ease debugging, KMSAN creates a new origin for every store of an
+uninitialized value to memory. The new origin references both its creation stack
+and the previous origin the value had. This may cause increased memory
+consumption, so we limit the length of origin chains in the runtime.
+
+Clang instrumentation API
+-------------------------
+
+Clang instrumentation pass inserts calls to functions defined in
+``mm/kmsan/instrumentation.c`` into the kernel code.
+
+Shadow manipulation
+~~~~~~~~~~~~~~~~~~~
+
+For every memory access the compiler emits a call to a function that returns a
+pair of pointers to the shadow and origin addresses of the given memory::
+
+  typedef struct {
+    void *shadow, *origin;
+  } shadow_origin_ptr_t
+
+  shadow_origin_ptr_t __msan_metadata_ptr_for_load_{1,2,4,8}(void *addr)
+  shadow_origin_ptr_t __msan_metadata_ptr_for_store_{1,2,4,8}(void *addr)
+  shadow_origin_ptr_t __msan_metadata_ptr_for_load_n(void *addr, uintptr_t size)
+  shadow_origin_ptr_t __msan_metadata_ptr_for_store_n(void *addr, uintptr_t size)
+
+The function name depends on the memory access size.
+
+The compiler makes sure that for every loaded value its shadow and origin
+values are read from memory. When a value is stored to memory, its shadow and
+origin are also stored using the metadata pointers.
+
+Handling locals
+~~~~~~~~~~~~~~~
+
+A special function is used to create a new origin value for a local variable and
+set the origin of that variable to that value::
+
+  void __msan_poison_alloca(void *addr, uintptr_t size, char *descr)
+
+Access to per-task data
+~~~~~~~~~~~~~~~~~~~~~~~
+
+At the beginning of every instrumented function KMSAN inserts a call to
+``__msan_get_context_state()``::
+
+  kmsan_context_state *__msan_get_context_state(void)
+
+``kmsan_context_state`` is declared in ``include/linux/kmsan.h``::
+
+  struct kmsan_context_state {
+    char param_tls[KMSAN_PARAM_SIZE];
+    char retval_tls[KMSAN_RETVAL_SIZE];
+    char va_arg_tls[KMSAN_PARAM_SIZE];
+    char va_arg_origin_tls[KMSAN_PARAM_SIZE];
+    u64 va_arg_overflow_size_tls;
+    char param_origin_tls[KMSAN_PARAM_SIZE];
+    depot_stack_handle_t retval_origin_tls;
+  };
+
+This structure is used by KMSAN to pass parameter shadows and origins between
+instrumented functions (unless the parameters are checked immediately by
+``CONFIG_KMSAN_CHECK_PARAM_RETVAL``).
+
+Passing uninitialized values to functions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Clang's MemorySanitizer instrumentation has an option,
+``-fsanitize-memory-param-retval``, which makes the compiler check function
+parameters passed by value, as well as function return values.
+
+The option is controlled by ``CONFIG_KMSAN_CHECK_PARAM_RETVAL``, which is
+enabled by default to let KMSAN report uninitialized values earlier.
+Please refer to the `LKML discussion`_ for more details.
+
+Because of the way the checks are implemented in LLVM (they are only applied to
+parameters marked as ``noundef``), not all parameters are guaranteed to be
+checked, so we cannot give up the metadata storage in ``kmsan_context_state``.
+
+String functions
+~~~~~~~~~~~~~~~~
+
+The compiler replaces calls to ``memcpy()``/``memmove()``/``memset()`` with the
+following functions. These functions are also called when data structures are
+initialized or copied, making sure shadow and origin values are copied alongside
+with the data::
+
+  void *__msan_memcpy(void *dst, void *src, uintptr_t n)
+  void *__msan_memmove(void *dst, void *src, uintptr_t n)
+  void *__msan_memset(void *dst, int c, uintptr_t n)
+
+Error reporting
+~~~~~~~~~~~~~~~
+
+For each use of a value the compiler emits a shadow check that calls
+``__msan_warning()`` in the case that value is poisoned::
+
+  void __msan_warning(u32 origin)
+
+``__msan_warning()`` causes KMSAN runtime to print an error report.
+
+Inline assembly instrumentation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+KMSAN instruments every inline assembly output with a call to::
+
+  void __msan_instrument_asm_store(void *addr, uintptr_t size)
+
+, which unpoisons the memory region.
+
+This approach may mask certain errors, but it also helps to avoid a lot of
+false positives in bitwise operations, atomics etc.
+
+Sometimes the pointers passed into inline assembly do not point to valid memory.
+In such cases they are ignored at runtime.
+
+
+Runtime library
+---------------
+
+The code is located in ``mm/kmsan/``.
+
+Per-task KMSAN state
+~~~~~~~~~~~~~~~~~~~~
+
+Every task_struct has an associated KMSAN task state that holds the KMSAN
+context (see above) and a per-task flag disallowing KMSAN reports::
+
+  struct kmsan_context {
+    ...
+    bool allow_reporting;
+    struct kmsan_context_state cstate;
+    ...
+  }
+
+  struct task_struct {
+    ...
+    struct kmsan_context kmsan;
+    ...
+  }
+
+KMSAN contexts
+~~~~~~~~~~~~~~
+
+When running in a kernel task context, KMSAN uses ``current->kmsan.cstate`` to
+hold the metadata for function parameters and return values.
+
+But in the case the kernel is running in the interrupt, softirq or NMI context,
+where ``current`` is unavailable, KMSAN switches to per-cpu interrupt state::
+
+  DEFINE_PER_CPU(struct kmsan_ctx, kmsan_percpu_ctx);
+
+Metadata allocation
+~~~~~~~~~~~~~~~~~~~
+
+There are several places in the kernel for which the metadata is stored.
+
+1. Each ``struct page`` instance contains two pointers to its shadow and
+origin pages::
+
+  struct page {
+    ...
+    struct page *shadow, *origin;
+    ...
+  };
+
+At boot-time, the kernel allocates shadow and origin pages for every available
+kernel page. This is done quite late, when the kernel address space is already
+fragmented, so normal data pages may arbitrarily interleave with the metadata
+pages.
+
+This means that in general for two contiguous memory pages their shadow/origin
+pages may not be contiguous. Consequently, if a memory access crosses the
+boundary of a memory block, accesses to shadow/origin memory may potentially
+corrupt other pages or read incorrect values from them.
+
+In practice, contiguous memory pages returned by the same ``alloc_pages()``
+call will have contiguous metadata, whereas if these pages belong to two
+different allocations their metadata pages can be fragmented.
+
+For the kernel data (``.data``, ``.bss`` etc.) and percpu memory regions
+there also are no guarantees on metadata contiguity.
+
+In the case ``__msan_metadata_ptr_for_XXX_YYY()`` hits the border between two
+pages with non-contiguous metadata, it returns pointers to fake shadow/origin regions::
+
+  char dummy_load_page[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
+  char dummy_store_page[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
+
+``dummy_load_page`` is zero-initialized, so reads from it always yield zeroes.
+All stores to ``dummy_store_page`` are ignored.
+
+2. For vmalloc memory and modules, there is a direct mapping between the memory
+range, its shadow and origin. KMSAN reduces the vmalloc area by 3/4, making only
+the first quarter available to ``vmalloc()``. The second quarter of the vmalloc
+area contains shadow memory for the first quarter, the third one holds the
+origins. A small part of the fourth quarter contains shadow and origins for the
+kernel modules. Please refer to ``arch/x86/include/asm/pgtable_64_types.h`` for
+more details.
+
+When an array of pages is mapped into a contiguous virtual memory space, their
+shadow and origin pages are similarly mapped into contiguous regions.
+
+References
+==========
+
+E. Stepanov, K. Serebryany. `MemorySanitizer: fast detector of uninitialized
+memory use in C++
+<https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/43308.pdf>`_.
+In Proceedings of CGO 2015.
+
+.. _MemorySanitizer tool: https://clang.llvm.org/docs/MemorySanitizer.html
+.. _LLVM documentation: https://llvm.org/docs/GettingStarted.html
+.. _LKML discussion: https://lore.kernel.org/all/20220614144853.3693273-1-glider@google.com/
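The shadow-propagation and reporting rules described in the new file above are easy to see in a few lines of C. A minimal sketch (the function is illustrative only — mirroring the document's ``int c = a | b;`` example; on a KMSAN-enabled build the use of ``c`` in the condition is what triggers the report)::

    static int kmsan_demo(void)
    {
        int a = 0xff;   /* initialized: shadow 0x00000000 */
        int b;          /* uninitialized local: poisoned, shadow 0xffffffff */
        int c = a | b;  /* propagated shadow 0xffffff00: upper three bytes poisoned */

        if (c > 0)      /* use of a (partially) uninitialized value in a condition */
            return 1;   /* -> KMSAN "uninit-value" report */
        return 0;
    }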
@@ -26,7 +26,7 @@ tree.

 If a KSM page is shared between less than ``max_page_sharing`` VMAs,
 the node of the stable tree that represents such KSM page points to a
-list of struct rmap_item and the ``page->mapping`` of the
+list of struct ksm_rmap_item and the ``page->mapping`` of the
 KSM page points to the stable tree node.

 When the sharing passes this threshold, KSM adds a second dimension to
MAINTAINERS (15 lines changed)
@@ -11011,7 +11011,6 @@ F: arch/*/include/asm/*kasan.h
 F: arch/*/mm/kasan_init*
 F: include/linux/kasan*.h
 F: lib/Kconfig.kasan
-F: lib/test_kasan*.c
 F: mm/kasan/
 F: scripts/Makefile.kasan

@@ -11445,6 +11444,20 @@ F: kernel/kmod.c
 F: lib/test_kmod.c
 F: tools/testing/selftests/kmod/

+KMSAN
+M: Alexander Potapenko <glider@google.com>
+R: Marco Elver <elver@google.com>
+R: Dmitry Vyukov <dvyukov@google.com>
+L: kasan-dev@googlegroups.com
+S: Maintained
+F: Documentation/dev-tools/kmsan.rst
+F: arch/*/include/asm/kmsan.h
+F: arch/*/mm/kmsan_*
+F: include/linux/kmsan*.h
+F: lib/Kconfig.kmsan
+F: mm/kmsan/
+F: scripts/Makefile.kmsan
+
 KPROBES
 M: Naveen N. Rao <naveen.n.rao@linux.ibm.com>
 M: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Makefile (1 line changed)
@@ -1107,6 +1107,7 @@ include-y := scripts/Makefile.extrawarn
 include-$(CONFIG_DEBUG_INFO) += scripts/Makefile.debug
 include-$(CONFIG_KASAN) += scripts/Makefile.kasan
 include-$(CONFIG_KCSAN) += scripts/Makefile.kcsan
+include-$(CONFIG_KMSAN) += scripts/Makefile.kmsan
 include-$(CONFIG_UBSAN) += scripts/Makefile.ubsan
 include-$(CONFIG_KCOV) += scripts/Makefile.kcov
 include-$(CONFIG_RANDSTRUCT) += scripts/Makefile.randstruct
@@ -8,9 +8,9 @@
 #include <asm/cpufeature.h>
 #include <asm/mte.h>

-#define for_each_mte_vma(tsk, vma)				\
+#define for_each_mte_vma(vmi, vma)				\
 	if (system_supports_mte())				\
-		for (vma = tsk->mm->mmap; vma; vma = vma->vm_next)	\
+		for_each_vma(vmi, vma)				\
 			if (vma->vm_flags & VM_MTE)

 static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma)
@@ -81,8 +81,9 @@ Elf_Half elf_core_extra_phdrs(void)
 {
 	struct vm_area_struct *vma;
 	int vma_count = 0;
+	VMA_ITERATOR(vmi, current->mm, 0);

-	for_each_mte_vma(current, vma)
+	for_each_mte_vma(vmi, vma)
 		vma_count++;

 	return vma_count;
@@ -91,8 +92,9 @@ Elf_Half elf_core_extra_phdrs(void)
 int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
 {
 	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, current->mm, 0);

-	for_each_mte_vma(current, vma) {
+	for_each_mte_vma(vmi, vma) {
 		struct elf_phdr phdr;

 		phdr.p_type = PT_AARCH64_MEMTAG_MTE;
@@ -116,8 +118,9 @@ size_t elf_core_extra_data_size(void)
 {
 	struct vm_area_struct *vma;
 	size_t data_size = 0;
+	VMA_ITERATOR(vmi, current->mm, 0);

-	for_each_mte_vma(current, vma)
+	for_each_mte_vma(vmi, vma)
 		data_size += mte_vma_tag_dump_size(vma);

 	return data_size;
@@ -126,8 +129,9 @@ size_t elf_core_extra_data_size(void)
 int elf_core_write_extra_data(struct coredump_params *cprm)
 {
 	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, current->mm, 0);

-	for_each_mte_vma(current, vma) {
+	for_each_mte_vma(vmi, vma) {
 		if (vma->vm_flags & VM_DONTDUMP)
 			continue;

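The arm64 hunks above, and most of the architecture hunks that follow, repeat one mechanical conversion: the old ``mm->mmap``/``vm_next`` linked-list walk becomes a ``VMA_ITERATOR``/``for_each_vma()`` walk over the maple tree. A minimal sketch of the new idiom (the counting helper itself is illustrative only)::

    #include <linux/mm.h>

    /* Count the VMAs of an mm with the VMA iterator introduced in this series. */
    static int count_vmas(struct mm_struct *mm)
    {
        struct vm_area_struct *vma;
        int count = 0;
        VMA_ITERATOR(vmi, mm, 0);   /* start the walk at address 0 */

        mmap_read_lock(mm);
        for_each_vma(vmi, vma)      /* replaces: for (vma = mm->mmap; vma; vma = vma->vm_next) */
            count++;
        mmap_read_unlock(mm);

        return count;
    }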
@@ -133,10 +133,11 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
 {
 	struct mm_struct *mm = task->mm;
 	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);

 	mmap_read_lock(mm);

-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+	for_each_vma(vmi, vma) {
 		unsigned long size = vma->vm_end - vma->vm_start;

 		if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA64].dm))
@@ -9,7 +9,6 @@ CONFIG_HIGH_RES_TIMERS=y
 CONFIG_LOG_BUF_SHIFT=16
 CONFIG_CGROUPS=y
 CONFIG_MEMCG=y
-CONFIG_MEMCG_SWAP=y
 CONFIG_BLK_CGROUP=y
 CONFIG_CGROUP_SCHED=y
 CONFIG_CFS_BANDWIDTH=y
@@ -3,7 +3,6 @@ CONFIG_NO_HZ_IDLE=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_MEMCG=y
-CONFIG_MEMCG_SWAP=y
 CONFIG_BLK_CGROUP=y
 CONFIG_CFS_BANDWIDTH=y
 CONFIG_RT_GROUP_SCHED=y
@@ -657,15 +657,20 @@ static inline unsigned long mm_total_size(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
 	unsigned long usize = 0;
+	VMA_ITERATOR(vmi, mm, 0);

-	for (vma = mm->mmap; vma && usize < parisc_cache_flush_threshold; vma = vma->vm_next)
+	for_each_vma(vmi, vma) {
+		if (usize >= parisc_cache_flush_threshold)
+			break;
 		usize += vma->vm_end - vma->vm_start;
+	}
 	return usize;
 }

 void flush_cache_mm(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);

 	/*
 	 * Flushing the whole cache on each cpu takes forever on
@@ -685,7 +690,7 @@ void flush_cache_mm(struct mm_struct *mm)
 	}

 	/* Flush mm */
-	for (vma = mm->mmap; vma; vma = vma->vm_next)
+	for_each_vma(vmi, vma)
 		flush_cache_pages(vma, vma->vm_start, vma->vm_end);
 }

@@ -17,7 +17,6 @@ CONFIG_LOG_CPU_MAX_BUF_SHIFT=13
 CONFIG_NUMA_BALANCING=y
 CONFIG_CGROUPS=y
 CONFIG_MEMCG=y
-CONFIG_MEMCG_SWAP=y
 CONFIG_CGROUP_SCHED=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CPUSETS=y
@@ -18,7 +18,6 @@ CONFIG_LOG_CPU_MAX_BUF_SHIFT=13
 CONFIG_NUMA_BALANCING=y
 CONFIG_CGROUPS=y
 CONFIG_MEMCG=y
-CONFIG_MEMCG_SWAP=y
 CONFIG_CGROUP_SCHED=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CPUSETS=y
@@ -115,18 +115,18 @@ struct vdso_data *arch_get_vdso_data(void *vvar_page)
 int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
 {
 	struct mm_struct *mm = task->mm;
+	VMA_ITERATOR(vmi, mm, 0);
 	struct vm_area_struct *vma;

 	mmap_read_lock(mm);
-
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+	for_each_vma(vmi, vma) {
 		unsigned long size = vma->vm_end - vma->vm_start;

 		if (vma_is_special_mapping(vma, &vvar_spec))
 			zap_page_range(vma, vma->vm_start, size);
 	}

 	mmap_read_unlock(mm);

 	return 0;
 }

@@ -81,14 +81,15 @@ EXPORT_SYMBOL(hash__flush_range);
 void hash__flush_tlb_mm(struct mm_struct *mm)
 {
 	struct vm_area_struct *mp;
+	VMA_ITERATOR(vmi, mm, 0);

 	/*
-	 * It is safe to go down the mm's list of vmas when called
-	 * from dup_mmap, holding mmap_lock. It would also be safe from
-	 * unmap_region or exit_mmap, but not from vmtruncate on SMP -
-	 * but it seems dup_mmap is the only SMP case which gets here.
+	 * It is safe to iterate the vmas when called from dup_mmap,
+	 * holding mmap_lock. It would also be safe from unmap_region
+	 * or exit_mmap, but not from vmtruncate on SMP - but it seems
+	 * dup_mmap is the only SMP case which gets here.
 	 */
-	for (mp = mm->mmap; mp != NULL; mp = mp->vm_next)
+	for_each_vma(vmi, mp)
 		hash__flush_range(mp->vm_mm, mp->vm_start, mp->vm_end);
 }
 EXPORT_SYMBOL(hash__flush_tlb_mm);
@@ -149,24 +149,15 @@ static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
 				    unsigned long len)
 {
 	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, addr);

 	/*
 	 * We don't try too hard, we just mark all the vma in that range
 	 * VM_NOHUGEPAGE and split them.
 	 */
-	vma = find_vma(mm, addr);
-	/*
-	 * If the range is in unmapped range, just return
-	 */
-	if (vma && ((addr + len) <= vma->vm_start))
-		return;
-
-	while (vma) {
-		if (vma->vm_start >= (addr + len))
-			break;
+	for_each_vma_range(vmi, vma, addr + len) {
 		vma->vm_flags |= VM_NOHUGEPAGE;
 		walk_page_vma(vma, &subpage_walk_ops, NULL);
-		vma = vma->vm_next;
 	}
 }
 #else
@@ -114,11 +114,12 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
 {
 	struct mm_struct *mm = task->mm;
 	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);
 	struct __vdso_info *vdso_info = mm->context.vdso_info;

 	mmap_read_lock(mm);

-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+	for_each_vma(vmi, vma) {
 		unsigned long size = vma->vm_end - vma->vm_start;

 		if (vma_is_special_mapping(vma, vdso_info->dm))
@@ -69,10 +69,11 @@ static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
 int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
 {
 	struct mm_struct *mm = task->mm;
+	VMA_ITERATOR(vmi, mm, 0);
 	struct vm_area_struct *vma;

 	mmap_read_lock(mm);
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+	for_each_vma(vmi, vma) {
 		unsigned long size = vma->vm_end - vma->vm_start;

 		if (!vma_is_special_mapping(vma, &vvar_mapping))
@@ -81,8 +81,9 @@ unsigned long _copy_from_user_key(void *to, const void __user *from,

 	might_fault();
 	if (!should_fail_usercopy()) {
-		instrument_copy_from_user(to, from, n);
+		instrument_copy_from_user_before(to, from, n);
 		res = raw_copy_from_user_key(to, from, n, key);
+		instrument_copy_from_user_after(to, from, n, res);
 	}
 	if (unlikely(res))
 		memset(to + (n - res), 0, res);
@@ -2515,8 +2515,9 @@ static const struct mm_walk_ops thp_split_walk_ops = {
 static inline void thp_split_mm(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);

-	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
+	for_each_vma(vmi, vma) {
 		vma->vm_flags &= ~VM_HUGEPAGE;
 		vma->vm_flags |= VM_NOHUGEPAGE;
 		walk_page_vma(vma, &thp_split_walk_ops, NULL);
@@ -2584,8 +2585,9 @@ int gmap_mark_unmergeable(void)
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 	int ret;
+	VMA_ITERATOR(vmi, mm, 0);

-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+	for_each_vma(vmi, vma) {
 		ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
 				  MADV_UNMERGEABLE, &vma->vm_flags);
 		if (ret)
@@ -16,7 +16,6 @@ CONFIG_CPUSETS=y
 # CONFIG_PROC_PID_CPUSET is not set
 CONFIG_CGROUP_CPUACCT=y
 CONFIG_CGROUP_MEMCG=y
-CONFIG_CGROUP_MEMCG_SWAP=y
 CONFIG_CGROUP_SCHED=y
 CONFIG_RT_GROUP_SCHED=y
 CONFIG_BLK_CGROUP=y
@@ -14,7 +14,6 @@ CONFIG_CPUSETS=y
 # CONFIG_PROC_PID_CPUSET is not set
 CONFIG_CGROUP_CPUACCT=y
 CONFIG_CGROUP_MEMCG=y
-CONFIG_CGROUP_MEMCG_SWAP=y
 CONFIG_CGROUP_SCHED=y
 CONFIG_RT_GROUP_SCHED=y
 CONFIG_BLK_DEV_INITRD=y
@@ -584,21 +584,19 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,

 void flush_tlb_mm(struct mm_struct *mm)
 {
-	struct vm_area_struct *vma = mm->mmap;
+	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);

-	while (vma != NULL) {
+	for_each_vma(vmi, vma)
 		fix_range(mm, vma->vm_start, vma->vm_end, 0);
-		vma = vma->vm_next;
-	}
 }

 void force_flush_all(void)
 {
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma = mm->mmap;
+	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);

-	while (vma != NULL) {
+	for_each_vma(vmi, vma)
 		fix_range(mm, vma->vm_start, vma->vm_end, 1);
-		vma = vma->vm_next;
-	}
 }
@@ -131,7 +131,9 @@ config X86
 	select CLKEVT_I8253
 	select CLOCKSOURCE_VALIDATE_LAST_CYCLE
 	select CLOCKSOURCE_WATCHDOG
-	select DCACHE_WORD_ACCESS
+	# Word-size accesses may read uninitialized data past the trailing \0
+	# in strings and cause false KMSAN reports.
+	select DCACHE_WORD_ACCESS if !KMSAN
 	select DYNAMIC_SIGFRAME
 	select EDAC_ATOMIC_SCRUB
 	select EDAC_SUPPORT
@@ -169,6 +171,7 @@ config X86
 	select HAVE_ARCH_KASAN if X86_64
 	select HAVE_ARCH_KASAN_VMALLOC if X86_64
 	select HAVE_ARCH_KFENCE
+	select HAVE_ARCH_KMSAN if X86_64
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_MMAP_RND_BITS if MMU
 	select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT
@@ -329,6 +332,10 @@ config GENERIC_ISA_DMA
 	def_bool y
 	depends on ISA_DMA_API

+config GENERIC_CSUM
+	bool
+	default y if KMSAN || KASAN
+
 config GENERIC_BUG
 	def_bool y
 	depends on BUG
@@ -12,6 +12,7 @@
 # Sanitizer runtimes are unavailable and cannot be linked for early boot code.
 KASAN_SANITIZE := n
 KCSAN_SANITIZE := n
+KMSAN_SANITIZE := n
 OBJECT_FILES_NON_STANDARD := y

 # Kernel does not boot with kcov instrumentation here.
@@ -20,6 +20,7 @@
 # Sanitizer runtimes are unavailable and cannot be linked for early boot code.
 KASAN_SANITIZE := n
 KCSAN_SANITIZE := n
+KMSAN_SANITIZE := n
 OBJECT_FILES_NON_STANDARD := y

 # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
@@ -11,6 +11,9 @@ include $(srctree)/lib/vdso/Makefile

 # Sanitizer runtimes are unavailable and cannot be linked here.
 KASAN_SANITIZE := n
+KMSAN_SANITIZE_vclock_gettime.o := n
+KMSAN_SANITIZE_vgetcpu.o := n
+
 UBSAN_SANITIZE := n
 KCSAN_SANITIZE := n
 OBJECT_FILES_NON_STANDARD := y
@@ -127,17 +127,17 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
 {
 	struct mm_struct *mm = task->mm;
 	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);

 	mmap_read_lock(mm);
-
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+	for_each_vma(vmi, vma) {
 		unsigned long size = vma->vm_end - vma->vm_start;

 		if (vma_is_special_mapping(vma, &vvar_mapping))
 			zap_page_range(vma, vma->vm_start, size);
 	}

 	mmap_read_unlock(mm);

 	return 0;
 }
 #else
@@ -354,6 +354,7 @@ int map_vdso_once(const struct vdso_image *image, unsigned long addr)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);

 	mmap_write_lock(mm);
 	/*
@@ -363,7 +364,7 @@ int map_vdso_once(const struct vdso_image *image, unsigned long addr)
 	 * We could search vma near context.vdso, but it's a slowpath,
 	 * so let's explicitly check all VMAs to be completely sure.
 	 */
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+	for_each_vma(vmi, vma) {
 		if (vma_is_special_mapping(vma, &vdso_mapping) ||
 		    vma_is_special_mapping(vma, &vvar_mapping)) {
 			mmap_write_unlock(mm);
@@ -1,9 +1,13 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER 1
-#define HAVE_CSUM_COPY_USER
-#define _HAVE_ARCH_CSUM_AND_COPY
-#ifdef CONFIG_X86_32
-# include <asm/checksum_32.h>
+#ifdef CONFIG_GENERIC_CSUM
+# include <asm-generic/checksum.h>
 #else
-# include <asm/checksum_64.h>
+# define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER 1
+# define HAVE_CSUM_COPY_USER
+# define _HAVE_ARCH_CSUM_AND_COPY
+# ifdef CONFIG_X86_32
+#  include <asm/checksum_32.h>
+# else
+#  include <asm/checksum_64.h>
+# endif
 #endif
arch/x86/include/asm/kmsan.h (new file, 87 lines)
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * x86 KMSAN support.
+ *
+ * Copyright (C) 2022, Google LLC
+ * Author: Alexander Potapenko <glider@google.com>
+ */
+
+#ifndef _ASM_X86_KMSAN_H
+#define _ASM_X86_KMSAN_H
+
+#ifndef MODULE
+
+#include <asm/cpu_entry_area.h>
+#include <asm/processor.h>
+#include <linux/mmzone.h>
+
+DECLARE_PER_CPU(char[CPU_ENTRY_AREA_SIZE], cpu_entry_area_shadow);
+DECLARE_PER_CPU(char[CPU_ENTRY_AREA_SIZE], cpu_entry_area_origin);
+
+/*
+ * Functions below are declared in the header to make sure they are inlined.
+ * They all are called from kmsan_get_metadata() for every memory access in
+ * the kernel, so speed is important here.
+ */
+
+/*
+ * Compute metadata addresses for the CPU entry area on x86.
+ */
+static inline void *arch_kmsan_get_meta_or_null(void *addr, bool is_origin)
+{
+	unsigned long addr64 = (unsigned long)addr;
+	char *metadata_array;
+	unsigned long off;
+	int cpu;
+
+	if ((addr64 < CPU_ENTRY_AREA_BASE) ||
+	    (addr64 >= (CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE)))
+		return NULL;
+	cpu = (addr64 - CPU_ENTRY_AREA_BASE) / CPU_ENTRY_AREA_SIZE;
+	off = addr64 - (unsigned long)get_cpu_entry_area(cpu);
+	if ((off < 0) || (off >= CPU_ENTRY_AREA_SIZE))
+		return NULL;
+	metadata_array = is_origin ? cpu_entry_area_origin :
+				     cpu_entry_area_shadow;
+	return &per_cpu(metadata_array[off], cpu);
+}
+
+/*
+ * Taken from arch/x86/mm/physaddr.h to avoid using an instrumented version.
+ */
+static inline bool kmsan_phys_addr_valid(unsigned long addr)
+{
+	if (IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
+		return !(addr >> boot_cpu_data.x86_phys_bits);
+	else
+		return true;
+}
+
+/*
+ * Taken from arch/x86/mm/physaddr.c to avoid using an instrumented version.
+ */
+static inline bool kmsan_virt_addr_valid(void *addr)
+{
+	unsigned long x = (unsigned long)addr;
+	unsigned long y = x - __START_KERNEL_map;
+
+	/* use the carry flag to determine if x was < __START_KERNEL_map */
+	if (unlikely(x > y)) {
+		x = y + phys_base;
+
+		if (y >= KERNEL_IMAGE_SIZE)
+			return false;
+	} else {
+		x = y + (__START_KERNEL_map - PAGE_OFFSET);
+
+		/* carry flag will be set if starting x was >= PAGE_OFFSET */
+		if ((x > y) || !kmsan_phys_addr_valid(x))
+			return false;
+	}
+
+	return pfn_valid(x >> PAGE_SHIFT);
+}
+
+#endif /* !MODULE */
+
+#endif /* _ASM_X86_KMSAN_H */
@@ -8,6 +8,8 @@
 #include <asm/cpufeatures.h>
 #include <asm/alternative.h>

+#include <linux/kmsan-checks.h>
+
 /* duplicated to the one in bootmem.h */
 extern unsigned long max_pfn;
 extern unsigned long phys_base;
@@ -47,6 +49,11 @@ void clear_page_erms(void *page);

 static inline void clear_page(void *page)
 {
+	/*
+	 * Clean up KMSAN metadata for the page being cleared. The assembly call
+	 * below clobbers @page, so we perform unpoisoning before it.
+	 */
+	kmsan_unpoison_memory(page, PAGE_SIZE);
 	alternative_call_2(clear_page_orig,
 			   clear_page_rep, X86_FEATURE_REP_GOOD,
 			   clear_page_erms, X86_FEATURE_ERMS,
@@ -139,7 +139,52 @@ extern unsigned int ptrs_per_p4d;
 # define VMEMMAP_START		__VMEMMAP_BASE_L4
 #endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */

-#define VMALLOC_END		(VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1)
+/*
+ * End of the region for which vmalloc page tables are pre-allocated.
+ * For non-KMSAN builds, this is the same as VMALLOC_END.
+ * For KMSAN builds, VMALLOC_START..VMEMORY_END is 4 times bigger than
+ * VMALLOC_START..VMALLOC_END (see below).
+ */
+#define VMEMORY_END		(VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1)
+
+#ifndef CONFIG_KMSAN
+#define VMALLOC_END		VMEMORY_END
+#else
+/*
+ * In KMSAN builds vmalloc area is four times smaller, and the remaining 3/4
+ * are used to keep the metadata for virtual pages. The memory formerly
+ * belonging to vmalloc area is now laid out as follows:
+ *
+ * 1st quarter: VMALLOC_START to VMALLOC_END - new vmalloc area
+ * 2nd quarter: KMSAN_VMALLOC_SHADOW_START to
+ *              VMALLOC_END+KMSAN_VMALLOC_SHADOW_OFFSET - vmalloc area shadow
+ * 3rd quarter: KMSAN_VMALLOC_ORIGIN_START to
+ *              VMALLOC_END+KMSAN_VMALLOC_ORIGIN_OFFSET - vmalloc area origins
+ * 4th quarter: KMSAN_MODULES_SHADOW_START to KMSAN_MODULES_ORIGIN_START
+ *              - shadow for modules,
+ *              KMSAN_MODULES_ORIGIN_START to
+ *              KMSAN_MODULES_ORIGIN_START + MODULES_LEN - origins for modules.
+ */
+#define VMALLOC_QUARTER_SIZE	((VMALLOC_SIZE_TB << 40) >> 2)
+#define VMALLOC_END		(VMALLOC_START + VMALLOC_QUARTER_SIZE - 1)
+
+/*
+ * vmalloc metadata addresses are calculated by adding shadow/origin offsets
+ * to vmalloc address.
+ */
+#define KMSAN_VMALLOC_SHADOW_OFFSET	VMALLOC_QUARTER_SIZE
+#define KMSAN_VMALLOC_ORIGIN_OFFSET	(VMALLOC_QUARTER_SIZE << 1)
+
+#define KMSAN_VMALLOC_SHADOW_START	(VMALLOC_START + KMSAN_VMALLOC_SHADOW_OFFSET)
+#define KMSAN_VMALLOC_ORIGIN_START	(VMALLOC_START + KMSAN_VMALLOC_ORIGIN_OFFSET)
+
+/*
+ * The shadow/origin for modules are placed one by one in the last 1/4 of
+ * vmalloc space.
+ */
+#define KMSAN_MODULES_SHADOW_START	(VMALLOC_END + KMSAN_VMALLOC_ORIGIN_OFFSET + 1)
+#define KMSAN_MODULES_ORIGIN_START	(KMSAN_MODULES_SHADOW_START + MODULES_LEN)
+#endif /* CONFIG_KMSAN */

 #define MODULES_VADDR		(__START_KERNEL_map + KERNEL_IMAGE_SIZE)
 /* The module sections ends with the start of the fixmap */
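Under the layout above, the shadow and origin for any vmalloc address are reachable by constant-offset arithmetic, which is why only offsets and start macros are defined. A minimal sketch of that lookup, assuming a hypothetical helper name (vmalloc_meta) that is not part of the patch:

	/* Hypothetical helper: map a vmalloc address to its KMSAN metadata. */
	static inline void *vmalloc_meta(void *addr, bool is_origin)
	{
		unsigned long a = (unsigned long)addr;

		if (a < VMALLOC_START || a > VMALLOC_END)
			return NULL;	/* not in the (shrunken) vmalloc area */
		return (void *)(a + (is_origin ? KMSAN_VMALLOC_ORIGIN_OFFSET
					       : KMSAN_VMALLOC_SHADOW_OFFSET));
	}

Because the metadata quarters are simple aliases of the first quarter, no per-page lookup table is needed for vmalloc memory; the trade-off is that the usable vmalloc range shrinks to a quarter of its normal size on KMSAN builds.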
@@ -2,6 +2,8 @@
 #ifndef _ASM_X86_SPARSEMEM_H
 #define _ASM_X86_SPARSEMEM_H

+#include <linux/types.h>
+
 #ifdef CONFIG_SPARSEMEM
 /*
  * generic non-linear memory support:
@@ -11,11 +11,23 @@
    function. */

 #define __HAVE_ARCH_MEMCPY 1
+#if defined(__SANITIZE_MEMORY__)
+#undef memcpy
+void *__msan_memcpy(void *dst, const void *src, size_t size);
+#define memcpy __msan_memcpy
+#else
 extern void *memcpy(void *to, const void *from, size_t len);
+#endif
 extern void *__memcpy(void *to, const void *from, size_t len);

 #define __HAVE_ARCH_MEMSET
+#if defined(__SANITIZE_MEMORY__)
+extern void *__msan_memset(void *s, int c, size_t n);
+#undef memset
+#define memset __msan_memset
+#else
 void *memset(void *s, int c, size_t n);
+#endif
 void *__memset(void *s, int c, size_t n);

 #define __HAVE_ARCH_MEMSET16
@@ -55,7 +67,13 @@ static inline void *memset64(uint64_t *s, uint64_t v, size_t n)
 }

 #define __HAVE_ARCH_MEMMOVE
+#if defined(__SANITIZE_MEMORY__)
+#undef memmove
+void *__msan_memmove(void *dest, const void *src, size_t len);
+#define memmove __msan_memmove
+#else
 void *memmove(void *dest, const void *src, size_t count);
+#endif
 void *__memmove(void *dest, const void *src, size_t count);

 int memcmp(const void *cs, const void *ct, size_t count);
@@ -64,8 +82,7 @@ char *strcpy(char *dest, const char *src);
 char *strcat(char *dest, const char *src);
 int strcmp(const char *cs, const char *ct);

-#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
-
+#if (defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__))
 /*
  * For files that not instrumented (e.g. mm/slub.c) we
  * should use not instrumented version of mem* functions.
@@ -73,7 +90,9 @@ int strcmp(const char *cs, const char *ct);

 #undef memcpy
 #define memcpy(dst, src, len) __memcpy(dst, src, len)
+#undef memmove
 #define memmove(dst, src, len) __memmove(dst, src, len)
+#undef memset
 #define memset(s, c, n) __memset(s, c, n)

 #ifndef __NO_FORTIFY
@@ -5,6 +5,7 @@
  * User space memory access functions
  */
 #include <linux/compiler.h>
+#include <linux/instrumented.h>
 #include <linux/kasan-checks.h>
 #include <linux/string.h>
 #include <asm/asm.h>
@@ -103,6 +104,7 @@ extern int __get_user_bad(void);
		     : "=a" (__ret_gu), "=r" (__val_gu),		\
			ASM_CALL_CONSTRAINT				\
		     : "0" (ptr), "i" (sizeof(*(ptr))));		\
+	instrument_get_user(__val_gu);					\
	(x) = (__force __typeof__(*(ptr))) __val_gu;			\
	__builtin_expect(__ret_gu, 0);					\
 })
@@ -192,9 +194,11 @@ extern void __put_user_nocheck_8(void);
	int __ret_pu;							\
	void __user *__ptr_pu;						\
	register __typeof__(*(ptr)) __val_pu asm("%"_ASM_AX);		\
-	__chk_user_ptr(ptr);						\
-	__ptr_pu = (ptr);						\
-	__val_pu = (x);							\
+	__typeof__(*(ptr)) __x = (x); /* eval x once */			\
+	__typeof__(ptr) __ptr = (ptr); /* eval ptr once */		\
+	__chk_user_ptr(__ptr);						\
+	__ptr_pu = __ptr;						\
+	__val_pu = __x;							\
	asm volatile("call __" #fn "_%P[size]"				\
		     : "=c" (__ret_pu),					\
			ASM_CALL_CONSTRAINT				\
@@ -202,6 +206,7 @@ extern void __put_user_nocheck_8(void);
		       "r" (__val_pu),					\
		       [size] "i" (sizeof(*(ptr)))			\
		     :"ebx");						\
+	instrument_put_user(__x, __ptr, sizeof(*(ptr)));		\
	__builtin_expect(__ret_pu, 0);					\
 })

@@ -248,23 +253,25 @@ extern void __put_user_nocheck_8(void);

 #define __put_user_size(x, ptr, size, label)				\
 do {									\
+	__typeof__(*(ptr)) __x = (x); /* eval x once */			\
	__chk_user_ptr(ptr);						\
	switch (size) {							\
	case 1:								\
-		__put_user_goto(x, ptr, "b", "iq", label);		\
+		__put_user_goto(__x, ptr, "b", "iq", label);		\
		break;							\
	case 2:								\
-		__put_user_goto(x, ptr, "w", "ir", label);		\
+		__put_user_goto(__x, ptr, "w", "ir", label);		\
		break;							\
	case 4:								\
-		__put_user_goto(x, ptr, "l", "ir", label);		\
+		__put_user_goto(__x, ptr, "l", "ir", label);		\
		break;							\
	case 8:								\
-		__put_user_goto_u64(x, ptr, label);			\
+		__put_user_goto_u64(__x, ptr, label);			\
		break;							\
	default:							\
		__put_user_bad();					\
	}								\
+	instrument_put_user(__x, ptr, size);				\
 } while (0)

 #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
@@ -305,6 +312,7 @@ do {									\
	default:							\
		(x) = __get_user_bad();					\
	}								\
+	instrument_get_user(x);						\
 } while (0)

 #define __get_user_asm(x, addr, itype, ltype, label)			\
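The instrument_get_user()/instrument_put_user() calls added above are thin hooks from <linux/instrumented.h> that let a sanitizer observe values crossing the user/kernel boundary. The following is only a simplified reading of what such hooks amount to on a KMSAN build, not the verbatim kernel definitions:

	/*
	 * Simplified sketch (assumption, not the real macros): after a
	 * get_user() the kernel-side value is known-initialized, and before
	 * a put_user() the value headed to userspace is checked.
	 */
	#define instrument_get_user(val)				\
		kmsan_unpoison_memory(&(val), sizeof(val))

	#define instrument_put_user(val, ptr, size)			\
		kmsan_check_memory(&(val), size)

The accompanying "eval x once" / "eval ptr once" changes exist so the hooks can reuse the already-evaluated value and pointer without re-evaluating macro arguments that may have side effects.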
@@ -29,6 +29,8 @@ KASAN_SANITIZE_sev.o					:= n
 # With some compiler versions the generated code results in boot hangs, caused
 # by several compilation units. To be safe, disable all instrumentation.
 KCSAN_SANITIZE := n
+KMSAN_SANITIZE_head$(BITS).o				:= n
+KMSAN_SANITIZE_nmi.o					:= n

 # If instrumentation of this dir is enabled, boot hangs during first second.
 # Probably could be more selective here, but note that files related to irqs,
@@ -12,6 +12,7 @@ endif
 # If these files are instrumented, boot hangs during the first second.
 KCOV_INSTRUMENT_common.o := n
 KCOV_INSTRUMENT_perf_event.o := n
+KMSAN_SANITIZE_common.o := n

 # As above, instrumenting secondary CPU boot code causes boot hangs.
 KCSAN_SANITIZE_common.o := n
@@ -177,6 +177,12 @@ static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs,
	}
 }

+/*
+ * This function reads pointers from the stack and dereferences them. The
+ * pointers may not have their KMSAN shadow set up properly, which may result
+ * in false positive reports. Disable instrumentation to avoid those.
+ */
+__no_kmsan_checks
 static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
			unsigned long *stack, const char *log_lvl)
 {
@@ -553,6 +553,7 @@ void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp, bool x32)
  * Kprobes not supported here. Set the probe on schedule instead.
  * Function graph tracer not supported too.
  */
+__no_kmsan_checks
 __visible __notrace_funcgraph struct task_struct *
 __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 {
@@ -95,7 +95,6 @@ void __init tboot_probe(void)

 static pgd_t *tboot_pg_dir;
 static struct mm_struct tboot_mm = {
-	.mm_rb          = RB_ROOT,
	.mm_mt          = MTREE_INIT_EXT(mm_mt, MM_MT_FLAGS, tboot_mm.mmap_lock),
	.pgd            = swapper_pg_dir,
	.mm_users       = ATOMIC_INIT(2),
@@ -183,6 +183,16 @@ static struct pt_regs *decode_frame_pointer(unsigned long *bp)
 }
 #endif

+/*
+ * While walking the stack, KMSAN may stomp on stale locals from other
+ * functions that were marked as uninitialized upon function exit, and
+ * now hold the call frame information for the current function (e.g. the frame
+ * pointer). Because KMSAN does not specifically mark call frames as
+ * initialized, false positive reports are possible. To prevent such reports,
+ * we mark the functions scanning the stack (here and below) with
+ * __no_kmsan_checks.
+ */
+__no_kmsan_checks
 static bool update_stack_state(struct unwind_state *state,
			       unsigned long *next_bp)
 {
@@ -250,6 +260,7 @@ static bool update_stack_state(struct unwind_state *state,
	return true;
 }

+__no_kmsan_checks
 bool unwind_next_frame(struct unwind_state *state)
 {
	struct pt_regs *regs;
@@ -65,7 +65,9 @@ ifneq ($(CONFIG_X86_CMPXCHG64),y)
 endif
 else
         obj-y += iomap_copy_64.o
+ifneq ($(CONFIG_GENERIC_CSUM),y)
         lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
+endif
         lib-y += clear_page_64.o copy_page_64.o
         lib-y += memmove_64.o memset_64.o
         lib-y += copy_user_64.o
@@ -1,6 +1,7 @@
 #include <linux/string.h>
 #include <linux/module.h>
 #include <linux/io.h>
+#include <linux/kmsan-checks.h>

 #define movs(type,to,from) \
	asm volatile("movs" type:"=&D" (to), "=&S" (from):"0" (to), "1" (from):"memory")
@@ -37,6 +38,8 @@ static void string_memcpy_fromio(void *to, const volatile void __iomem *from, si
		n-=2;
	}
	rep_movs(to, (const void *)from, n);
+	/* KMSAN must treat values read from devices as initialized. */
+	kmsan_unpoison_memory(to, n);
 }

 static void string_memcpy_toio(volatile void __iomem *to, const void *from, size_t n)
@@ -44,6 +47,8 @@ static void string_memcpy_toio(volatile void __iomem *to, const void *from, size
	if (unlikely(!n))
		return;

+	/* Make sure uninitialized memory isn't copied to devices. */
+	kmsan_check_memory(from, n);
	/* Align any unaligned destination IO */
	if (unlikely(1 & (unsigned long)to)) {
		movs("b", to, from);
@@ -14,6 +14,8 @@ KASAN_SANITIZE_pgprot.o		:= n
 # Disable KCSAN entirely, because otherwise we get warnings that some functions
 # reference __initdata sections.
 KCSAN_SANITIZE := n
+# Avoid recursion by not calling KMSAN hooks for CEA code.
+KMSAN_SANITIZE_cpu_entry_area.o := n

 ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_mem_encrypt.o	= -pg
@@ -44,6 +46,9 @@ obj-$(CONFIG_HIGHMEM)	+= highmem_32.o
 KASAN_SANITIZE_kasan_init_$(BITS).o := n
 obj-$(CONFIG_KASAN)		+= kasan_init_$(BITS).o

+KMSAN_SANITIZE_kmsan_shadow.o	:= n
+obj-$(CONFIG_KMSAN)		+= kmsan_shadow.o
+
 obj-$(CONFIG_MMIOTRACE)		+= mmiotrace.o
 mmiotrace-y			:= kmmio.o pf_in.o mmio-mod.o
 obj-$(CONFIG_MMIOTRACE_TEST)	+= testmmiotrace.o
@@ -260,7 +260,7 @@ static noinline int vmalloc_fault(unsigned long address)
 }
 NOKPROBE_SYMBOL(vmalloc_fault);

-void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
+static void __arch_sync_kernel_mappings(unsigned long start, unsigned long end)
 {
	unsigned long addr;

@@ -284,6 +284,27 @@ void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
	}
 }

+void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
+{
+	__arch_sync_kernel_mappings(start, end);
+#ifdef CONFIG_KMSAN
+	/*
+	 * KMSAN maintains two additional metadata page mappings for the
+	 * [VMALLOC_START, VMALLOC_END) range. These mappings start at
+	 * KMSAN_VMALLOC_SHADOW_START and KMSAN_VMALLOC_ORIGIN_START and
+	 * have to be synced together with the vmalloc memory mapping.
+	 */
+	if (start >= VMALLOC_START && end < VMALLOC_END) {
+		__arch_sync_kernel_mappings(
+			start - VMALLOC_START + KMSAN_VMALLOC_SHADOW_START,
+			end - VMALLOC_START + KMSAN_VMALLOC_SHADOW_START);
+		__arch_sync_kernel_mappings(
+			start - VMALLOC_START + KMSAN_VMALLOC_ORIGIN_START,
+			end - VMALLOC_START + KMSAN_VMALLOC_ORIGIN_START);
+	}
+#endif
+}
+
 static bool low_pfn(unsigned long pfn)
 {
	return pfn < max_low_pfn;
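The KMSAN branch above simply re-runs the same page-table sync over the two metadata aliases of the faulting vmalloc range; the translation is a constant offset. A tiny sketch of that address math, using a hypothetical helper name purely for illustration:

	/* Hypothetical helper: translate a vmalloc address into its shadow alias. */
	static inline unsigned long kmsan_shadow_alias(unsigned long addr)
	{
		return addr - VMALLOC_START + KMSAN_VMALLOC_SHADOW_START;
	}

	/* Usage matching the hunk above:
	 *   __arch_sync_kernel_mappings(kmsan_shadow_alias(start),
	 *				 kmsan_shadow_alias(end));
	 * and the same with KMSAN_VMALLOC_ORIGIN_START for the origin alias.
	 */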
@@ -1288,7 +1288,7 @@ static void __init preallocate_vmalloc_pages(void)
	unsigned long addr;
	const char *lvl;

-	for (addr = VMALLOC_START; addr <= VMALLOC_END; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
+	for (addr = VMALLOC_START; addr <= VMEMORY_END; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
		pgd_t *pgd = pgd_offset_k(addr);
		p4d_t *p4d;
		pud_t *pud;
@@ -17,6 +17,7 @@
 #include <linux/cc_platform.h>
 #include <linux/efi.h>
 #include <linux/pgtable.h>
+#include <linux/kmsan.h>

 #include <asm/set_memory.h>
 #include <asm/e820/api.h>
@@ -479,6 +480,8 @@ void iounmap(volatile void __iomem *addr)
		return;
	}

+	kmsan_iounmap_page_range((unsigned long)addr,
+				 (unsigned long)addr + get_vm_area_size(p));
	memtype_free(p->phys_addr, p->phys_addr + get_vm_area_size(p));

	/* Finally remove it */
arch/x86/mm/kmsan_shadow.c (new file, 20 lines)
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * x86-specific bits of KMSAN shadow implementation.
+ *
+ * Copyright (C) 2022 Google LLC
+ * Author: Alexander Potapenko <glider@google.com>
+ */
+
+#include <asm/cpu_entry_area.h>
+#include <linux/percpu-defs.h>
+
+/*
+ * Addresses within the CPU entry area (including e.g. exception stacks) do not
+ * have struct page entries corresponding to them, so they need separate
+ * handling.
+ * arch_kmsan_get_meta_or_null() (declared in the header) maps the addresses in
+ * CPU entry area to addresses in cpu_entry_area_shadow/cpu_entry_area_origin.
+ */
+DEFINE_PER_CPU(char[CPU_ENTRY_AREA_SIZE], cpu_entry_area_shadow);
+DEFINE_PER_CPU(char[CPU_ENTRY_AREA_SIZE], cpu_entry_area_origin);
@@ -10,6 +10,7 @@
 # Sanitizer runtimes are unavailable and cannot be linked here.
 KASAN_SANITIZE			:= n
 KCSAN_SANITIZE			:= n
+KMSAN_SANITIZE			:= n
 OBJECT_FILES_NON_STANDARD	:= y

 # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
@@ -58,6 +58,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
 {
	struct vm_area_struct *vmm;
+	struct vma_iterator vmi;

	if (flags & MAP_FIXED) {
		/* We do not accept a shared mapping if it would violate
@@ -79,15 +80,20 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
	else
		addr = PAGE_ALIGN(addr);

-	for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) {
-		/* At this point: (!vmm || addr < vmm->vm_end). */
-		if (TASK_SIZE - len < addr)
-			return -ENOMEM;
-		if (!vmm || addr + len <= vm_start_gap(vmm))
-			return addr;
+	vma_iter_init(&vmi, current->mm, addr);
+	for_each_vma(vmi, vmm) {
+		/* At this point: (addr < vmm->vm_end). */
+		if (addr + len <= vm_start_gap(vmm))
+			break;
+
		addr = vmm->vm_end;
		if (flags & MAP_SHARED)
			addr = COLOUR_ALIGN(addr, pgoff);
	}
+
+	if (TASK_SIZE - len < addr)
+		return -ENOMEM;
+
+	return addr;
 }
 #endif
@@ -870,6 +870,8 @@ static inline bool page_is_mergeable(const struct bio_vec *bv,
	*same_page = ((vec_end_addr & PAGE_MASK) == page_addr);
	if (*same_page)
		return true;
+	else if (IS_ENABLED(CONFIG_KMSAN))
+		return false;
	return (bv->bv_page + bv_end / PAGE_SIZE) == (page + off / PAGE_SIZE);
 }

@@ -88,6 +88,13 @@ static inline bool biovec_phys_mergeable(struct request_queue *q,
	phys_addr_t addr1 = page_to_phys(vec1->bv_page) + vec1->bv_offset;
	phys_addr_t addr2 = page_to_phys(vec2->bv_page) + vec2->bv_offset;

+	/*
+	 * Merging adjacent physical pages may not work correctly under KMSAN
+	 * if their metadata pages aren't adjacent. Just disable merging.
+	 */
+	if (IS_ENABLED(CONFIG_KMSAN))
+		return false;
+
	if (addr1 + vec1->bv_len != addr2)
		return false;
	if (xen_domain() && !xen_biovec_phys_mergeable(vec1, vec2->bv_page))
@@ -1391,6 +1391,7 @@ endmenu
 config CRYPTO_HASH_INFO
	bool

+if !KMSAN # avoid false positives from assembly
 if ARM
 source "arch/arm/crypto/Kconfig"
 endif
@@ -1412,6 +1413,7 @@ endif
 if X86
 source "arch/x86/crypto/Kconfig"
 endif
+endif

 source "drivers/crypto/Kconfig"
 source "crypto/asymmetric_keys/Kconfig"
@@ -20,6 +20,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/swap.h>
 #include <linux/slab.h>
+#include <linux/hugetlb.h>

 static struct bus_type node_subsys = {
	.name = "node",
@@ -589,64 +590,9 @@ static const struct attribute_group *node_dev_groups[] = {
	NULL
 };

-#ifdef CONFIG_HUGETLBFS
-/*
- * hugetlbfs per node attributes registration interface:
- * When/if hugetlb[fs] subsystem initializes [sometime after this module],
- * it will register its per node attributes for all online nodes with
- * memory. It will also call register_hugetlbfs_with_node(), below, to
- * register its attribute registration functions with this node driver.
- * Once these hooks have been initialized, the node driver will call into
- * the hugetlb module to [un]register attributes for hot-plugged nodes.
- */
-static node_registration_func_t __hugetlb_register_node;
-static node_registration_func_t __hugetlb_unregister_node;
-
-static inline bool hugetlb_register_node(struct node *node)
-{
-	if (__hugetlb_register_node &&
-			node_state(node->dev.id, N_MEMORY)) {
-		__hugetlb_register_node(node);
-		return true;
-	}
-	return false;
-}
-
-static inline void hugetlb_unregister_node(struct node *node)
-{
-	if (__hugetlb_unregister_node)
-		__hugetlb_unregister_node(node);
-}
-
-void register_hugetlbfs_with_node(node_registration_func_t doregister,
-				  node_registration_func_t unregister)
-{
-	__hugetlb_register_node   = doregister;
-	__hugetlb_unregister_node = unregister;
-}
-#else
-static inline void hugetlb_register_node(struct node *node) {}
-
-static inline void hugetlb_unregister_node(struct node *node) {}
-#endif
-
 static void node_device_release(struct device *dev)
 {
-	struct node *node = to_node(dev);
-
-#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_HUGETLBFS)
-	/*
-	 * We schedule the work only when a memory section is
-	 * onlined/offlined on this node. When we come here,
-	 * all the memory on this node has been offlined,
-	 * so we won't enqueue new work to this work.
-	 *
-	 * The work is using node->node_work, so we should
-	 * flush work before freeing the memory.
-	 */
-	flush_work(&node->node_work);
-#endif
-	kfree(node);
+	kfree(to_node(dev));
 }

 /*
@@ -665,13 +611,13 @@ static int register_node(struct node *node, int num)
	node->dev.groups = node_dev_groups;
	error = device_register(&node->dev);

-	if (error)
+	if (error) {
		put_device(&node->dev);
-	else {
+	} else {
		hugetlb_register_node(node);

		compaction_register_node(node);
	}

	return error;
 }
@@ -684,8 +630,8 @@ static int register_node(struct node *node, int num)
  */
 void unregister_node(struct node *node)
 {
+	hugetlb_unregister_node(node);
	compaction_unregister_node(node);
-	hugetlb_unregister_node(node); /* no-op, if memoryless node */
	node_remove_accesses(node);
	node_remove_caches(node);
	device_unregister(&node->dev);
@@ -907,74 +853,8 @@ void register_memory_blocks_under_node(int nid, unsigned long start_pfn,
			   (void *)&nid, func);
	return;
 }
-
-#ifdef CONFIG_HUGETLBFS
-/*
- * Handle per node hstate attribute [un]registration on transistions
- * to/from memoryless state.
- */
-static void node_hugetlb_work(struct work_struct *work)
-{
-	struct node *node = container_of(work, struct node, node_work);
-
-	/*
-	 * We only get here when a node transitions to/from memoryless state.
-	 * We can detect which transition occurred by examining whether the
-	 * node has memory now. hugetlb_register_node() already check this
-	 * so we try to register the attributes. If that fails, then the
-	 * node has transitioned to memoryless, try to unregister the
-	 * attributes.
-	 */
-	if (!hugetlb_register_node(node))
-		hugetlb_unregister_node(node);
-}
-
-static void init_node_hugetlb_work(int nid)
-{
-	INIT_WORK(&node_devices[nid]->node_work, node_hugetlb_work);
-}
-
-static int node_memory_callback(struct notifier_block *self,
-				unsigned long action, void *arg)
-{
-	struct memory_notify *mnb = arg;
-	int nid = mnb->status_change_nid;
-
-	switch (action) {
-	case MEM_ONLINE:
-	case MEM_OFFLINE:
-		/*
-		 * offload per node hstate [un]registration to a work thread
-		 * when transitioning to/from memoryless state.
-		 */
-		if (nid != NUMA_NO_NODE)
-			schedule_work(&node_devices[nid]->node_work);
-		break;
-
-	case MEM_GOING_ONLINE:
-	case MEM_GOING_OFFLINE:
-	case MEM_CANCEL_ONLINE:
-	case MEM_CANCEL_OFFLINE:
-	default:
-		break;
-	}
-
-	return NOTIFY_OK;
-}
-#endif /* CONFIG_HUGETLBFS */
 #endif /* CONFIG_MEMORY_HOTPLUG */

-#if !defined(CONFIG_MEMORY_HOTPLUG) || !defined(CONFIG_HUGETLBFS)
-static inline int node_memory_callback(struct notifier_block *self,
-				unsigned long action, void *arg)
-{
-	return NOTIFY_OK;
-}
-
-static void init_node_hugetlb_work(int nid) { }
-
-#endif
-
 int __register_one_node(int nid)
 {
	int error;
@@ -993,8 +873,6 @@ int __register_one_node(int nid)
	}

	INIT_LIST_HEAD(&node_devices[nid]->access_list);
-	/* initialize work queue for memory hot plug */
-	init_node_hugetlb_work(nid);
	node_init_caches(nid);

	return error;
@@ -1065,13 +943,8 @@ static const struct attribute_group *cpu_root_attr_groups[] = {
	NULL,
 };

-#define NODE_CALLBACK_PRI	2	/* lower than SLAB */
 void __init node_dev_init(void)
 {
-	static struct notifier_block node_memory_callback_nb = {
-		.notifier_call = node_memory_callback,
-		.priority = NODE_CALLBACK_PRI,
-	};
	int ret, i;

	BUILD_BUG_ON(ARRAY_SIZE(node_state_attr) != NR_NODE_STATES);
@@ -1081,8 +954,6 @@ void __init node_dev_init(void)
	if (ret)
		panic("%s() failed to register subsystem: %d\n", __func__, ret);

-	register_hotmemory_notifier(&node_memory_callback_nb);
-
	/*
	 * Create all node devices, which will properly link the node
	 * to applicable memory block devices and already created cpu devices.
@@ -329,8 +329,8 @@ static ssize_t idle_store(struct device *dev,

	if (!sysfs_streq(buf, "all")) {
		/*
-		 * If it did not parse as 'all' try to treat it as an integer when
-		 * we have memory tracking enabled.
+		 * If it did not parse as 'all' try to treat it as an integer
+		 * when we have memory tracking enabled.
		 */
		u64 age_sec;

@@ -345,7 +345,10 @@ static ssize_t idle_store(struct device *dev,
	if (!init_done(zram))
		goto out_unlock;

-	/* A cutoff_time of 0 marks everything as idle, this is the "all" behavior */
+	/*
+	 * A cutoff_time of 0 marks everything as idle, this is the
+	 * "all" behavior.
+	 */
	mark_idle(zram, cutoff_time);
	rv = len;

@@ -1416,11 +1419,11 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
		if (comp_len != PAGE_SIZE)
			goto compress_again;
		/*
-		 * If the page is not compressible, you need to acquire the lock and
-		 * execute the code below. The zcomp_stream_get() call is needed to
-		 * disable the cpu hotplug and grab the zstrm buffer back.
-		 * It is necessary that the dereferencing of the zstrm variable below
-		 * occurs correctly.
+		 * If the page is not compressible, you need to acquire the
+		 * lock and execute the code below. The zcomp_stream_get()
+		 * call is needed to disable the cpu hotplug and grab the
+		 * zstrm buffer back. It is necessary that the dereferencing
+		 * of the zstrm variable below occurs correctly.
		 */
		zstrm = zcomp_stream_get(zram->comp);
	}
@@ -2131,6 +2134,8 @@ static int __init zram_init(void)
 {
	int ret;

+	BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > BITS_PER_LONG);
+
	ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
				      zcomp_cpu_up_prepare, zcomp_cpu_dead);
	if (ret < 0)
@@ -30,16 +30,15 @@


 /*
- * The lower ZRAM_FLAG_SHIFT bits of table.flags is for
- * object size (excluding header), the higher bits is for
- * zram_pageflags.
+ * ZRAM is mainly used for memory efficiency so we want to keep memory
+ * footprint small and thus squeeze size and zram pageflags into a flags
+ * member. The lower ZRAM_FLAG_SHIFT bits is for object size (excluding
+ * header), which cannot be larger than PAGE_SIZE (requiring PAGE_SHIFT
+ * bits), the higher bits are for zram_pageflags.
  *
- * zram is mainly used for memory efficiency so we want to keep memory
- * footprint small so we can squeeze size and flags into a field.
- * The lower ZRAM_FLAG_SHIFT bits is for object size (excluding header),
- * the higher bits is for zram_pageflags.
+ * We use BUILD_BUG_ON() to make sure that zram pageflags don't overflow.
  */
-#define ZRAM_FLAG_SHIFT 24
+#define ZRAM_FLAG_SHIFT (PAGE_SHIFT + 1)

 /* Flags for zram pages (table[page_no].flags) */
 enum zram_pageflags {
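With ZRAM_FLAG_SHIFT now derived from PAGE_SHIFT, the flags member packs the compressed object size in the low bits and the zram pageflags above them, and the BUILD_BUG_ON() added in zram_init() catches the case where the pageflag bits no longer fit. A rough sketch of the packing convention (an illustration, not code taken from the patch):

	/* Object size occupies the low ZRAM_FLAG_SHIFT bits; since it can be
	 * at most PAGE_SIZE, PAGE_SHIFT + 1 bits are enough to hold it.
	 */
	static inline size_t zram_obj_size(unsigned long packed)
	{
		return packed & (BIT(ZRAM_FLAG_SHIFT) - 1);
	}

	/* Each zram_pageflags value names a bit at or above ZRAM_FLAG_SHIFT. */
	static inline bool zram_test_flag(unsigned long packed, int flag)
	{
		return packed & BIT(flag);	/* flag >= ZRAM_FLAG_SHIFT */
	}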
@@ -59,7 +59,6 @@ static unsigned long __initdata rt_prop = EFI_INVALID_TABLE_ADDR;
 static unsigned long __initdata initrd = EFI_INVALID_TABLE_ADDR;

 struct mm_struct efi_mm = {
-	.mm_rb			= RB_ROOT,
	.mm_mt			= MTREE_INIT_EXT(mm_mt, MM_MT_FLAGS, efi_mm.mmap_lock),
	.mm_users		= ATOMIC_INIT(2),
	.mm_count		= ATOMIC_INIT(1),
@@ -57,6 +57,7 @@ GCOV_PROFILE			:= n
 # Sanitizer runtimes are unavailable and cannot be linked here.
 KASAN_SANITIZE			:= n
 KCSAN_SANITIZE			:= n
+KMSAN_SANITIZE			:= n
 UBSAN_SANITIZE			:= n
 OBJECT_FILES_NON_STANDARD	:= y

@@ -426,12 +426,11 @@ static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
 static int
 probe_range(struct mm_struct *mm, unsigned long addr, unsigned long len)
 {
-	const unsigned long end = addr + len;
+	VMA_ITERATOR(vmi, mm, addr);
	struct vm_area_struct *vma;
-	int ret = -EFAULT;

	mmap_read_lock(mm);
-	for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
+	for_each_vma_range(vmi, vma, addr + len) {
		/* Check for holes, note that we also update the addr below */
		if (vma->vm_start > addr)
			break;
@@ -439,16 +438,13 @@ probe_range(struct mm_struct *mm, unsigned long addr, unsigned long len)
		if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
			break;

-		if (vma->vm_end >= end) {
-			ret = 0;
-			break;
-		}
-
		addr = vma->vm_end;
	}
	mmap_read_unlock(mm);

-	return ret;
+	if (vma)
+		return -EFAULT;
+	return 0;
 }

 /*
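The rewritten probe_range() leans on a property of for_each_vma_range(): when the loop runs to completion the iteration variable is NULL, and it stays non-NULL only if the body broke out early (a hole or an unsuitable mapping here). That lets a single NULL check after the loop replace the old ret/end bookkeeping. A minimal sketch of the idiom, where check_vma() is a hypothetical stand-in for the caller's real predicate:

	VMA_ITERATOR(vmi, mm, start);
	struct vm_area_struct *vma;

	for_each_vma_range(vmi, vma, end) {
		if (!check_vma(vma))	/* hypothetical predicate */
			break;		/* vma stays non-NULL: failure */
	}
	return vma ? -EFAULT : 0;	/* NULL means the loop ran to completion */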
@@ -12,6 +12,7 @@
 #include <linux/sched.h>
 #include <linux/interrupt.h>
 #include <linux/input.h>
+#include <linux/kmsan-checks.h>
 #include <linux/serio.h>
 #include <linux/i8042.h>
 #include <linux/libps2.h>
@@ -294,9 +295,11 @@ int __ps2_command(struct ps2dev *ps2dev, u8 *param, unsigned int command)

	serio_pause_rx(ps2dev->serio);

-	if (param)
+	if (param) {
		for (i = 0; i < receive; i++)
			param[i] = ps2dev->cmdbuf[(receive - 1) - i];
+		kmsan_unpoison_memory(param, receive);
+	}

	if (ps2dev->cmdcnt &&
	    (command != PS2_CMD_RESET_BAT || ps2dev->cmdcnt != 1)) {
@@ -280,22 +280,6 @@ void cxl_handle_fault(struct work_struct *fault_work)
	mmput(mm);
 }

-static void cxl_prefault_one(struct cxl_context *ctx, u64 ea)
-{
-	struct mm_struct *mm;
-
-	mm = get_mem_context(ctx);
-	if (mm == NULL) {
-		pr_devel("cxl_prefault_one unable to get mm %i\n",
-			 pid_nr(ctx->pid));
-		return;
-	}
-
-	cxl_fault_segment(ctx, mm, ea);
-
-	mmput(mm);
-}
-
 static u64 next_segment(u64 ea, u64 vsid)
 {
	if (vsid & SLB_VSID_B_1T)
@@ -306,23 +290,16 @@ static u64 next_segment(u64 ea, u64 vsid)
	return ea + 1;
 }

-static void cxl_prefault_vma(struct cxl_context *ctx)
+static void cxl_prefault_vma(struct cxl_context *ctx, struct mm_struct *mm)
 {
	u64 ea, last_esid = 0;
	struct copro_slb slb;
+	VMA_ITERATOR(vmi, mm, 0);
	struct vm_area_struct *vma;
	int rc;
-	struct mm_struct *mm;
-
-	mm = get_mem_context(ctx);
-	if (mm == NULL) {
-		pr_devel("cxl_prefault_vm unable to get mm %i\n",
-			 pid_nr(ctx->pid));
-		return;
-	}

	mmap_read_lock(mm);
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+	for_each_vma(vmi, vma) {
		for (ea = vma->vm_start; ea < vma->vm_end;
				ea = next_segment(ea, slb.vsid)) {
			rc = copro_calculate_slb(mm, ea, &slb);
@@ -337,20 +314,28 @@ static void cxl_prefault_vma(struct cxl_context *ctx)
		}
	}
	mmap_read_unlock(mm);
-
-	mmput(mm);
 }

 void cxl_prefault(struct cxl_context *ctx, u64 wed)
 {
+	struct mm_struct *mm = get_mem_context(ctx);
+
+	if (mm == NULL) {
+		pr_devel("cxl_prefault unable to get mm %i\n",
+			 pid_nr(ctx->pid));
+		return;
+	}
+
	switch (ctx->afu->prefault_mode) {
	case CXL_PREFAULT_WED:
-		cxl_prefault_one(ctx, wed);
+		cxl_fault_segment(ctx, mm, wed);
		break;
	case CXL_PREFAULT_ALL:
-		cxl_prefault_vma(ctx);
+		cxl_prefault_vma(ctx, mm);
		break;
	default:
		break;
	}
+
+	mmput(mm);
 }
@@ -76,6 +76,7 @@ config WIREGUARD
	tristate "WireGuard secure network tunnel"
	depends on NET && INET
	depends on IPV6 || !IPV6
+	depends on !KMSAN # KMSAN doesn't support the crypto configs below
	select NET_UDP_TUNNEL
	select DST_CACHE
	select CRYPTO
@@ -652,7 +652,7 @@ void devm_namespace_disable(struct device *dev,
		struct nd_namespace_common *ndns);
 #if IS_ENABLED(CONFIG_ND_CLAIM)
 /* max struct page size independent of kernel config */
-#define MAX_STRUCT_PAGE_SIZE 64
+#define MAX_STRUCT_PAGE_SIZE 128
 int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap);
 #else
 static inline int nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
@@ -787,7 +787,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
		 * when populating the vmemmap. This *should* be equal to
		 * PMD_SIZE for most architectures.
		 *
-		 * Also make sure size of struct page is less than 64. We
+		 * Also make sure size of struct page is less than 128. We
		 * want to make sure we use large enough size here so that
		 * we don't have a dynamic reserve space depending on
		 * struct page size. But we also want to make sure we notice
@@ -492,15 +492,18 @@ static bool is_normal_memory(pgprot_t p)
 #endif
 }

-static int __check_mem_type(struct vm_area_struct *vma, unsigned long end)
+static int __check_mem_type(struct mm_struct *mm, unsigned long start,
+			    unsigned long end)
 {
-	while (vma && is_normal_memory(vma->vm_page_prot)) {
-		if (vma->vm_end >= end)
-			return 0;
-		vma = vma->vm_next;
+	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, start);
+
+	for_each_vma_range(vmi, vma, end) {
+		if (!is_normal_memory(vma->vm_page_prot))
+			return -EINVAL;
	}

-	return -EINVAL;
+	return 0;
 }

 int optee_check_mem_type(unsigned long start, size_t num_pages)
@@ -516,8 +519,7 @@ int optee_check_mem_type(unsigned long start, size_t num_pages)
		return 0;

	mmap_read_lock(mm);
-	rc = __check_mem_type(find_vma(mm, start),
-			      start + num_pages * PAGE_SIZE);
+	rc = __check_mem_type(mm, start, start + num_pages * PAGE_SIZE);
	mmap_read_unlock(mm);

	return rc;
@@ -8,6 +8,7 @@
 #include <linux/bitops.h>
 #include <linux/slab.h>
 #include <linux/log2.h>
+#include <linux/kmsan.h>
 #include <linux/usb.h>
 #include <linux/wait.h>
 #include <linux/usb/hcd.h>
@@ -426,6 +427,7 @@ int usb_submit_urb(struct urb *urb, gfp_t mem_flags)
			URB_SETUP_MAP_SINGLE | URB_SETUP_MAP_LOCAL |
			URB_DMA_SG_COMBINED);
	urb->transfer_flags |= (is_out ? URB_DIR_OUT : URB_DIR_IN);
+	kmsan_handle_urb(urb, is_out);

	if (xfertype != USB_ENDPOINT_XFER_CONTROL &&
	    dev->state < USB_STATE_CONFIGURED)
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/hrtimer.h>
 #include <linux/dma-mapping.h>
+#include <linux/kmsan.h>
 #include <linux/spinlock.h>
 #include <xen/xen.h>

@@ -352,8 +353,15 @@ static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
				   struct scatterlist *sg,
				   enum dma_data_direction direction)
 {
-	if (!vq->use_dma_api)
+	if (!vq->use_dma_api) {
+		/*
+		 * If DMA is not used, KMSAN doesn't know that the scatterlist
+		 * is initialized by the hardware. Explicitly check/unpoison it
+		 * depending on the direction.
+		 */
+		kmsan_handle_dma(sg_page(sg), sg->offset, sg->length, direction);
		return (dma_addr_t)sg_phys(sg);
+	}

	/*
	 * We can't use dma_map_sg, because we don't use scatterlists in
@@ -282,7 +282,7 @@ static long privcmd_ioctl_mmap(struct file *file, void __user *udata)
					struct page, lru);
		struct privcmd_mmap_entry *msg = page_address(page);

-		vma = find_vma(mm, msg->va);
+		vma = vma_lookup(mm, msg->va);
		rc = -EINVAL;

		if (!vma || (msg->va != vma->vm_start) || vma->vm_private_data)
@@ -2341,7 +2341,7 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size)
	struct address_space *mapping = inode->i_mapping;
	const struct address_space_operations *aops = mapping->a_ops;
	struct page *page;
-	void *fsdata;
+	void *fsdata = NULL;
	int err;

	err = inode_newsize_ok(inode, size);
@@ -2367,7 +2367,7 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
	const struct address_space_operations *aops = mapping->a_ops;
	unsigned int blocksize = i_blocksize(inode);
	struct page *page;
-	void *fsdata;
+	void *fsdata = NULL;
	pgoff_t index, curidx;
	loff_t curpos;
	unsigned zerofrom, offset, len;
@@ -1100,30 +1100,20 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma,
 	return vma->vm_end - vma->vm_start;
 }
 
-static struct vm_area_struct *first_vma(struct task_struct *tsk,
-					struct vm_area_struct *gate_vma)
-{
-	struct vm_area_struct *ret = tsk->mm->mmap;
-
-	if (ret)
-		return ret;
-	return gate_vma;
-}
-
 /*
  * Helper function for iterating across a vma list. It ensures that the caller
  * will visit `gate_vma' prior to terminating the search.
  */
-static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
+static struct vm_area_struct *coredump_next_vma(struct ma_state *mas,
+				       struct vm_area_struct *vma,
 				       struct vm_area_struct *gate_vma)
 {
-	struct vm_area_struct *ret;
-
-	ret = this_vma->vm_next;
-	if (ret)
-		return ret;
-	if (this_vma == gate_vma)
+	if (gate_vma && (vma == gate_vma))
 		return NULL;
+
+	vma = mas_next(mas, ULONG_MAX);
+	if (vma)
+		return vma;
 	return gate_vma;
 }
 
@@ -1147,9 +1137,10 @@ static void free_vma_snapshot(struct coredump_params *cprm)
  */
 static bool dump_vma_snapshot(struct coredump_params *cprm)
 {
-	struct vm_area_struct *vma, *gate_vma;
+	struct vm_area_struct *gate_vma, *vma = NULL;
 	struct mm_struct *mm = current->mm;
-	int i;
+	MA_STATE(mas, &mm->mm_mt, 0, 0);
+	int i = 0;
 
 	/*
 	 * Once the stack expansion code is fixed to not change VMA bounds
@@ -1169,8 +1160,7 @@ static bool dump_vma_snapshot(struct coredump_params *cprm)
 		return false;
 	}
 
-	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
-	     vma = next_vma(vma, gate_vma), i++) {
+	while ((vma = coredump_next_vma(&mas, vma, gate_vma)) != NULL) {
 		struct core_vma_metadata *m = cprm->vma_meta + i;
 
 		m->start = vma->vm_start;
@@ -1178,10 +1168,10 @@ static bool dump_vma_snapshot(struct coredump_params *cprm)
 		m->flags = vma->vm_flags;
 		m->dump_size = vma_dump_size(vma, cprm->mm_flags);
 		m->pgoff = vma->vm_pgoff;
-
 		m->file = vma->vm_file;
 		if (m->file)
 			get_file(m->file);
+		i++;
 	}
 
 	mmap_write_unlock(mm);
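
Note: the coredump loop above follows the generic maple-tree iteration pattern used throughout this series, with an explicit MA_STATE cursor. A minimal sketch of that pattern, assuming only the mas_for_each()/MA_STATE interface visible in this diff (the counting helper is hypothetical):

	#include <linux/maple_tree.h>
	#include <linux/mm.h>

	/* Hypothetical: count VMAs by walking the mm's maple tree directly. */
	static int demo_count_vmas(struct mm_struct *mm)
	{
		struct vm_area_struct *vma;
		int nr = 0;
		MA_STATE(mas, &mm->mm_mt, 0, 0);

		mmap_read_lock(mm);
		mas_for_each(&mas, vma, ULONG_MAX)	/* visits VMAs in address order */
			nr++;
		mmap_read_unlock(mm);
		return nr;
	}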
 fs/exec.c | 12

@@ -28,7 +28,6 @@
 #include <linux/file.h>
 #include <linux/fdtable.h>
 #include <linux/mm.h>
-#include <linux/vmacache.h>
 #include <linux/stat.h>
 #include <linux/fcntl.h>
 #include <linux/swap.h>
@@ -683,6 +682,8 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 	unsigned long length = old_end - old_start;
 	unsigned long new_start = old_start - shift;
 	unsigned long new_end = old_end - shift;
+	VMA_ITERATOR(vmi, mm, new_start);
+	struct vm_area_struct *next;
 	struct mmu_gather tlb;
 
 	BUG_ON(new_start > new_end);
@@ -691,7 +692,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 	 * ensure there are no vmas between where we want to go
 	 * and where we are
 	 */
-	if (vma != find_vma(mm, new_start))
+	if (vma != vma_next(&vmi))
 		return -EFAULT;
 
 	/*
@@ -710,12 +711,13 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 
 	lru_add_drain();
 	tlb_gather_mmu(&tlb, mm);
+	next = vma_next(&vmi);
 	if (new_end > old_start) {
 		/*
 		 * when the old and new regions overlap clear from new_end.
 		 */
 		free_pgd_range(&tlb, new_end, old_end, new_end,
-			vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
+			next ? next->vm_start : USER_PGTABLES_CEILING);
 	} else {
 		/*
 		 * otherwise, clean from old_start; this is done to not touch
@@ -724,7 +726,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 		 * for the others its just a little faster.
 		 */
 		free_pgd_range(&tlb, old_start, old_end, new_end,
-			vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
+			next ? next->vm_start : USER_PGTABLES_CEILING);
 	}
 	tlb_finish_mmu(&tlb);
 
@@ -1023,8 +1025,6 @@ static int exec_mmap(struct mm_struct *mm)
 	activate_mm(active_mm, mm);
 	if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
 		local_irq_enable();
-	tsk->mm->vmacache_seqnum = 0;
-	vmacache_flush(tsk);
 	task_unlock(tsk);
 	lru_gen_use_mm(mm);
 	if (old_mm) {
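
Note: the exec.c hunks use the higher-level VMA iterator wrappers (VMA_ITERATOR, vma_next, for_each_vma) instead of a raw maple-tree state. A minimal sketch of that interface, assuming only the helpers shown in this diff (the printing function is hypothetical):

	#include <linux/mm.h>

	/* Hypothetical: walk every VMA in address order with the VMA iterator. */
	static void demo_print_vmas(struct mm_struct *mm)
	{
		struct vm_area_struct *vma;
		VMA_ITERATOR(vmi, mm, 0);	/* start the walk at address 0 */

		mmap_read_lock(mm);
		for_each_vma(vmi, vma)
			pr_info("vma %lx-%lx\n", vma->vm_start, vma->vm_end);
		mmap_read_unlock(mm);
	}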
@@ -364,13 +364,155 @@ static int hugetlbfs_write_end(struct file *file, struct address_space *mapping,
 	return -EINVAL;
 }
 
-static void remove_huge_page(struct page *page)
+static void hugetlb_delete_from_page_cache(struct page *page)
 {
 	ClearPageDirty(page);
 	ClearPageUptodate(page);
 	delete_from_page_cache(page);
 }
 
+/*
+ * Called with i_mmap_rwsem held for inode based vma maps.  This makes
+ * sure vma (and vm_mm) will not go away.  We also hold the hugetlb fault
+ * mutex for the page in the mapping.  So, we can not race with page being
+ * faulted into the vma.
+ */
+static bool hugetlb_vma_maps_page(struct vm_area_struct *vma,
+				unsigned long addr, struct page *page)
+{
+	pte_t *ptep, pte;
+
+	ptep = huge_pte_offset(vma->vm_mm, addr,
+			huge_page_size(hstate_vma(vma)));
+
+	if (!ptep)
+		return false;
+
+	pte = huge_ptep_get(ptep);
+	if (huge_pte_none(pte) || !pte_present(pte))
+		return false;
+
+	if (pte_page(pte) == page)
+		return true;
+
+	return false;
+}
+
+/*
+ * Can vma_offset_start/vma_offset_end overflow on 32-bit arches?
+ * No, because the interval tree returns us only those vmas
+ * which overlap the truncated area starting at pgoff,
+ * and no vma on a 32-bit arch can span beyond the 4GB.
+ */
+static unsigned long vma_offset_start(struct vm_area_struct *vma, pgoff_t start)
+{
+	if (vma->vm_pgoff < start)
+		return (start - vma->vm_pgoff) << PAGE_SHIFT;
+	else
+		return 0;
+}
+
+static unsigned long vma_offset_end(struct vm_area_struct *vma, pgoff_t end)
+{
+	unsigned long t_end;
+
+	if (!end)
+		return vma->vm_end;
+
+	t_end = ((end - vma->vm_pgoff) << PAGE_SHIFT) + vma->vm_start;
+	if (t_end > vma->vm_end)
+		t_end = vma->vm_end;
+	return t_end;
+}
+
+/*
+ * Called with hugetlb fault mutex held.  Therefore, no more mappings to
+ * this folio can be created while executing the routine.
+ */
+static void hugetlb_unmap_file_folio(struct hstate *h,
+					struct address_space *mapping,
+					struct folio *folio, pgoff_t index)
+{
+	struct rb_root_cached *root = &mapping->i_mmap;
+	struct hugetlb_vma_lock *vma_lock;
+	struct page *page = &folio->page;
+	struct vm_area_struct *vma;
+	unsigned long v_start;
+	unsigned long v_end;
+	pgoff_t start, end;
+
+	start = index * pages_per_huge_page(h);
+	end = (index + 1) * pages_per_huge_page(h);
+
+	i_mmap_lock_write(mapping);
+retry:
+	vma_lock = NULL;
+	vma_interval_tree_foreach(vma, root, start, end - 1) {
+		v_start = vma_offset_start(vma, start);
+		v_end = vma_offset_end(vma, end);
+
+		if (!hugetlb_vma_maps_page(vma, vma->vm_start + v_start, page))
+			continue;
+
+		if (!hugetlb_vma_trylock_write(vma)) {
+			vma_lock = vma->vm_private_data;
+			/*
+			 * If we can not get vma lock, we need to drop
+			 * immap_sema and take locks in order.  First,
+			 * take a ref on the vma_lock structure so that
+			 * we can be guaranteed it will not go away when
+			 * dropping immap_sema.
+			 */
+			kref_get(&vma_lock->refs);
+			break;
+		}
+
+		unmap_hugepage_range(vma, vma->vm_start + v_start, v_end,
+				NULL, ZAP_FLAG_DROP_MARKER);
+		hugetlb_vma_unlock_write(vma);
+	}
+
+	i_mmap_unlock_write(mapping);
+
+	if (vma_lock) {
+		/*
+		 * Wait on vma_lock.  We know it is still valid as we have
+		 * a reference.  We must 'open code' vma locking as we do
+		 * not know if vma_lock is still attached to vma.
+		 */
+		down_write(&vma_lock->rw_sema);
+		i_mmap_lock_write(mapping);
+
+		vma = vma_lock->vma;
+		if (!vma) {
+			/*
+			 * If lock is no longer attached to vma, then just
+			 * unlock, drop our reference and retry looking for
+			 * other vmas.
+			 */
+			up_write(&vma_lock->rw_sema);
+			kref_put(&vma_lock->refs, hugetlb_vma_lock_release);
+			goto retry;
+		}
+
+		/*
+		 * vma_lock is still attached to vma.  Check to see if vma
+		 * still maps page and if so, unmap.
+		 */
+		v_start = vma_offset_start(vma, start);
+		v_end = vma_offset_end(vma, end);
+		if (hugetlb_vma_maps_page(vma, vma->vm_start + v_start, page))
+			unmap_hugepage_range(vma, vma->vm_start + v_start,
+						v_end, NULL,
+						ZAP_FLAG_DROP_MARKER);
+
+		kref_put(&vma_lock->refs, hugetlb_vma_lock_release);
+		hugetlb_vma_unlock_write(vma);
+
+		goto retry;
+	}
+}
+
 static void
 hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end,
 		      zap_flags_t zap_flags)
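
Note: to make the vma_offset_start() arithmetic concrete: with 2 MB huge pages (512 base pages each), truncating from huge-page index 3 gives start = 3 * 512 = 1536 in base-page units; a VMA whose vm_pgoff is 1024 then gets v_start = (1536 - 1024) << PAGE_SHIFT = 2 MB, so unmapping begins 2 MB into that VMA. A self-contained userspace sketch of the same computation, with illustrative values not taken from the patch:

	#include <stdio.h>

	#define PAGE_SHIFT 12UL

	/* Mirror of vma_offset_start(): byte offset into the VMA where unmapping begins. */
	static unsigned long demo_vma_offset_start(unsigned long vm_pgoff,
						   unsigned long start_pgoff)
	{
		return (vm_pgoff < start_pgoff) ?
			(start_pgoff - vm_pgoff) << PAGE_SHIFT : 0;
	}

	int main(void)
	{
		unsigned long start = 3 * 512;	/* truncate from huge-page index 3 */

		/* VMA mapping file pages [1024, ...): prints 2097152, i.e. 2 MB */
		printf("v_start = %lu bytes\n", demo_vma_offset_start(1024, start));
		return 0;
	}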
@@ -383,34 +525,68 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end,
 	 * an inclusive "last".
 	 */
 	vma_interval_tree_foreach(vma, root, start, end ? end - 1 : ULONG_MAX) {
-		unsigned long v_offset;
+		unsigned long v_start;
 		unsigned long v_end;
 
-		/*
-		 * Can the expression below overflow on 32-bit arches?
-		 * No, because the interval tree returns us only those vmas
-		 * which overlap the truncated area starting at pgoff,
-		 * and no vma on a 32-bit arch can span beyond the 4GB.
-		 */
-		if (vma->vm_pgoff < start)
-			v_offset = (start - vma->vm_pgoff) << PAGE_SHIFT;
-		else
-			v_offset = 0;
-
-		if (!end)
-			v_end = vma->vm_end;
-		else {
-			v_end = ((end - vma->vm_pgoff) << PAGE_SHIFT)
-							+ vma->vm_start;
-			if (v_end > vma->vm_end)
-				v_end = vma->vm_end;
-		}
+		if (!hugetlb_vma_trylock_write(vma))
+			continue;
+
+		v_start = vma_offset_start(vma, start);
+		v_end = vma_offset_end(vma, end);
 
-		unmap_hugepage_range(vma, vma->vm_start + v_offset, v_end,
+		unmap_hugepage_range(vma, vma->vm_start + v_start, v_end,
 				     NULL, zap_flags);
+
+		/*
+		 * Note that vma lock only exists for shared/non-private
+		 * vmas.  Therefore, lock is not held when calling
+		 * unmap_hugepage_range for private vmas.
+		 */
+		hugetlb_vma_unlock_write(vma);
 	}
 }
 
+/*
+ * Called with hugetlb fault mutex held.
+ * Returns true if page was actually removed, false otherwise.
+ */
+static bool remove_inode_single_folio(struct hstate *h, struct inode *inode,
+					struct address_space *mapping,
+					struct folio *folio, pgoff_t index,
+					bool truncate_op)
+{
+	bool ret = false;
+
+	/*
+	 * If folio is mapped, it was faulted in after being
+	 * unmapped in caller.  Unmap (again) while holding
+	 * the fault mutex.  The mutex will prevent faults
+	 * until we finish removing the folio.
+	 */
+	if (unlikely(folio_mapped(folio)))
+		hugetlb_unmap_file_folio(h, mapping, folio, index);
+
+	folio_lock(folio);
+	/*
+	 * We must remove the folio from page cache before removing
+	 * the region/ reserve map (hugetlb_unreserve_pages).  In
+	 * rare out of memory conditions, removal of the region/reserve
+	 * map could fail.  Correspondingly, the subpool and global
+	 * reserve usage count can need to be adjusted.
+	 */
+	VM_BUG_ON(HPageRestoreReserve(&folio->page));
+	hugetlb_delete_from_page_cache(&folio->page);
+	ret = true;
+	if (!truncate_op) {
+		if (unlikely(hugetlb_unreserve_pages(inode, index,
+							index + 1, 1)))
+			hugetlb_fix_reserve_counts(inode);
+	}
+
+	folio_unlock(folio);
+	return ret;
+}
+
 /*
  * remove_inode_hugepages handles two distinct cases: truncation and hole
  * punch. There are subtle differences in operation for each case.
|
|||||||
* truncation is indicated by end of range being LLONG_MAX
|
* truncation is indicated by end of range being LLONG_MAX
|
||||||
* In this case, we first scan the range and release found pages.
|
* In this case, we first scan the range and release found pages.
|
||||||
* After releasing pages, hugetlb_unreserve_pages cleans up region/reserve
|
* After releasing pages, hugetlb_unreserve_pages cleans up region/reserve
|
||||||
* maps and global counts. Page faults can not race with truncation
|
* maps and global counts. Page faults can race with truncation.
|
||||||
* in this routine. hugetlb_no_page() holds i_mmap_rwsem and prevents
|
* During faults, hugetlb_no_page() checks i_size before page allocation,
|
||||||
* page faults in the truncated range by checking i_size. i_size is
|
* and again after obtaining page table lock. It will 'back out'
|
||||||
* modified while holding i_mmap_rwsem.
|
* allocations in the truncated range.
|
||||||
* hole punch is indicated if end is not LLONG_MAX
|
* hole punch is indicated if end is not LLONG_MAX
|
||||||
* In the hole punch case we scan the range and release found pages.
|
* In the hole punch case we scan the range and release found pages.
|
||||||
* Only when releasing a page is the associated region/reserve map
|
* Only when releasing a page is the associated region/reserve map
|
||||||
@ -451,61 +627,17 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
|
|||||||
u32 hash = 0;
|
u32 hash = 0;
|
||||||
|
|
||||||
index = folio->index;
|
index = folio->index;
|
||||||
if (!truncate_op) {
|
hash = hugetlb_fault_mutex_hash(mapping, index);
|
||||||
/*
|
mutex_lock(&hugetlb_fault_mutex_table[hash]);
|
||||||
* Only need to hold the fault mutex in the
|
|
||||||
* hole punch case. This prevents races with
|
|
||||||
* page faults. Races are not possible in the
|
|
||||||
* case of truncation.
|
|
||||||
*/
|
|
||||||
hash = hugetlb_fault_mutex_hash(mapping, index);
|
|
||||||
mutex_lock(&hugetlb_fault_mutex_table[hash]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If folio is mapped, it was faulted in after being
|
* Remove folio that was part of folio_batch.
|
||||||
* unmapped in caller. Unmap (again) now after taking
|
|
||||||
* the fault mutex. The mutex will prevent faults
|
|
||||||
* until we finish removing the folio.
|
|
||||||
*
|
|
||||||
* This race can only happen in the hole punch case.
|
|
||||||
* Getting here in a truncate operation is a bug.
|
|
||||||
*/
|
*/
|
||||||
if (unlikely(folio_mapped(folio))) {
|
if (remove_inode_single_folio(h, inode, mapping, folio,
|
||||||
BUG_ON(truncate_op);
|
index, truncate_op))
|
||||||
|
freed++;
|
||||||
|
|
||||||
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
|
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
|
||||||
i_mmap_lock_write(mapping);
|
|
||||||
mutex_lock(&hugetlb_fault_mutex_table[hash]);
|
|
||||||
hugetlb_vmdelete_list(&mapping->i_mmap,
|
|
||||||
index * pages_per_huge_page(h),
|
|
||||||
(index + 1) * pages_per_huge_page(h),
|
|
||||||
ZAP_FLAG_DROP_MARKER);
|
|
||||||
i_mmap_unlock_write(mapping);
|
|
||||||
}
|
|
||||||
|
|
||||||
folio_lock(folio);
|
|
||||||
/*
|
|
||||||
* We must free the huge page and remove from page
|
|
||||||
* cache (remove_huge_page) BEFORE removing the
|
|
||||||
* region/reserve map (hugetlb_unreserve_pages). In
|
|
||||||
* rare out of memory conditions, removal of the
|
|
||||||
* region/reserve map could fail. Correspondingly,
|
|
||||||
* the subpool and global reserve usage count can need
|
|
||||||
* to be adjusted.
|
|
||||||
*/
|
|
||||||
VM_BUG_ON(HPageRestoreReserve(&folio->page));
|
|
||||||
remove_huge_page(&folio->page);
|
|
||||||
freed++;
|
|
||||||
if (!truncate_op) {
|
|
||||||
if (unlikely(hugetlb_unreserve_pages(inode,
|
|
||||||
index, index + 1, 1)))
|
|
||||||
hugetlb_fix_reserve_counts(inode);
|
|
||||||
}
|
|
||||||
|
|
||||||
folio_unlock(folio);
|
|
||||||
if (!truncate_op)
|
|
||||||
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
|
|
||||||
}
|
}
|
||||||
folio_batch_release(&fbatch);
|
folio_batch_release(&fbatch);
|
||||||
cond_resched();
|
cond_resched();
|
||||||
@ -543,8 +675,8 @@ static void hugetlb_vmtruncate(struct inode *inode, loff_t offset)
|
|||||||
BUG_ON(offset & ~huge_page_mask(h));
|
BUG_ON(offset & ~huge_page_mask(h));
|
||||||
pgoff = offset >> PAGE_SHIFT;
|
pgoff = offset >> PAGE_SHIFT;
|
||||||
|
|
||||||
i_mmap_lock_write(mapping);
|
|
||||||
i_size_write(inode, offset);
|
i_size_write(inode, offset);
|
||||||
|
i_mmap_lock_write(mapping);
|
||||||
if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
|
if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
|
||||||
hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0,
|
hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0,
|
||||||
ZAP_FLAG_DROP_MARKER);
|
ZAP_FLAG_DROP_MARKER);
|
||||||
@ -703,11 +835,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
|
|||||||
/* addr is the offset within the file (zero based) */
|
/* addr is the offset within the file (zero based) */
|
||||||
addr = index * hpage_size;
|
addr = index * hpage_size;
|
||||||
|
|
||||||
/*
|
/* mutex taken here, fault path and hole punch */
|
||||||
* fault mutex taken here, protects against fault path
|
|
||||||
* and hole punch. inode_lock previously taken protects
|
|
||||||
* against truncation.
|
|
||||||
*/
|
|
||||||
hash = hugetlb_fault_mutex_hash(mapping, index);
|
hash = hugetlb_fault_mutex_hash(mapping, index);
|
||||||
mutex_lock(&hugetlb_fault_mutex_table[hash]);
|
mutex_lock(&hugetlb_fault_mutex_table[hash]);
|
||||||
|
|
||||||
@ -737,7 +865,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
|
|||||||
}
|
}
|
||||||
clear_huge_page(page, addr, pages_per_huge_page(h));
|
clear_huge_page(page, addr, pages_per_huge_page(h));
|
||||||
__SetPageUptodate(page);
|
__SetPageUptodate(page);
|
||||||
error = huge_add_to_page_cache(page, mapping, index);
|
error = hugetlb_add_to_page_cache(page, mapping, index);
|
||||||
if (unlikely(error)) {
|
if (unlikely(error)) {
|
||||||
restore_reserve_on_error(h, &pseudo_vma, addr, page);
|
restore_reserve_on_error(h, &pseudo_vma, addr, page);
|
||||||
put_page(page);
|
put_page(page);
|
||||||
@ -749,7 +877,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
|
|||||||
|
|
||||||
SetHPageMigratable(page);
|
SetHPageMigratable(page);
|
||||||
/*
|
/*
|
||||||
* unlock_page because locked by huge_add_to_page_cache()
|
* unlock_page because locked by hugetlb_add_to_page_cache()
|
||||||
* put_page() due to reference from alloc_huge_page()
|
* put_page() due to reference from alloc_huge_page()
|
||||||
*/
|
*/
|
||||||
unlock_page(page);
|
unlock_page(page);
|
||||||
@ -994,7 +1122,7 @@ static int hugetlbfs_error_remove_page(struct address_space *mapping,
|
|||||||
struct inode *inode = mapping->host;
|
struct inode *inode = mapping->host;
|
||||||
pgoff_t index = page->index;
|
pgoff_t index = page->index;
|
||||||
|
|
||||||
remove_huge_page(page);
|
hugetlb_delete_from_page_cache(page);
|
||||||
if (unlikely(hugetlb_unreserve_pages(inode, index, index + 1, 1)))
|
if (unlikely(hugetlb_unreserve_pages(inode, index, index + 1, 1)))
|
||||||
hugetlb_fix_reserve_counts(inode);
|
hugetlb_fix_reserve_counts(inode);
|
||||||
|
|
||||||
|
@@ -5088,7 +5088,7 @@ int page_symlink(struct inode *inode, const char *symname, int len)
 	const struct address_space_operations *aops = mapping->a_ops;
 	bool nofs = !mapping_gfp_constraint(mapping, __GFP_FS);
 	struct page *page;
-	void *fsdata;
+	void *fsdata = NULL;
 	int err;
 	unsigned int flags;
 
@@ -24,6 +24,7 @@
 #include <linux/user_namespace.h>
 #include <linux/namei.h>
 #include <linux/mnt_idmapping.h>
+#include <linux/iversion.h>
 
 static struct posix_acl **acl_by_type(struct inode *inode, int type)
 {
@@ -1227,6 +1228,8 @@ int simple_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
 	}
 
 	inode->i_ctime = current_time(inode);
+	if (IS_I_VERSION(inode))
+		inode_inc_iversion(inode);
 	set_cached_acl(inode, type, acl);
 	return 0;
 }
@@ -2351,6 +2351,7 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
 	GENRADIX(struct map_files_info) fa;
 	struct map_files_info *p;
 	int ret;
+	struct vma_iterator vmi;
 
 	genradix_init(&fa);
 
@@ -2389,7 +2390,9 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
 	 * routine might require mmap_lock taken in might_fault().
 	 */
 
-	for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
+	pos = 2;
+	vma_iter_init(&vmi, mm, 0);
+	for_each_vma(vmi, vma) {
 		if (!vma->vm_file)
 			continue;
 		if (++pos <= ctx->pos)
@@ -3197,6 +3200,19 @@ static int proc_pid_ksm_merging_pages(struct seq_file *m, struct pid_namespace *ns,
 
 	return 0;
 }
+static int proc_pid_ksm_stat(struct seq_file *m, struct pid_namespace *ns,
+				struct pid *pid, struct task_struct *task)
+{
+	struct mm_struct *mm;
+
+	mm = get_task_mm(task);
+	if (mm) {
+		seq_printf(m, "ksm_rmap_items %lu\n", mm->ksm_rmap_items);
+		mmput(mm);
+	}
+
+	return 0;
+}
 #endif /* CONFIG_KSM */
 
 #ifdef CONFIG_STACKLEAK_METRICS
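
Note: the new per-task file is read like any other proc file and, as added here, exposes a single "ksm_rmap_items" counter. A minimal userspace sketch of reading it (the path and lack of error handling are only illustrative):

	#include <stdio.h>

	int main(void)
	{
		char line[128];
		FILE *f = fopen("/proc/self/ksm_stat", "r");

		if (!f)
			return 1;
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);	/* e.g. "ksm_rmap_items 0" */
		fclose(f);
		return 0;
	}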
@@ -3335,6 +3351,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 #endif
 #ifdef CONFIG_KSM
 	ONE("ksm_merging_pages",  S_IRUSR, proc_pid_ksm_merging_pages),
+	ONE("ksm_stat",  S_IRUSR, proc_pid_ksm_stat),
 #endif
 };
 
@@ -3672,6 +3689,7 @@ static const struct pid_entry tid_base_stuff[] = {
 #endif
 #ifdef CONFIG_KSM
 	ONE("ksm_merging_pages",  S_IRUSR, proc_pid_ksm_merging_pages),
+	ONE("ksm_stat",  S_IRUSR, proc_pid_ksm_stat),
 #endif
 #ifdef CONFIG_CPU_FREQ_TIMES
 	ONE("time_in_state", 0444, proc_time_in_state_show),
@@ -285,7 +285,7 @@ struct proc_maps_private {
 	struct task_struct *task;
 	struct mm_struct *mm;
 #ifdef CONFIG_MMU
-	struct vm_area_struct *tail_vma;
+	struct vma_iterator iter;
 #endif
#ifdef CONFIG_NUMA
 	struct mempolicy *task_mempolicy;
@@ -1,6 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/pagewalk.h>
-#include <linux/vmacache.h>
 #include <linux/mm_inline.h>
 #include <linux/hugetlb.h>
 #include <linux/huge_mm.h>
@@ -124,12 +123,26 @@ static void release_task_mempolicy(struct proc_maps_private *priv)
 }
 #endif
 
+static struct vm_area_struct *proc_get_vma(struct proc_maps_private *priv,
+						loff_t *ppos)
+{
+	struct vm_area_struct *vma = vma_next(&priv->iter);
+
+	if (vma) {
+		*ppos = vma->vm_start;
+	} else {
+		*ppos = -2UL;
+		vma = get_gate_vma(priv->mm);
+	}
+
+	return vma;
+}
+
 static void *m_start(struct seq_file *m, loff_t *ppos)
 {
 	struct proc_maps_private *priv = m->private;
 	unsigned long last_addr = *ppos;
 	struct mm_struct *mm;
-	struct vm_area_struct *vma;
 
 	/* See m_next(). Zero at the start or after lseek. */
 	if (last_addr == -1UL)
@@ -153,31 +166,21 @@ static void *m_start(struct seq_file *m, loff_t *ppos)
 		return ERR_PTR(-EINTR);
 	}
 
+	vma_iter_init(&priv->iter, mm, last_addr);
 	hold_task_mempolicy(priv);
-	priv->tail_vma = get_gate_vma(mm);
-
-	vma = find_vma(mm, last_addr);
-	if (vma)
-		return vma;
-
-	return priv->tail_vma;
+	if (last_addr == -2UL)
+		return get_gate_vma(mm);
+
+	return proc_get_vma(priv, ppos);
 }
 
 static void *m_next(struct seq_file *m, void *v, loff_t *ppos)
 {
-	struct proc_maps_private *priv = m->private;
-	struct vm_area_struct *next, *vma = v;
-
-	if (vma == priv->tail_vma)
-		next = NULL;
-	else if (vma->vm_next)
-		next = vma->vm_next;
-	else
-		next = priv->tail_vma;
-
-	*ppos = next ? next->vm_start : -1UL;
-
-	return next;
+	if (*ppos == -2UL) {
+		*ppos = -1UL;
+		return NULL;
+	}
+	return proc_get_vma(m->private, ppos);
 }
 
 static void m_stop(struct seq_file *m, void *v)
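
Note: the rewritten iterator encodes its position directly in the seq_file offset rather than in a cached tail_vma pointer. A condensed sketch of the *ppos protocol as used in this hunk (the helper is hypothetical, only the sentinel values are from the patch):

	/*
	 *   *ppos == vma->vm_start  ->  resume the walk at this VMA
	 *   *ppos == -2UL           ->  all VMAs emitted, gate VMA is next
	 *   *ppos == -1UL           ->  iteration finished, m_next() returns NULL
	 */
	static inline bool demo_maps_pos_is_done(unsigned long pos)
	{
		return pos == -1UL;
	}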
@@ -877,16 +880,16 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
 {
 	struct proc_maps_private *priv = m->private;
 	struct mem_size_stats mss;
-	struct mm_struct *mm;
+	struct mm_struct *mm = priv->mm;
 	struct vm_area_struct *vma;
-	unsigned long last_vma_end = 0;
+	unsigned long vma_start = 0, last_vma_end = 0;
 	int ret = 0;
+	MA_STATE(mas, &mm->mm_mt, 0, 0);
 
 	priv->task = get_proc_task(priv->inode);
 	if (!priv->task)
 		return -ESRCH;
 
-	mm = priv->mm;
 	if (!mm || !mmget_not_zero(mm)) {
 		ret = -ESRCH;
 		goto out_put_task;
@@ -899,8 +902,13 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
 		goto out_put_mm;
 
 	hold_task_mempolicy(priv);
+	vma = mas_find(&mas, 0);
+
+	if (unlikely(!vma))
+		goto empty_set;
 
-	for (vma = priv->mm->mmap; vma;) {
+	vma_start = vma->vm_start;
+	do {
 		smap_gather_stats(vma, &mss, 0);
 		last_vma_end = vma->vm_end;
 
@@ -909,6 +917,7 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
 		 * access it for write request.
 		 */
 		if (mmap_lock_is_contended(mm)) {
+			mas_pause(&mas);
 			mmap_read_unlock(mm);
 			ret = mmap_read_lock_killable(mm);
 			if (ret) {
@@ -952,7 +961,7 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
 			 *    contains last_vma_end.
 			 *    Iterate VMA' from last_vma_end.
 			 */
-			vma = find_vma(mm, last_vma_end - 1);
+			vma = mas_find(&mas, ULONG_MAX);
 			/* Case 3 above */
 			if (!vma)
 				break;
@@ -966,11 +975,10 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
 				smap_gather_stats(vma, &mss, last_vma_end);
 		}
 		/* Case 2 above */
-		vma = vma->vm_next;
-	}
+	} while ((vma = mas_find(&mas, ULONG_MAX)) != NULL);
 
-	show_vma_header_prefix(m, priv->mm->mmap->vm_start,
-			       last_vma_end, 0, 0, 0, 0);
+empty_set:
+	show_vma_header_prefix(m, vma_start, last_vma_end, 0, 0, 0, 0);
 	seq_pad(m, ' ');
 	seq_puts(m, "[rollup]\n");
 
@@ -1263,6 +1271,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 		return -ESRCH;
 	mm = get_task_mm(task);
 	if (mm) {
+		MA_STATE(mas, &mm->mm_mt, 0, 0);
 		struct mmu_notifier_range range;
 		struct clear_refs_private cp = {
 			.type = type,
@@ -1282,7 +1291,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 		}
 
 		if (type == CLEAR_REFS_SOFT_DIRTY) {
-			for (vma = mm->mmap; vma; vma = vma->vm_next) {
+			mas_for_each(&mas, vma, ULONG_MAX) {
 				if (!(vma->vm_flags & VM_SOFTDIRTY))
 					continue;
 				vma->vm_flags &= ~VM_SOFTDIRTY;
@@ -1294,8 +1303,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 						0, NULL, mm, 0, -1UL);
 			mmu_notifier_invalidate_range_start(&range);
 		}
-		walk_page_range(mm, 0, mm->highest_vm_end, &clear_refs_walk_ops,
-				&cp);
+		walk_page_range(mm, 0, -1, &clear_refs_walk_ops, &cp);
 		if (type == CLEAR_REFS_SOFT_DIRTY) {
 			mmu_notifier_invalidate_range_end(&range);
 			flush_tlb_mm(mm);
@@ -611,14 +611,16 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
 	if (release_new_ctx) {
 		struct vm_area_struct *vma;
 		struct mm_struct *mm = release_new_ctx->mm;
+		VMA_ITERATOR(vmi, mm, 0);
 
 		/* the various vma->vm_userfaultfd_ctx still points to it */
 		mmap_write_lock(mm);
-		for (vma = mm->mmap; vma; vma = vma->vm_next)
+		for_each_vma(vmi, vma) {
 			if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
 				vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
 				vma->vm_flags &= ~__VM_UFFD_FLAGS;
 			}
+		}
 		mmap_write_unlock(mm);
 
 		userfaultfd_ctx_put(release_new_ctx);
@@ -799,11 +801,13 @@ static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps,
 	return false;
 }
 
-int userfaultfd_unmap_prep(struct vm_area_struct *vma,
-			   unsigned long start, unsigned long end,
-			   struct list_head *unmaps)
+int userfaultfd_unmap_prep(struct mm_struct *mm, unsigned long start,
+			   unsigned long end, struct list_head *unmaps)
 {
-	for ( ; vma && vma->vm_start < end; vma = vma->vm_next) {
+	VMA_ITERATOR(vmi, mm, start);
+	struct vm_area_struct *vma;
+
+	for_each_vma_range(vmi, vma, end) {
 		struct userfaultfd_unmap_ctx *unmap_ctx;
 		struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
 
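
Note: for_each_vma_range() bounds the walk to [start, end) itself, which is why the manual vm_start check from the old loop disappears above. A minimal sketch of the same pattern, assuming only the iterator helpers visible in this diff (the counting helper is hypothetical; the caller is assumed to hold the mmap lock):

	#include <linux/mm.h>

	/* Hypothetical: count VMAs overlapping [start, end); caller holds mmap lock. */
	static int demo_count_vmas_in_range(struct mm_struct *mm,
					    unsigned long start, unsigned long end)
	{
		struct vm_area_struct *vma;
		int nr = 0;
		VMA_ITERATOR(vmi, mm, start);

		for_each_vma_range(vmi, vma, end)
			nr++;
		return nr;
	}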
@@ -853,6 +857,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
 	/* len == 0 means wake all */
 	struct userfaultfd_wake_range range = { .len = 0, };
 	unsigned long new_flags;
+	MA_STATE(mas, &mm->mm_mt, 0, 0);
 
 	WRITE_ONCE(ctx->released, true);
 
@@ -869,7 +874,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
 	 */
 	mmap_write_lock(mm);
 	prev = NULL;
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+	mas_for_each(&mas, vma, ULONG_MAX) {
 		cond_resched();
 		BUG_ON(!!vma->vm_userfaultfd_ctx.ctx ^
 		       !!(vma->vm_flags & __VM_UFFD_FLAGS));
@@ -883,10 +888,13 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
 				 vma->vm_file, vma->vm_pgoff,
 				 vma_policy(vma),
 				 NULL_VM_UFFD_CTX, anon_vma_name(vma));
-		if (prev)
+		if (prev) {
+			mas_pause(&mas);
 			vma = prev;
-		else
+		} else {
 			prev = vma;
+		}
+
 		vma->vm_flags = new_flags;
 		vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
 	}
@@ -1268,6 +1276,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 	bool found;
 	bool basic_ioctls;
 	unsigned long start, end, vma_end;
+	MA_STATE(mas, &mm->mm_mt, 0, 0);
 
 	user_uffdio_register = (struct uffdio_register __user *) arg;
 
@@ -1310,7 +1319,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 		goto out;
 
 	mmap_write_lock(mm);
-	vma = find_vma_prev(mm, start, &prev);
+	mas_set(&mas, start);
+	vma = mas_find(&mas, ULONG_MAX);
 	if (!vma)
 		goto out_unlock;
 
@@ -1335,7 +1345,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 	 */
 	found = false;
 	basic_ioctls = false;
-	for (cur = vma; cur && cur->vm_start < end; cur = cur->vm_next) {
+	for (cur = vma; cur; cur = mas_next(&mas, end - 1)) {
 		cond_resched();
 
 		BUG_ON(!!cur->vm_userfaultfd_ctx.ctx ^
@@ -1395,8 +1405,10 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 	}
 	BUG_ON(!found);
 
-	if (vma->vm_start < start)
-		prev = vma;
+	mas_set(&mas, start);
+	prev = mas_prev(&mas, 0);
+	if (prev != vma)
+		mas_next(&mas, ULONG_MAX);
 
 	ret = 0;
 	do {
@@ -1426,6 +1438,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 				 ((struct vm_userfaultfd_ctx){ ctx }),
 				 anon_vma_name(vma));
 		if (prev) {
+			/* vma_merge() invalidated the mas */
+			mas_pause(&mas);
 			vma = prev;
 			goto next;
 		}
@@ -1433,11 +1447,15 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 			ret = split_vma(mm, vma, start, 1);
 			if (ret)
 				break;
+			/* split_vma() invalidated the mas */
+			mas_pause(&mas);
 		}
 		if (vma->vm_end > end) {
 			ret = split_vma(mm, vma, end, 0);
 			if (ret)
 				break;
+			/* split_vma() invalidated the mas */
+			mas_pause(&mas);
 		}
 next:
 		/*
@@ -1454,8 +1472,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 	skip:
 		prev = vma;
 		start = vma->vm_end;
-		vma = vma->vm_next;
-	} while (vma && vma->vm_start < end);
+		vma = mas_next(&mas, end - 1);
+	} while (vma);
 out_unlock:
 	mmap_write_unlock(mm);
 	mmput(mm);
@@ -1499,6 +1517,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 	bool found;
 	unsigned long start, end, vma_end;
 	const void __user *buf = (void __user *)arg;
+	MA_STATE(mas, &mm->mm_mt, 0, 0);
 
 	ret = -EFAULT;
 	if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister)))
@@ -1517,7 +1536,8 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 		goto out;
 
 	mmap_write_lock(mm);
-	vma = find_vma_prev(mm, start, &prev);
+	mas_set(&mas, start);
+	vma = mas_find(&mas, ULONG_MAX);
 	if (!vma)
 		goto out_unlock;
 
@@ -1542,7 +1562,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 	 */
 	found = false;
 	ret = -EINVAL;
-	for (cur = vma; cur && cur->vm_start < end; cur = cur->vm_next) {
+	for (cur = vma; cur; cur = mas_next(&mas, end - 1)) {
 		cond_resched();
 
 		BUG_ON(!!cur->vm_userfaultfd_ctx.ctx ^
@@ -1562,8 +1582,10 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 	}
 	BUG_ON(!found);
 
-	if (vma->vm_start < start)
-		prev = vma;
+	mas_set(&mas, start);
+	prev = mas_prev(&mas, 0);
+	if (prev != vma)
+		mas_next(&mas, ULONG_MAX);
 
 	ret = 0;
 	do {
@@ -1632,8 +1654,8 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 	skip:
 		prev = vma;
 		start = vma->vm_end;
-		vma = vma->vm_next;
-	} while (vma && vma->vm_start < end);
+		vma = mas_next(&mas, end - 1);
+	} while (vma);
 out_unlock:
 	mmap_write_unlock(mm);
 	mmput(mm);
@@ -2,6 +2,8 @@
 #ifndef _ASM_GENERIC_CACHEFLUSH_H
 #define _ASM_GENERIC_CACHEFLUSH_H
 
+#include <linux/instrumented.h>
+
 struct mm_struct;
 struct vm_area_struct;
 struct page;
@@ -105,14 +107,22 @@ static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
 #ifndef copy_to_user_page
 #define copy_to_user_page(vma, page, vaddr, dst, src, len)	\
 	do { \
+		instrument_copy_to_user((void __user *)dst, src, len); \
 		memcpy(dst, src, len); \
 		flush_icache_user_page(vma, page, vaddr, len); \
 	} while (0)
 #endif
 
+
 #ifndef copy_from_user_page
-#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
-	memcpy(dst, src, len)
+#define copy_from_user_page(vma, page, vaddr, dst, src, len)		\
+	do {								\
+		instrument_copy_from_user_before(dst, (void __user *)src, \
+						 len);			\
+		memcpy(dst, src, len);					\
+		instrument_copy_from_user_after(dst, (void __user *)src, len, \
+						0);			\
+	} while (0)
 #endif
 
 #endif /* _ASM_GENERIC_CACHEFLUSH_H */
@@ -85,4 +85,17 @@
 #define cache_line_size()	L1_CACHE_BYTES
 #endif
 
+/*
+ * Helper to add padding within a struct to ensure data fall into separate
+ * cachelines.
+ */
+#if defined(CONFIG_SMP)
+struct cacheline_padding {
+	char x[0];
+} ____cacheline_internodealigned_in_smp;
+#define CACHELINE_PADDING(name)	struct cacheline_padding name
+#else
+#define CACHELINE_PADDING(name)
+#endif
+
 #endif /* __LINUX_CACHE_H */
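
Note: CACHELINE_PADDING() is intended to be dropped between groups of struct members so that independently-updated hot fields land on separate cache lines on SMP builds, and compiles away entirely otherwise. A sketch of typical usage; the struct and field names are illustrative only:

	#include <linux/cache.h>
	#include <linux/spinlock.h>

	/* Illustrative: keep the two lock/counter pairs on separate cache lines. */
	struct demo_stats {
		spinlock_t	ingress_lock;
		unsigned long	ingress_packets;
		CACHELINE_PADDING(_pad1_);
		spinlock_t	egress_lock;
		unsigned long	egress_packets;
	};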
@@ -51,6 +51,29 @@
 #define __no_sanitize_undefined
 #endif
 
+#if __has_feature(memory_sanitizer)
+#define __SANITIZE_MEMORY__
+/*
+ * Unlike other sanitizers, KMSAN still inserts code into functions marked with
+ * no_sanitize("kernel-memory"). Using disable_sanitizer_instrumentation
+ * provides the behavior consistent with other __no_sanitize_ attributes,
+ * guaranteeing that __no_sanitize_memory functions remain uninstrumented.
+ */
+#define __no_sanitize_memory __disable_sanitizer_instrumentation
+
+/*
+ * The __no_kmsan_checks attribute ensures that a function does not produce
+ * false positive reports by:
+ *  - initializing all local variables and memory stores in this function;
+ *  - skipping all shadow checks;
+ *  - passing initialized arguments to this function's callees.
+ */
+#define __no_kmsan_checks __attribute__((no_sanitize("kernel-memory")))
+#else
+#define __no_sanitize_memory
+#define __no_kmsan_checks
+#endif
+
 /*
  * Support for __has_feature(coverage_sanitizer) was added in Clang 13 together
  * with no_sanitize("coverage"). Prior versions of Clang support coverage
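
Note: both attributes are applied per function: __no_sanitize_memory strips KMSAN instrumentation entirely, while __no_kmsan_checks keeps metadata propagation but suppresses reports. A sketch of how a function might be annotated; the function itself is illustrative, not from this patch:

	#include <linux/compiler.h>

	/*
	 * Illustrative: deliberately reads possibly-uninitialized bytes (e.g. when
	 * probing hardware-owned memory), so suppress KMSAN reports here only.
	 */
	static __no_kmsan_checks int demo_probe_word(const int *p)
	{
		return *p != 0;
	}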
@@ -114,6 +114,12 @@
 #define __SANITIZE_ADDRESS__
 #endif
 
+/*
+ * GCC does not support KMSAN.
+ */
+#define __no_sanitize_memory
+#define __no_kmsan_checks
+
 /*
  * Turn individual warnings and errors on and off locally, depending
  * on version.
@@ -233,7 +233,8 @@ struct ftrace_likely_data {
 /* Section for code which can't be instrumented at all */
 #define noinstr								\
 	noinline notrace __attribute((__section__(".noinstr.text")))	\
-	__no_kcsan __no_sanitize_address __no_profile __no_sanitize_coverage
+	__no_kcsan __no_sanitize_address __no_profile __no_sanitize_coverage \
+	__no_sanitize_memory
 
 #endif /* __KERNEL__ */
 
@@ -216,13 +216,26 @@ struct damos_stat {
 };
 
 /**
- * struct damos - Represents a Data Access Monitoring-based Operation Scheme.
+ * struct damos_access_pattern - Target access pattern of the given scheme.
  * @min_sz_region:	Minimum size of target regions.
 * @max_sz_region:	Maximum size of target regions.
 * @min_nr_accesses:	Minimum ``->nr_accesses`` of target regions.
 * @max_nr_accesses:	Maximum ``->nr_accesses`` of target regions.
 * @min_age_region:	Minimum age of target regions.
 * @max_age_region:	Maximum age of target regions.
+ */
+struct damos_access_pattern {
+	unsigned long min_sz_region;
+	unsigned long max_sz_region;
+	unsigned int min_nr_accesses;
+	unsigned int max_nr_accesses;
+	unsigned int min_age_region;
+	unsigned int max_age_region;
+};
+
+/**
+ * struct damos - Represents a Data Access Monitoring-based Operation Scheme.
+ * @pattern:		Access pattern of target regions.
 * @action:		&damo_action to be applied to the target regions.
 * @quota:		Control the aggressiveness of this scheme.
 * @wmarks:		Watermarks for automated (in)activation of this scheme.
@@ -230,10 +243,8 @@ struct damos_stat {
 * @list:		List head for siblings.
 *
 * For each aggregation interval, DAMON finds regions which fit in the
- * condition (&min_sz_region, &max_sz_region, &min_nr_accesses,
- * &max_nr_accesses, &min_age_region, &max_age_region) and applies &action to
- * those.  To avoid consuming too much CPU time or IO resources for the
- * &action, &quota is used.
+ * &pattern and applies &action to those.  To avoid consuming too much
+ * CPU time or IO resources for the &action, &quota is used.
 *
 * To do the work only when needed, schemes can be activated for specific
 * system situations using &wmarks.  If all schemes that registered to the
@@ -248,12 +259,7 @@ struct damos_stat {
 * &action is applied.
 */
 struct damos {
-	unsigned long min_sz_region;
-	unsigned long max_sz_region;
-	unsigned int min_nr_accesses;
-	unsigned int max_nr_accesses;
-	unsigned int min_age_region;
-	unsigned int max_age_region;
+	struct damos_access_pattern pattern;
 	enum damos_action action;
 	struct damos_quota quota;
 	struct damos_watermarks wmarks;
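
Note: after this change a scheme carries its target pattern as one embedded struct rather than six scalar members of struct damos. A sketch of filling the new struct in; the numeric values are arbitrary examples, not defaults from the patch:

	#include <linux/damon.h>
	#include <linux/sizes.h>
	#include <linux/limits.h>

	/* Illustrative: match 4 KB - 64 MB regions that saw no accesses for a while. */
	static struct damos_access_pattern demo_pattern = {
		.min_sz_region		= SZ_4K,
		.max_sz_region		= SZ_64M,
		.min_nr_accesses	= 0,
		.max_nr_accesses	= 0,
		.min_age_region		= 10,
		.max_age_region		= UINT_MAX,
	};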
@ -340,7 +346,7 @@ struct damon_operations {
|
|||||||
unsigned long (*apply_scheme)(struct damon_ctx *context,
|
unsigned long (*apply_scheme)(struct damon_ctx *context,
|
||||||
struct damon_target *t, struct damon_region *r,
|
struct damon_target *t, struct damon_region *r,
|
||||||
struct damos *scheme);
|
struct damos *scheme);
|
||||||
bool (*target_valid)(void *target);
|
bool (*target_valid)(struct damon_target *t);
|
||||||
void (*cleanup)(struct damon_ctx *context);
|
void (*cleanup)(struct damon_ctx *context);
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -383,13 +389,15 @@ struct damon_callback {
|
|||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* struct damon_ctx - Represents a context for each monitoring. This is the
|
* struct damon_attrs - Monitoring attributes for accuracy/overhead control.
|
||||||
* main interface that allows users to set the attributes and get the results
|
|
||||||
* of the monitoring.
|
|
||||||
*
|
*
|
||||||
* @sample_interval: The time between access samplings.
|
* @sample_interval: The time between access samplings.
|
||||||
* @aggr_interval: The time between monitor results aggregations.
|
* @aggr_interval: The time between monitor results aggregations.
|
||||||
* @ops_update_interval: The time between monitoring operations updates.
|
* @ops_update_interval: The time between monitoring operations updates.
|
||||||
|
* @min_nr_regions: The minimum number of adaptive monitoring
|
||||||
|
* regions.
|
||||||
|
* @max_nr_regions: The maximum number of adaptive monitoring
|
||||||
|
* regions.
|
||||||
*
|
*
|
||||||
* For each @sample_interval, DAMON checks whether each region is accessed or
|
* For each @sample_interval, DAMON checks whether each region is accessed or
|
||||||
* not. It aggregates and keeps the access information (number of accesses to
|
* not. It aggregates and keeps the access information (number of accesses to
|
||||||
@@ -399,7 +407,21 @@ struct damon_callback {
  * @ops_update_interval. All time intervals are in micro-seconds.
  * Please refer to &struct damon_operations and &struct damon_callback for more
  * detail.
+ */
+struct damon_attrs {
+	unsigned long sample_interval;
+	unsigned long aggr_interval;
+	unsigned long ops_update_interval;
+	unsigned long min_nr_regions;
+	unsigned long max_nr_regions;
+};
+
+/**
+ * struct damon_ctx - Represents a context for each monitoring. This is the
+ * main interface that allows users to set the attributes and get the results
+ * of the monitoring.
  *
+ * @attrs:		Monitoring attributes for accuracy/overhead control.
 * @kdamond:		Kernel thread who does the monitoring.
 * @kdamond_lock:	Mutex for the synchronizations with @kdamond.
 *
@@ -421,15 +443,11 @@ struct damon_callback {
 * @ops:		Set of monitoring operations for given use cases.
 * @callback:		Set of callbacks for monitoring events notifications.
 *
- * @min_nr_regions:	The minimum number of adaptive monitoring regions.
- * @max_nr_regions:	The maximum number of adaptive monitoring regions.
 * @adaptive_targets:	Head of monitoring targets (&damon_target) list.
 * @schemes:		Head of schemes (&damos) list.
 */
 struct damon_ctx {
-	unsigned long sample_interval;
-	unsigned long aggr_interval;
-	unsigned long ops_update_interval;
+	struct damon_attrs attrs;
 
 	/* private: internal use only */
 	struct timespec64 last_aggregation;
@@ -442,8 +460,6 @@ struct damon_ctx {
 	struct damon_operations ops;
 	struct damon_callback callback;
 
-	unsigned long min_nr_regions;
-	unsigned long max_nr_regions;
 	struct list_head adaptive_targets;
 	struct list_head schemes;
 };
@@ -463,9 +479,17 @@ static inline struct damon_region *damon_last_region(struct damon_target *t)
 	return list_last_entry(&t->regions_list, struct damon_region, list);
 }
 
+static inline struct damon_region *damon_first_region(struct damon_target *t)
+{
+	return list_first_entry(&t->regions_list, struct damon_region, list);
+}
+
 #define damon_for_each_region(r, t) \
 	list_for_each_entry(r, &t->regions_list, list)
 
+#define damon_for_each_region_from(r, t) \
+	list_for_each_entry_from(r, &t->regions_list, list)
+
 #define damon_for_each_region_safe(r, next, t) \
 	list_for_each_entry_safe(r, next, &t->regions_list, list)
 
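The new damon_first_region() helper and the damon_for_each_region_from() iterator let core code resume a region walk from an arbitrary region instead of always starting at the head of the list. A minimal sketch of that use; the walk_from() wrapper and its processing step are illustrative, not part of this diff:

/* Illustrative: continue processing from a given region to the end of the
 * target's region list (the starting region itself is included). */
static void walk_from(struct damon_target *t, struct damon_region *start)
{
	struct damon_region *r = start;

	damon_for_each_region_from(r, t) {
		/* inspect or adjust r here */
	}
}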
@@ -501,12 +525,9 @@ void damon_destroy_region(struct damon_region *r, struct damon_target *t);
 int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges,
 		unsigned int nr_ranges);
 
-struct damos *damon_new_scheme(
-		unsigned long min_sz_region, unsigned long max_sz_region,
-		unsigned int min_nr_accesses, unsigned int max_nr_accesses,
-		unsigned int min_age_region, unsigned int max_age_region,
-		enum damos_action action, struct damos_quota *quota,
-		struct damos_watermarks *wmarks);
+struct damos *damon_new_scheme(struct damos_access_pattern *pattern,
+			enum damos_action action, struct damos_quota *quota,
+			struct damos_watermarks *wmarks);
 void damon_add_scheme(struct damon_ctx *ctx, struct damos *s);
 void damon_destroy_scheme(struct damos *s);
 
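With this signature change the six range conditions move out of the argument list and into a struct damos_access_pattern passed by pointer. A hedged sketch of the new call pattern; the field names are assumed to mirror the struct damos members removed above (the struct definition itself is not shown in this excerpt), and the concrete values, quota and watermark setup are placeholders:

struct damos_access_pattern pattern = {
	.min_sz_region = PAGE_SIZE,	/* placeholder bounds */
	.max_sz_region = ULONG_MAX,
	.min_nr_accesses = 0,
	.max_nr_accesses = 0,
	.min_age_region = 50,
	.max_age_region = UINT_MAX,
};
struct damos_quota quota = {};			/* no quota, for brevity */
struct damos_watermarks wmarks = { .metric = DAMOS_WMARK_NONE };
struct damos *scheme;

/* Was: damon_new_scheme(min_sz, max_sz, ..., max_age, action, &quota, &wmarks) */
scheme = damon_new_scheme(&pattern, DAMOS_PAGEOUT, &quota, &wmarks);

The returned scheme would then be installed with damon_add_scheme() or damon_set_schemes() as before.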
@@ -519,10 +540,8 @@ unsigned int damon_nr_regions(struct damon_target *t);
 
 struct damon_ctx *damon_new_ctx(void);
 void damon_destroy_ctx(struct damon_ctx *ctx);
-int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
-		unsigned long aggr_int, unsigned long ops_upd_int,
-		unsigned long min_nr_reg, unsigned long max_nr_reg);
-int damon_set_schemes(struct damon_ctx *ctx,
+int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs);
+void damon_set_schemes(struct damon_ctx *ctx,
 			struct damos **schemes, ssize_t nr_schemes);
 int damon_nr_running_ctxs(void);
 bool damon_is_registered_ops(enum damon_ops_id id);
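Callers of damon_set_attrs() now fill the struct damon_attrs introduced above instead of passing five scalars, and damon_set_schemes() no longer returns a value. A sketch of an updated caller; the interval and region counts are placeholder values:

struct damon_ctx *ctx = damon_new_ctx();
struct damon_attrs attrs = {
	.sample_interval = 5000,		/* 5 ms */
	.aggr_interval = 100000,		/* 100 ms */
	.ops_update_interval = 1000000,		/* 1 s */
	.min_nr_regions = 10,
	.max_nr_regions = 1000,
};

/* Was: damon_set_attrs(ctx, 5000, 100000, 1000000, 10, 1000) */
if (ctx && damon_set_attrs(ctx, &attrs))
	pr_err("failed to set DAMON attributes\n");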
@@ -538,6 +557,9 @@ static inline bool damon_target_has_pid(const struct damon_ctx *ctx)
 int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive);
 int damon_stop(struct damon_ctx **ctxs, int nr_ctxs);
 
+int damon_set_region_biggest_system_ram_default(struct damon_target *t,
+			unsigned long *start, unsigned long *end);
+
 #endif	/* CONFIG_DAMON */
 
 #endif	/* _DAMON_H */
@@ -328,8 +328,10 @@ __FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size,
  * __struct_size() vs __member_size() must be captured here to avoid
  * evaluating argument side-effects further into the macro layers.
  */
+#ifndef CONFIG_KMSAN
 #define memset(p, c, s) __fortify_memset_chk(p, c, s,		\
 		__struct_size(p), __member_size(p))
+#endif
 
 /*
  * To make sure the compiler can enforce protection against buffer overflows,
@@ -36,29 +36,6 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
 	return !!(gfp_flags & __GFP_DIRECT_RECLAIM);
 }
 
-/**
- * gfpflags_normal_context - is gfp_flags a normal sleepable context?
- * @gfp_flags: gfp_flags to test
- *
- * Test whether @gfp_flags indicates that the allocation is from the
- * %current context and allowed to sleep.
- *
- * An allocation being allowed to block doesn't mean it owns the %current
- * context. When direct reclaim path tries to allocate memory, the
- * allocation context is nested inside whatever %current was doing at the
- * time of the original allocation. The nested allocation may be allowed
- * to block but modifying anything %current owns can corrupt the outer
- * context's expectations.
- *
- * %true result from this function indicates that the allocation context
- * can sleep and use anything that's associated with %current.
- */
-static inline bool gfpflags_normal_context(const gfp_t gfp_flags)
-{
-	return (gfp_flags & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC)) ==
-		__GFP_DIRECT_RECLAIM;
-}
-
 #ifdef CONFIG_HIGHMEM
 #define OPT_ZONE_HIGHMEM ZONE_HIGHMEM
 #else
@@ -6,6 +6,7 @@
 #include <linux/kernel.h>
 #include <linux/bug.h>
 #include <linux/cacheflush.h>
+#include <linux/kmsan.h>
 #include <linux/mm.h>
 #include <linux/uaccess.h>
 #include <linux/hardirq.h>
@@ -311,6 +312,7 @@ static inline void copy_user_highpage(struct page *to, struct page *from,
 	vfrom = kmap_local_page(from);
 	vto = kmap_local_page(to);
 	copy_user_page(vto, vfrom, vaddr, to);
+	kmsan_unpoison_memory(page_address(to), PAGE_SIZE);
 	kunmap_local(vto);
 	kunmap_local(vfrom);
 }
@@ -326,6 +328,7 @@ static inline void copy_highpage(struct page *to, struct page *from)
 	vfrom = kmap_local_page(from);
 	vto = kmap_local_page(to);
 	copy_page(vto, vfrom);
+	kmsan_copy_page_meta(to, from);
 	kunmap_local(vto);
 	kunmap_local(vfrom);
 }
@@ -444,6 +444,11 @@ static inline int split_folio_to_list(struct folio *folio,
 	return split_huge_page_to_list(&folio->page, list);
 }
 
+static inline int split_folio(struct folio *folio)
+{
+	return split_folio_to_list(folio, NULL);
+}
+
 /*
  * archs that select ARCH_WANTS_THP_SWAP but don't support THP_SWP due to
  * limitations in the implementation like arm64 MTE can override this to
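split_folio() is a thin convenience wrapper for splitting a large folio without queueing the resulting pages on a caller-supplied list; the usual locking and reference-count requirements of split_huge_page_to_list() still apply. A small illustrative use (the try_split() helper is hypothetical):

/* Hypothetical helper: split a large folio back into base pages. */
static int try_split(struct folio *folio)
{
	if (!folio_test_large(folio))
		return 0;			/* nothing to split */
	/* same as split_folio_to_list(folio, NULL) */
	return split_folio(folio);
}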
@@ -16,6 +16,7 @@
 struct ctl_table;
 struct user_struct;
 struct mmu_gather;
+struct node;
 
 #ifndef CONFIG_ARCH_HAS_HUGEPD
 typedef struct { unsigned long pd; } hugepd_t;
@@ -114,6 +115,12 @@ struct file_region {
 #endif
 };
 
+struct hugetlb_vma_lock {
+	struct kref refs;
+	struct rw_semaphore rw_sema;
+	struct vm_area_struct *vma;
+};
+
 extern struct resv_map *resv_map_alloc(void);
 void resv_map_release(struct kref *ref);
 
@@ -126,7 +133,7 @@ struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages,
 						long min_hpages);
 void hugepage_put_subpool(struct hugepage_subpool *spool);
 
-void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
+void hugetlb_dup_vma_private(struct vm_area_struct *vma);
 void clear_vma_resv_huge_pages(struct vm_area_struct *vma);
 int hugetlb_sysctl_handler(struct ctl_table *, int, void *, size_t *, loff_t *);
 int hugetlb_overcommit_handler(struct ctl_table *, int, void *, size_t *,
@@ -214,6 +221,14 @@ struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
 struct page *follow_huge_pgd(struct mm_struct *mm, unsigned long address,
 				pgd_t *pgd, int flags);
 
+void hugetlb_vma_lock_read(struct vm_area_struct *vma);
+void hugetlb_vma_unlock_read(struct vm_area_struct *vma);
+void hugetlb_vma_lock_write(struct vm_area_struct *vma);
+void hugetlb_vma_unlock_write(struct vm_area_struct *vma);
+int hugetlb_vma_trylock_write(struct vm_area_struct *vma);
+void hugetlb_vma_assert_locked(struct vm_area_struct *vma);
+void hugetlb_vma_lock_release(struct kref *kref);
+
 int pmd_huge(pmd_t pmd);
 int pud_huge(pud_t pud);
 unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
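These hugetlb_vma_lock_*() declarations expose the per-VMA lock (struct hugetlb_vma_lock above) used to synchronize hugetlb shared page table (PMD) sharing and unsharing. A hedged sketch of the read-side pattern a page-table walker would follow; the function and its body are illustrative only, not taken from this diff:

/* Illustrative reader: hold the per-VMA lock so a shared PMD cannot be
 * unshared (and its page table freed) while it is being walked. */
static void walk_hugetlb_vma(struct vm_area_struct *vma)
{
	hugetlb_vma_lock_read(vma);
	/* ... walk the hugetlb page tables, e.g. via huge_pte_offset() ... */
	hugetlb_vma_unlock_read(vma);
}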
@@ -225,7 +240,7 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
 
 #else /* !CONFIG_HUGETLB_PAGE */
 
-static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
+static inline void hugetlb_dup_vma_private(struct vm_area_struct *vma)
 {
 }
 
@@ -336,6 +351,31 @@ static inline int prepare_hugepage_range(struct file *file,
 	return -EINVAL;
 }
 
+static inline void hugetlb_vma_lock_read(struct vm_area_struct *vma)
+{
+}
+
+static inline void hugetlb_vma_unlock_read(struct vm_area_struct *vma)
+{
+}
+
+static inline void hugetlb_vma_lock_write(struct vm_area_struct *vma)
+{
+}
+
+static inline void hugetlb_vma_unlock_write(struct vm_area_struct *vma)
+{
+}
+
+static inline int hugetlb_vma_trylock_write(struct vm_area_struct *vma)
+{
+	return 1;
+}
+
+static inline void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
+{
+}
+
 static inline int pmd_huge(pmd_t pmd)
 {
 	return 0;
@@ -665,7 +705,7 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
 				nodemask_t *nmask, gfp_t gfp_mask);
 struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
 				unsigned long address);
-int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
+int hugetlb_add_to_page_cache(struct page *page, struct address_space *mapping,
 			pgoff_t idx);
 void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
 			unsigned long address, struct page *page);
@@ -935,6 +975,11 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
 }
 #endif
 
+#ifdef CONFIG_NUMA
+void hugetlb_register_node(struct node *node);
+void hugetlb_unregister_node(struct node *node);
+#endif
+
 #else /* CONFIG_HUGETLB_PAGE */
 struct hstate {};
 
@@ -1109,6 +1154,14 @@ static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 					pte_t *ptep, pte_t pte)
 {
 }
+
+static inline void hugetlb_register_node(struct node *node)
+{
+}
+
+static inline void hugetlb_unregister_node(struct node *node)
+{
+}
 #endif /* CONFIG_HUGETLB_PAGE */
 
 static inline spinlock_t *huge_pte_lock(struct hstate *h,
@@ -2,7 +2,7 @@
 
 /*
  * This header provides generic wrappers for memory access instrumentation that
- * the compiler cannot emit for: KASAN, KCSAN.
+ * the compiler cannot emit for: KASAN, KCSAN, KMSAN.
  */
 #ifndef _LINUX_INSTRUMENTED_H
 #define _LINUX_INSTRUMENTED_H
@@ -10,6 +10,7 @@
 #include <linux/compiler.h>
 #include <linux/kasan-checks.h>
 #include <linux/kcsan-checks.h>
+#include <linux/kmsan-checks.h>
 #include <linux/types.h>
 
 /**
@@ -117,10 +118,11 @@ instrument_copy_to_user(void __user *to, const void *from, unsigned long n)
 {
 	kasan_check_read(from, n);
 	kcsan_check_read(from, n);
+	kmsan_copy_to_user(to, from, n, 0);
 }
 
 /**
- * instrument_copy_from_user - instrument writes of copy_from_user
+ * instrument_copy_from_user_before - add instrumentation before copy_from_user
  *
  * Instrument writes to kernel memory, that are due to copy_from_user (and
  * variants). The instrumentation should be inserted before the accesses.
@@ -130,10 +132,61 @@ instrument_copy_to_user(void __user *to, const void *from, unsigned long n)
  * @n number of bytes to copy
  */
 static __always_inline void
-instrument_copy_from_user(const void *to, const void __user *from, unsigned long n)
+instrument_copy_from_user_before(const void *to, const void __user *from, unsigned long n)
 {
 	kasan_check_write(to, n);
 	kcsan_check_write(to, n);
 }
 
+/**
+ * instrument_copy_from_user_after - add instrumentation after copy_from_user
+ *
+ * Instrument writes to kernel memory, that are due to copy_from_user (and
+ * variants). The instrumentation should be inserted after the accesses.
+ *
+ * @to destination address
+ * @from source address
+ * @n number of bytes to copy
+ * @left number of bytes not copied (as returned by copy_from_user)
+ */
+static __always_inline void
+instrument_copy_from_user_after(const void *to, const void __user *from,
+				unsigned long n, unsigned long left)
+{
+	kmsan_unpoison_memory(to, n - left);
+}
+
+/**
+ * instrument_get_user() - add instrumentation to get_user()-like macros
+ *
+ * get_user() and friends are fragile, so it may depend on the implementation
+ * whether the instrumentation happens before or after the data is copied from
+ * the userspace.
+ *
+ * @to destination variable, may not be address-taken
+ */
+#define instrument_get_user(to)				\
+({							\
+	u64 __tmp = (u64)(to);				\
+	kmsan_unpoison_memory(&__tmp, sizeof(__tmp));	\
+	to = __tmp;					\
+})
+
+
+/**
+ * instrument_put_user() - add instrumentation to put_user()-like macros
+ *
+ * put_user() and friends are fragile, so it may depend on the implementation
+ * whether the instrumentation happens before or after the data is copied from
+ * the userspace.
+ *
+ * @from source address
+ * @ptr userspace pointer to copy to
+ * @size number of bytes to copy
+ */
+#define instrument_put_user(from, ptr, size)			\
+({								\
+	kmsan_copy_to_user(ptr, &from, sizeof(from), 0);	\
+})
+
 #endif /* _LINUX_INSTRUMENTED_H */
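Splitting instrument_copy_from_user() into _before/_after variants lets KMSAN run after the copy, when the number of bytes actually copied is known, so only those bytes are unpoisoned. A sketch of how a copy_from_user() style wrapper is expected to call the pair; the wrapper itself is illustrative, only access_ok() and raw_copy_from_user() are the usual kernel primitives:

static inline unsigned long
sketch_copy_from_user(void *to, const void __user *from, unsigned long n)
{
	unsigned long res = n;

	if (access_ok(from, n)) {
		/* KASAN/KCSAN checks happen before the access... */
		instrument_copy_from_user_before(to, from, n);
		res = raw_copy_from_user(to, from, n);
		/* ...and KMSAN unpoisons the n - res bytes that were written */
		instrument_copy_from_user_after(to, from, n, res);
	}
	return res;
}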
@@ -98,19 +98,13 @@ static inline bool kasan_has_integrated_init(void)
 #ifdef CONFIG_KASAN
 
 struct kasan_cache {
+#ifdef CONFIG_KASAN_GENERIC
 	int alloc_meta_offset;
 	int free_meta_offset;
+#endif
 	bool is_kmalloc;
 };
 
-slab_flags_t __kasan_never_merge(void);
-static __always_inline slab_flags_t kasan_never_merge(void)
-{
-	if (kasan_enabled())
-		return __kasan_never_merge();
-	return 0;
-}
-
 void __kasan_unpoison_range(const void *addr, size_t size);
 static __always_inline void kasan_unpoison_range(const void *addr, size_t size)
 {
@@ -134,15 +128,6 @@ static __always_inline void kasan_unpoison_pages(struct page *page,
 		__kasan_unpoison_pages(page, order, init);
 }
 
-void __kasan_cache_create(struct kmem_cache *cache, unsigned int *size,
-			slab_flags_t *flags);
-static __always_inline void kasan_cache_create(struct kmem_cache *cache,
-			unsigned int *size, slab_flags_t *flags)
-{
-	if (kasan_enabled())
-		__kasan_cache_create(cache, size, flags);
-}
-
 void __kasan_cache_create_kmalloc(struct kmem_cache *cache);
 static __always_inline void kasan_cache_create_kmalloc(struct kmem_cache *cache)
 {
@@ -150,14 +135,6 @@ static __always_inline void kasan_cache_create_kmalloc(struct kmem_cache *cache)
 		__kasan_cache_create_kmalloc(cache);
 }
 
-size_t __kasan_metadata_size(struct kmem_cache *cache);
-static __always_inline size_t kasan_metadata_size(struct kmem_cache *cache)
-{
-	if (kasan_enabled())
-		return __kasan_metadata_size(cache);
-	return 0;
-}
-
 void __kasan_poison_slab(struct slab *slab);
 static __always_inline void kasan_poison_slab(struct slab *slab)
 {
@@ -269,20 +246,12 @@ static __always_inline bool kasan_check_byte(const void *addr)
 
 #else /* CONFIG_KASAN */
 
-static inline slab_flags_t kasan_never_merge(void)
-{
-	return 0;
-}
 static inline void kasan_unpoison_range(const void *address, size_t size) {}
 static inline void kasan_poison_pages(struct page *page, unsigned int order,
 				bool init) {}
 static inline void kasan_unpoison_pages(struct page *page, unsigned int order,
 				bool init) {}
-static inline void kasan_cache_create(struct kmem_cache *cache,
-				unsigned int *size,
-				slab_flags_t *flags) {}
 static inline void kasan_cache_create_kmalloc(struct kmem_cache *cache) {}
-static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; }
 static inline void kasan_poison_slab(struct slab *slab) {}
 static inline void kasan_unpoison_object_data(struct kmem_cache *cache,
 				void *object) {}
@@ -333,6 +302,11 @@ static inline void kasan_unpoison_task_stack(struct task_struct *task) {}
 
 #ifdef CONFIG_KASAN_GENERIC
 
+size_t kasan_metadata_size(struct kmem_cache *cache);
+slab_flags_t kasan_never_merge(void);
+void kasan_cache_create(struct kmem_cache *cache, unsigned int *size,
+			slab_flags_t *flags);
+
 void kasan_cache_shrink(struct kmem_cache *cache);
 void kasan_cache_shutdown(struct kmem_cache *cache);
 void kasan_record_aux_stack(void *ptr);
@@ -340,6 +314,21 @@ void kasan_record_aux_stack_noalloc(void *ptr);
 
 #else /* CONFIG_KASAN_GENERIC */
 
+/* Tag-based KASAN modes do not use per-object metadata. */
+static inline size_t kasan_metadata_size(struct kmem_cache *cache)
+{
+	return 0;
+}
+/* And thus nothing prevents cache merging. */
+static inline slab_flags_t kasan_never_merge(void)
+{
+	return 0;
+}
+/* And no cache-related metadata initialization is required. */
+static inline void kasan_cache_create(struct kmem_cache *cache,
+				unsigned int *size,
+				slab_flags_t *flags) {}
+
 static inline void kasan_cache_shrink(struct kmem_cache *cache) {}
 static inline void kasan_cache_shutdown(struct kmem_cache *cache) {}
 static inline void kasan_record_aux_stack(void *ptr) {}
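Moving kasan_metadata_size(), kasan_never_merge() and kasan_cache_create() under CONFIG_KASAN_GENERIC means callers can keep using them unconditionally: tag-based and KASAN-disabled builds now resolve to the inline stubs above. An illustrative caller; the helper name is made up for this sketch:

/* Illustrative: report how many bytes of a cache's objects are taken up by
 * KASAN metadata; compiles to 0 unless generic KASAN reserves alloc/free
 * metadata for the cache. */
static inline size_t object_debug_overhead(struct kmem_cache *cache)
{
	return kasan_metadata_size(cache);
}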
@@ -16,11 +16,13 @@ extern void khugepaged_enter_vma(struct vm_area_struct *vma,
 					  unsigned long vm_flags);
 extern void khugepaged_min_free_kbytes_update(void);
 #ifdef CONFIG_SHMEM
-extern void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr);
+extern int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
+				   bool install_pmd);
 #else
-static inline void collapse_pte_mapped_thp(struct mm_struct *mm,
-					   unsigned long addr)
+static inline int collapse_pte_mapped_thp(struct mm_struct *mm,
+					  unsigned long addr, bool install_pmd)
 {
+	return 0;
 }
 #endif
 
@@ -46,9 +48,10 @@ static inline void khugepaged_enter_vma(struct vm_area_struct *vma,
 					  unsigned long vm_flags)
 {
 }
-static inline void collapse_pte_mapped_thp(struct mm_struct *mm,
-					   unsigned long addr)
+static inline int collapse_pte_mapped_thp(struct mm_struct *mm,
+					  unsigned long addr, bool install_pmd)
 {
+	return 0;
 }
 
 static inline void khugepaged_min_free_kbytes_update(void)
Some files were not shown because too many files have changed in this diff.