Merge b34133fec8 ("Merge tag 'perf-core-2020-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip") into android-mainline

Steps on the way to 5.9-rc1

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: Ib901b7670a0313d91a57bce86c9587a25020cc3d
This commit is contained in:
Greg Kroah-Hartman 2020-08-06 20:25:54 +02:00
commit d332fba061
584 changed files with 12936 additions and 23257 deletions

View File

@ -0,0 +1,33 @@
What: /sys/devices/uncore_iio_x/dieX
Date: February 2020
Contact: Roman Sudarikov <roman.sudarikov@linux.intel.com>
Description:
Each IIO stack (PCIe root port) has its own IIO PMON block, so
each dieX file (where X is the die number) holds the "Segment:Root Bus"
for the PCIe root port that can be monitored by that IIO PMON
block.
For example, on a 4-die Xeon platform with up to 6 IIO stacks per
die and, therefore, 6 IIO PMON blocks per die, the mapping of
IIO PMON block 0 is exposed as follows:
$ ls /sys/devices/uncore_iio_0/die*
-r--r--r-- /sys/devices/uncore_iio_0/die0
-r--r--r-- /sys/devices/uncore_iio_0/die1
-r--r--r-- /sys/devices/uncore_iio_0/die2
-r--r--r-- /sys/devices/uncore_iio_0/die3
$ tail /sys/devices/uncore_iio_0/die*
==> /sys/devices/uncore_iio_0/die0 <==
0000:00
==> /sys/devices/uncore_iio_0/die1 <==
0000:40
==> /sys/devices/uncore_iio_0/die2 <==
0000:80
==> /sys/devices/uncore_iio_0/die3 <==
0000:c0
Which means:
IIO PMU 0 on die 0 belongs to PCI RP on bus 0x00, domain 0x0000
IIO PMU 0 on die 1 belongs to PCI RP on bus 0x40, domain 0x0000
IIO PMU 0 on die 2 belongs to PCI RP on bus 0x80, domain 0x0000
IIO PMU 0 on die 3 belongs to PCI RP on bus 0xc0, domain 0x0000

View File

@ -463,7 +463,7 @@ again without disrupting RCU readers.
This guarantee was only partially premeditated. DYNIX/ptx used an
explicit memory barrier for publication, but had nothing resembling
``rcu_dereference()`` for subscription, nor did it have anything
resembling the ``smp_read_barrier_depends()`` that was later subsumed
resembling the dependency-ordering barrier that was later subsumed
into ``rcu_dereference()`` and later still into ``READ_ONCE()``. The
need for these operations made itself known quite suddenly at a
late-1990s meeting with the DEC Alpha architects, back in the days when
@ -2583,7 +2583,12 @@ not work to have these markers in the trampoline itself, because there
would need to be instructions following ``rcu_read_unlock()``. Although
``synchronize_rcu()`` would guarantee that execution reached the
``rcu_read_unlock()``, it would not be able to guarantee that execution
had completely left the trampoline.
had completely left the trampoline. Worse yet, in some situations
the trampoline's protection must extend a few instructions *prior* to
execution reaching the trampoline. For example, these few instructions
might calculate the address of the trampoline, so that entering the
trampoline would be pre-ordained a surprisingly long time before execution
actually reached the trampoline itself.
The solution, in the form of `Tasks
RCU <https://lwn.net/Articles/607117/>`__, is to have implicit read-side

View File

@ -1,4 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0
================================
Review Checklist for RCU Patches
================================
This document contains a checklist for producing and reviewing patches
@ -411,18 +415,21 @@ over a rather long period of time, but improvements are always welcome!
__rcu sparse checks to validate your RCU code. These can help
find problems as follows:
CONFIG_PROVE_LOCKING: check that accesses to RCU-protected data
CONFIG_PROVE_LOCKING:
check that accesses to RCU-protected data
structures are carried out under the proper RCU
read-side critical section, while holding the right
combination of locks, or whatever other conditions
are appropriate.
CONFIG_DEBUG_OBJECTS_RCU_HEAD: check that you don't pass the
CONFIG_DEBUG_OBJECTS_RCU_HEAD:
check that you don't pass the
same object to call_rcu() (or friends) before an RCU
grace period has elapsed since the last time that you
passed that same object to call_rcu() (or friends).
__rcu sparse checks: tag the pointer to the RCU-protected data
__rcu sparse checks:
tag the pointer to the RCU-protected data
structure with __rcu, and sparse will warn you if you
access that pointer without the services of one of the
variants of rcu_dereference().
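For illustration, here is a minimal sketch of such an annotation
(struct foo, its field, and read_foo_a() are made-up names rather than
existing kernel code)::

	struct foo {
		int a;
	};

	struct foo __rcu *global_foo;	/* sparse tracks the __rcu address space */

	int read_foo_a(void)
	{
		struct foo *p;
		int ret = -1;

		rcu_read_lock();
		p = rcu_dereference(global_foo);	/* proper RCU accessor */
		if (p)
			ret = p->a;
		rcu_read_unlock();
		return ret;
	}

Assigning global_foo to a plain pointer without rcu_dereference() would
draw a sparse address-space warning, which is exactly the class of bug
this check catches.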
@ -442,8 +449,8 @@ over a rather long period of time, but improvements are always welcome!
You instead need to use one of the barrier functions:
o call_rcu() -> rcu_barrier()
o call_srcu() -> srcu_barrier()
- call_rcu() -> rcu_barrier()
- call_srcu() -> srcu_barrier()
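For illustration, a module-exit path that queued callbacks via call_rcu()
might wait for them as in the following sketch (my_exit(),
remove_all_elements(), and my_cache are made-up names)::

	static void __exit my_exit(void)
	{
		remove_all_elements();	/* each removal queues a call_rcu() callback */

		/*
		 * Wait for all already-queued callbacks to be invoked.
		 * A synchronize_rcu() here would wait only for pre-existing
		 * readers, not for the pending callbacks themselves.
		 */
		rcu_barrier();

		kmem_cache_destroy(my_cache);
	}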
However, these barrier functions are absolutely -not- guaranteed
to wait for a grace period. In fact, if there are no call_rcu()

View File

@ -1,3 +1,5 @@
.. SPDX-License-Identifier: GPL-2.0
.. _rcu_concepts:
============
@ -8,10 +10,17 @@ RCU concepts
:maxdepth: 3
arrayRCU
checklist
lockdep
lockdep-splat
rcubarrier
rcu_dereference
whatisRCU
rcu
rculist_nulls
rcuref
torture
stallwarn
listRCU
NMI-RCU
UP

View File

@ -1,3 +1,9 @@
.. SPDX-License-Identifier: GPL-2.0
=================
Lockdep-RCU Splat
=================
Lockdep-RCU was added to the Linux kernel in early 2010
(http://lwn.net/Articles/371986/). This facility checks for some common
misuses of the RCU API, most notably using one of the rcu_dereference()
@ -12,55 +18,54 @@ overwriting or worse. There can of course be false positives, this
being the real world and all that.
So let's look at an example RCU lockdep splat from 3.0-rc5, one that
has long since been fixed:
has long since been fixed::
=============================
WARNING: suspicious RCU usage
-----------------------------
block/cfq-iosched.c:2776 suspicious rcu_dereference_protected() usage!
=============================
WARNING: suspicious RCU usage
-----------------------------
block/cfq-iosched.c:2776 suspicious rcu_dereference_protected() usage!
other info that might help us debug this:
other info that might help us debug this::
rcu_scheduler_active = 1, debug_locks = 0
3 locks held by scsi_scan_6/1552:
#0: (&shost->scan_mutex){+.+.}, at: [<ffffffff8145efca>]
scsi_scan_host_selected+0x5a/0x150
#1: (&eq->sysfs_lock){+.+.}, at: [<ffffffff812a5032>]
elevator_exit+0x22/0x60
#2: (&(&q->__queue_lock)->rlock){-.-.}, at: [<ffffffff812b6233>]
cfq_exit_queue+0x43/0x190
rcu_scheduler_active = 1, debug_locks = 0
3 locks held by scsi_scan_6/1552:
#0: (&shost->scan_mutex){+.+.}, at: [<ffffffff8145efca>]
scsi_scan_host_selected+0x5a/0x150
#1: (&eq->sysfs_lock){+.+.}, at: [<ffffffff812a5032>]
elevator_exit+0x22/0x60
#2: (&(&q->__queue_lock)->rlock){-.-.}, at: [<ffffffff812b6233>]
cfq_exit_queue+0x43/0x190
stack backtrace:
Pid: 1552, comm: scsi_scan_6 Not tainted 3.0.0-rc5 #17
Call Trace:
[<ffffffff810abb9b>] lockdep_rcu_dereference+0xbb/0xc0
[<ffffffff812b6139>] __cfq_exit_single_io_context+0xe9/0x120
[<ffffffff812b626c>] cfq_exit_queue+0x7c/0x190
[<ffffffff812a5046>] elevator_exit+0x36/0x60
[<ffffffff812a802a>] blk_cleanup_queue+0x4a/0x60
[<ffffffff8145cc09>] scsi_free_queue+0x9/0x10
[<ffffffff81460944>] __scsi_remove_device+0x84/0xd0
[<ffffffff8145dca3>] scsi_probe_and_add_lun+0x353/0xb10
[<ffffffff817da069>] ? error_exit+0x29/0xb0
[<ffffffff817d98ed>] ? _raw_spin_unlock_irqrestore+0x3d/0x80
[<ffffffff8145e722>] __scsi_scan_target+0x112/0x680
[<ffffffff812c690d>] ? trace_hardirqs_off_thunk+0x3a/0x3c
[<ffffffff817da069>] ? error_exit+0x29/0xb0
[<ffffffff812bcc60>] ? kobject_del+0x40/0x40
[<ffffffff8145ed16>] scsi_scan_channel+0x86/0xb0
[<ffffffff8145f0b0>] scsi_scan_host_selected+0x140/0x150
[<ffffffff8145f149>] do_scsi_scan_host+0x89/0x90
[<ffffffff8145f170>] do_scan_async+0x20/0x160
[<ffffffff8145f150>] ? do_scsi_scan_host+0x90/0x90
[<ffffffff810975b6>] kthread+0xa6/0xb0
[<ffffffff817db154>] kernel_thread_helper+0x4/0x10
[<ffffffff81066430>] ? finish_task_switch+0x80/0x110
[<ffffffff817d9c04>] ? retint_restore_args+0xe/0xe
[<ffffffff81097510>] ? __kthread_init_worker+0x70/0x70
[<ffffffff817db150>] ? gs_change+0xb/0xb
stack backtrace:
Pid: 1552, comm: scsi_scan_6 Not tainted 3.0.0-rc5 #17
Call Trace:
[<ffffffff810abb9b>] lockdep_rcu_dereference+0xbb/0xc0
[<ffffffff812b6139>] __cfq_exit_single_io_context+0xe9/0x120
[<ffffffff812b626c>] cfq_exit_queue+0x7c/0x190
[<ffffffff812a5046>] elevator_exit+0x36/0x60
[<ffffffff812a802a>] blk_cleanup_queue+0x4a/0x60
[<ffffffff8145cc09>] scsi_free_queue+0x9/0x10
[<ffffffff81460944>] __scsi_remove_device+0x84/0xd0
[<ffffffff8145dca3>] scsi_probe_and_add_lun+0x353/0xb10
[<ffffffff817da069>] ? error_exit+0x29/0xb0
[<ffffffff817d98ed>] ? _raw_spin_unlock_irqrestore+0x3d/0x80
[<ffffffff8145e722>] __scsi_scan_target+0x112/0x680
[<ffffffff812c690d>] ? trace_hardirqs_off_thunk+0x3a/0x3c
[<ffffffff817da069>] ? error_exit+0x29/0xb0
[<ffffffff812bcc60>] ? kobject_del+0x40/0x40
[<ffffffff8145ed16>] scsi_scan_channel+0x86/0xb0
[<ffffffff8145f0b0>] scsi_scan_host_selected+0x140/0x150
[<ffffffff8145f149>] do_scsi_scan_host+0x89/0x90
[<ffffffff8145f170>] do_scan_async+0x20/0x160
[<ffffffff8145f150>] ? do_scsi_scan_host+0x90/0x90
[<ffffffff810975b6>] kthread+0xa6/0xb0
[<ffffffff817db154>] kernel_thread_helper+0x4/0x10
[<ffffffff81066430>] ? finish_task_switch+0x80/0x110
[<ffffffff817d9c04>] ? retint_restore_args+0xe/0xe
[<ffffffff81097510>] ? __kthread_init_worker+0x70/0x70
[<ffffffff817db150>] ? gs_change+0xb/0xb
Line 2776 of block/cfq-iosched.c in v3.0-rc5 is as follows:
Line 2776 of block/cfq-iosched.c in v3.0-rc5 is as follows::
if (rcu_dereference(ioc->ioc_data) == cic) {
@ -70,7 +75,7 @@ case. Instead, we hold three locks, one of which might be RCU related.
And maybe that lock really does protect this reference. If so, the fix
is to inform RCU, perhaps by changing __cfq_exit_single_io_context() to
take the struct request_queue "q" from cfq_exit_queue() as an argument,
which would permit us to invoke rcu_dereference_protected as follows:
which would permit us to invoke rcu_dereference_protected as follows::
if (rcu_dereference_protected(ioc->ioc_data,
lockdep_is_held(&q->queue_lock)) == cic) {
@ -85,7 +90,7 @@ On the other hand, perhaps we really do need an RCU read-side critical
section. In this case, the critical section must span the use of the
return value from rcu_dereference(), or at least until there is some
reference count incremented or some such. One way to handle this is to
add rcu_read_lock() and rcu_read_unlock() as follows:
add rcu_read_lock() and rcu_read_unlock() as follows::
rcu_read_lock();
if (rcu_dereference(ioc->ioc_data) == cic) {
@ -102,7 +107,7 @@ above lockdep-RCU splat.
But in this particular case, we don't actually dereference the pointer
returned from rcu_dereference(). Instead, that pointer is just compared
to the cic pointer, which means that the rcu_dereference() can be replaced
by rcu_access_pointer() as follows:
by rcu_access_pointer() as follows::
if (rcu_access_pointer(ioc->ioc_data) == cic) {

View File

@ -1,4 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0
========================
RCU and lockdep checking
========================
All flavors of RCU have lockdep checking available, so that lockdep is
aware of when each task enters and leaves any flavor of RCU read-side
@ -8,7 +12,7 @@ tracking to include RCU state, which can sometimes help when debugging
deadlocks and the like.
In addition, RCU provides the following primitives that check lockdep's
state:
state::
rcu_read_lock_held() for normal RCU.
rcu_read_lock_bh_held() for RCU-bh.
@ -63,7 +67,7 @@ checking of rcu_dereference() primitives:
The rcu_dereference_check() check expression can be any boolean
expression, but would normally include a lockdep expression. However,
any boolean expression can be used. For a moderately ornate example,
consider the following:
consider the following::
file = rcu_dereference_check(fdt->fd[fd],
lockdep_is_held(&files->file_lock) ||
@ -82,7 +86,7 @@ RCU read-side critical sections, in case (2) the ->file_lock prevents
any change from taking place, and finally, in case (3) the current task
is the only task accessing the file_struct, again preventing any change
from taking place. If the above statement was invoked only from updater
code, it could instead be written as follows:
code, it could instead be written as follows::
file = rcu_dereference_protected(fdt->fd[fd],
lockdep_is_held(&files->file_lock) ||
@ -105,7 +109,7 @@ false and they are called from outside any RCU read-side critical section.
For example, the workqueue for_each_pwq() macro is intended to be used
either within an RCU read-side critical section or with wq->mutex held.
It is thus implemented as follows:
It is thus implemented as follows::
#define for_each_pwq(pwq, wq)
list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node,

View File

@ -0,0 +1,200 @@
.. SPDX-License-Identifier: GPL-2.0
=================================================
Using RCU hlist_nulls to protect list and objects
=================================================
This section describes how to use hlist_nulls to
protect read-mostly linked lists and
objects using SLAB_TYPESAFE_BY_RCU allocations.
Please read the basics in Documentation/RCU/listRCU.rst
Using 'nulls'
=============
Using special markers (called 'nulls') is a convenient way
to solve the following problem:
A typical RCU linked list managing objects which are
allocated from a SLAB_TYPESAFE_BY_RCU kmem_cache can
use the following algorithms:
1) Lookup algo
--------------
::
rcu_read_lock()
begin:
obj = lockless_lookup(key);
if (obj) {
if (!try_get_ref(obj)) // might fail for free objects
goto begin;
/*
* Because a writer could delete the object, and a writer could
* reuse the object before the RCU grace period, we
* must check the key after getting the reference on the object
*/
if (obj->key != key) { // not the object we expected
put_ref(obj);
goto begin;
}
}
rcu_read_unlock();
Beware that lockless_lookup(key) cannot use the traditional hlist_for_each_entry_rcu();
it must instead use a version with an additional memory barrier (smp_rmb())
::
lockless_lookup(key)
{
struct hlist_node *node, *next;
for (pos = rcu_dereference((head)->first);
pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) &&
({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
pos = rcu_dereference(next))
if (obj->key == key)
return obj;
return NULL;
}
And note the traditional hlist_for_each_entry_rcu() misses this smp_rmb()::
struct hlist_node *node;
for (pos = rcu_dereference((head)->first);
pos && ({ prefetch(pos->next); 1; }) &&
({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
pos = rcu_dereference(pos->next))
if (obj->key == key)
return obj;
return NULL;
Quoting Corey Minyard::
"If the object is moved from one list to another list in-between the
time the hash is calculated and the next field is accessed, and the
object has moved to the end of a new list, the traversal will not
complete properly on the list it should have, since the object will
be on the end of the new list and there's not a way to tell it's on a
new list and restart the list traversal. I think that this can be
solved by pre-fetching the "next" field (with proper barriers) before
checking the key."
2) Insert algo
--------------
We need to make sure a reader cannot read the new 'obj->obj_next' value
together with the previous value of 'obj->key'. Otherwise, an item could be
deleted from a chain and inserted into another chain. If the new chain was
empty before the move, the 'next' pointer is NULL, and a lockless reader
cannot detect that it missed the following items in the original chain.
::
/*
* Please note that new inserts are done at the head of list,
* not in the middle or end.
*/
obj = kmem_cache_alloc(...);
lock_chain(); // typically a spin_lock()
obj->key = key;
/*
* we need to make sure obj->key is updated before obj->next
* or obj->refcnt
*/
smp_wmb();
atomic_set(&obj->refcnt, 1);
hlist_add_head_rcu(&obj->obj_node, list);
unlock_chain(); // typically a spin_unlock()
3) Remove algo
--------------
Nothing special here, we can use a standard RCU hlist deletion.
But because of SLAB_TYPESAFE_BY_RCU, beware that a deleted object can be
reused very quickly (before the end of the RCU grace period)
::
if (put_last_reference_on(obj)) {
lock_chain(); // typically a spin_lock()
hlist_del_init_rcu(&obj->obj_node);
unlock_chain(); // typically a spin_unlock()
kmem_cache_free(cachep, obj);
}
--------------------------------------------------------------------------
Avoiding extra smp_rmb()
========================
With hlist_nulls we can avoid extra smp_rmb() in lockless_lookup()
and extra smp_wmb() in insert function.
For example, if we choose to store the slot number as the 'nulls'
end-of-list marker for each slot of the hash table, we can detect
a race (some writer did a delete and/or a move of an object
to another chain) by checking the final 'nulls' value if
the lookup met the end of the chain. If the final 'nulls' value
is not the slot number, then we must restart the lookup at
the beginning. If the object was moved to the same chain,
then the reader doesn't care: it might eventually
scan the list again without harm.
1) lookup algo
--------------
::
head = &table[slot];
rcu_read_lock();
begin:
hlist_nulls_for_each_entry_rcu(obj, node, head, member) {
if (obj->key == key) {
if (!try_get_ref(obj)) // might fail for free objects
goto begin;
if (obj->key != key) { // not the object we expected
put_ref(obj);
goto begin;
}
goto out;
}
}
/*
* if the nulls value we got at the end of this lookup is
* not the expected one, we must restart lookup.
* We probably met an item that was moved to another chain.
*/
if (get_nulls_value(node) != slot)
goto begin;
obj = NULL;
out:
rcu_read_unlock();
2) Insert function
------------------
::
/*
* Please note that new inserts are done at the head of list,
* not in the middle or end.
*/
obj = kmem_cache_alloc(cachep);
lock_chain(); // typically a spin_lock()
obj->key = key;
/*
* changes to obj->key must be visible before refcnt one
*/
smp_wmb();
atomic_set(&obj->refcnt, 1);
/*
* insert obj in RCU way (readers might be traversing chain)
*/
hlist_nulls_add_head_rcu(&obj->obj_node, list);
unlock_chain(); // typically a spin_unlock()

View File

@ -1,172 +0,0 @@
Using hlist_nulls to protect read-mostly linked lists and
objects using SLAB_TYPESAFE_BY_RCU allocations.
Please read the basics in Documentation/RCU/listRCU.rst
Using special makers (called 'nulls') is a convenient way
to solve following problem :
A typical RCU linked list managing objects which are
allocated with SLAB_TYPESAFE_BY_RCU kmem_cache can
use following algos :
1) Lookup algo
--------------
rcu_read_lock()
begin:
obj = lockless_lookup(key);
if (obj) {
if (!try_get_ref(obj)) // might fail for free objects
goto begin;
/*
* Because a writer could delete object, and a writer could
* reuse these object before the RCU grace period, we
* must check key after getting the reference on object
*/
if (obj->key != key) { // not the object we expected
put_ref(obj);
goto begin;
}
}
rcu_read_unlock();
Beware that lockless_lookup(key) cannot use traditional hlist_for_each_entry_rcu()
but a version with an additional memory barrier (smp_rmb())
lockless_lookup(key)
{
struct hlist_node *node, *next;
for (pos = rcu_dereference((head)->first);
pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) &&
({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
pos = rcu_dereference(next))
if (obj->key == key)
return obj;
return NULL;
And note the traditional hlist_for_each_entry_rcu() misses this smp_rmb() :
struct hlist_node *node;
for (pos = rcu_dereference((head)->first);
pos && ({ prefetch(pos->next); 1; }) &&
({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
pos = rcu_dereference(pos->next))
if (obj->key == key)
return obj;
return NULL;
}
Quoting Corey Minyard :
"If the object is moved from one list to another list in-between the
time the hash is calculated and the next field is accessed, and the
object has moved to the end of a new list, the traversal will not
complete properly on the list it should have, since the object will
be on the end of the new list and there's not a way to tell it's on a
new list and restart the list traversal. I think that this can be
solved by pre-fetching the "next" field (with proper barriers) before
checking the key."
2) Insert algo :
----------------
We need to make sure a reader cannot read the new 'obj->obj_next' value
and previous value of 'obj->key'. Or else, an item could be deleted
from a chain, and inserted into another chain. If new chain was empty
before the move, 'next' pointer is NULL, and lockless reader can
not detect it missed following items in original chain.
/*
* Please note that new inserts are done at the head of list,
* not in the middle or end.
*/
obj = kmem_cache_alloc(...);
lock_chain(); // typically a spin_lock()
obj->key = key;
/*
* we need to make sure obj->key is updated before obj->next
* or obj->refcnt
*/
smp_wmb();
atomic_set(&obj->refcnt, 1);
hlist_add_head_rcu(&obj->obj_node, list);
unlock_chain(); // typically a spin_unlock()
3) Remove algo
--------------
Nothing special here, we can use a standard RCU hlist deletion.
But thanks to SLAB_TYPESAFE_BY_RCU, beware a deleted object can be reused
very very fast (before the end of RCU grace period)
if (put_last_reference_on(obj) {
lock_chain(); // typically a spin_lock()
hlist_del_init_rcu(&obj->obj_node);
unlock_chain(); // typically a spin_unlock()
kmem_cache_free(cachep, obj);
}
--------------------------------------------------------------------------
With hlist_nulls we can avoid extra smp_rmb() in lockless_lookup()
and extra smp_wmb() in insert function.
For example, if we choose to store the slot number as the 'nulls'
end-of-list marker for each slot of the hash table, we can detect
a race (some writer did a delete and/or a move of an object
to another chain) checking the final 'nulls' value if
the lookup met the end of chain. If final 'nulls' value
is not the slot number, then we must restart the lookup at
the beginning. If the object was moved to the same chain,
then the reader doesn't care : It might eventually
scan the list again without harm.
1) lookup algo
head = &table[slot];
rcu_read_lock();
begin:
hlist_nulls_for_each_entry_rcu(obj, node, head, member) {
if (obj->key == key) {
if (!try_get_ref(obj)) // might fail for free objects
goto begin;
if (obj->key != key) { // not the object we expected
put_ref(obj);
goto begin;
}
goto out;
}
/*
* if the nulls value we got at the end of this lookup is
* not the expected one, we must restart lookup.
* We probably met an item that was moved to another chain.
*/
if (get_nulls_value(node) != slot)
goto begin;
obj = NULL;
out:
rcu_read_unlock();
2) Insert function :
--------------------
/*
* Please note that new inserts are done at the head of list,
* not in the middle or end.
*/
obj = kmem_cache_alloc(cachep);
lock_chain(); // typically a spin_lock()
obj->key = key;
/*
* changes to obj->key must be visible before refcnt one
*/
smp_wmb();
atomic_set(&obj->refcnt, 1);
/*
* insert obj in RCU way (readers might be traversing chain)
*/
hlist_nulls_add_head_rcu(&obj->obj_node, list);
unlock_chain(); // typically a spin_unlock()

View File

@ -1,4 +1,8 @@
Reference-count design for elements of lists/arrays protected by RCU.
.. SPDX-License-Identifier: GPL-2.0
====================================================================
Reference-count design for elements of lists/arrays protected by RCU
====================================================================
Please note that the percpu-ref feature is likely your first
@ -12,32 +16,33 @@ please read on.
Reference counting on elements of lists which are protected by traditional
reader/writer spinlocks or semaphores are straightforward:
CODE LISTING A:
1. 2.
add() search_and_reference()
{ {
alloc_object read_lock(&list_lock);
... search_for_element
atomic_set(&el->rc, 1); atomic_inc(&el->rc);
write_lock(&list_lock); ...
add_element read_unlock(&list_lock);
... ...
write_unlock(&list_lock); }
}
CODE LISTING A::
3. 4.
release_referenced() delete()
{ {
... write_lock(&list_lock);
if(atomic_dec_and_test(&el->rc)) ...
kfree(el);
... remove_element
} write_unlock(&list_lock);
...
if (atomic_dec_and_test(&el->rc))
kfree(el);
...
}
1. 2.
add() search_and_reference()
{ {
alloc_object read_lock(&list_lock);
... search_for_element
atomic_set(&el->rc, 1); atomic_inc(&el->rc);
write_lock(&list_lock); ...
add_element read_unlock(&list_lock);
... ...
write_unlock(&list_lock); }
}
3. 4.
release_referenced() delete()
{ {
... write_lock(&list_lock);
if(atomic_dec_and_test(&el->rc)) ...
kfree(el);
... remove_element
} write_unlock(&list_lock);
...
if (atomic_dec_and_test(&el->rc))
kfree(el);
...
}
If this list/array is made lock free using RCU as in changing the
write_lock() in add() and delete() to spin_lock() and changing read_lock()
@ -46,34 +51,35 @@ search_and_reference() could potentially hold reference to an element which
has already been deleted from the list/array. Use atomic_inc_not_zero()
in this scenario as follows:
CODE LISTING B:
1. 2.
add() search_and_reference()
{ {
alloc_object rcu_read_lock();
... search_for_element
atomic_set(&el->rc, 1); if (!atomic_inc_not_zero(&el->rc)) {
spin_lock(&list_lock); rcu_read_unlock();
return FAIL;
add_element }
... ...
spin_unlock(&list_lock); rcu_read_unlock();
} }
3. 4.
release_referenced() delete()
{ {
... spin_lock(&list_lock);
if (atomic_dec_and_test(&el->rc)) ...
call_rcu(&el->head, el_free); remove_element
... spin_unlock(&list_lock);
} ...
if (atomic_dec_and_test(&el->rc))
call_rcu(&el->head, el_free);
...
}
CODE LISTING B::
1. 2.
add() search_and_reference()
{ {
alloc_object rcu_read_lock();
... search_for_element
atomic_set(&el->rc, 1); if (!atomic_inc_not_zero(&el->rc)) {
spin_lock(&list_lock); rcu_read_unlock();
return FAIL;
add_element }
... ...
spin_unlock(&list_lock); rcu_read_unlock();
} }
3. 4.
release_referenced() delete()
{ {
... spin_lock(&list_lock);
if (atomic_dec_and_test(&el->rc)) ...
call_rcu(&el->head, el_free); remove_element
... spin_unlock(&list_lock);
} ...
if (atomic_dec_and_test(&el->rc))
call_rcu(&el->head, el_free);
...
}
Sometimes, a reference to the element needs to be obtained in the
update (write) stream. In such cases, atomic_inc_not_zero() might be
update (write) stream. In such cases, atomic_inc_not_zero() might be
overkill, since we hold the update-side spinlock. One might instead
use atomic_inc() in such cases.
@ -82,39 +88,40 @@ search_and_reference() code path. In such cases, the
atomic_dec_and_test() may be moved from delete() to el_free()
as follows:
CODE LISTING C:
1. 2.
add() search_and_reference()
{ {
alloc_object rcu_read_lock();
... search_for_element
atomic_set(&el->rc, 1); atomic_inc(&el->rc);
spin_lock(&list_lock); ...
CODE LISTING C::
add_element rcu_read_unlock();
... }
spin_unlock(&list_lock); 4.
} delete()
3. {
release_referenced() spin_lock(&list_lock);
{ ...
... remove_element
if (atomic_dec_and_test(&el->rc)) spin_unlock(&list_lock);
kfree(el); ...
... call_rcu(&el->head, el_free);
} ...
5. }
void el_free(struct rcu_head *rhp)
{
release_referenced();
}
1. 2.
add() search_and_reference()
{ {
alloc_object rcu_read_lock();
... search_for_element
atomic_set(&el->rc, 1); atomic_inc(&el->rc);
spin_lock(&list_lock); ...
add_element rcu_read_unlock();
... }
spin_unlock(&list_lock); 4.
} delete()
3. {
release_referenced() spin_lock(&list_lock);
{ ...
... remove_element
if (atomic_dec_and_test(&el->rc)) spin_unlock(&list_lock);
kfree(el); ...
... call_rcu(&el->head, el_free);
} ...
5. }
void el_free(struct rcu_head *rhp)
{
release_referenced();
}
The key point is that the initial reference added by add() is not removed
until after a grace period has elapsed following removal. This means that
search_and_reference() cannot find this element, which means that the value
of el->rc cannot increase. Thus, once it reaches zero, there are no
readers that can or ever will be able to reference the element. The
element can therefore safely be freed. This in turn guarantees that if
readers that can or ever will be able to reference the element. The
element can therefore safely be freed. This in turn guarantees that if
any reader finds the element, that reader may safely acquire a reference
without checking the value of the reference counter.
@ -130,21 +137,21 @@ the eventual invocation of kfree(), which is usually not a problem on
modern computer systems, even the small ones.
In cases where delete() can sleep, synchronize_rcu() can be called from
delete(), so that el_free() can be subsumed into delete as follows:
delete(), so that el_free() can be subsumed into delete as follows::
4.
delete()
{
spin_lock(&list_lock);
...
remove_element
spin_unlock(&list_lock);
...
synchronize_rcu();
if (atomic_dec_and_test(&el->rc))
kfree(el);
...
}
4.
delete()
{
spin_lock(&list_lock);
...
remove_element
spin_unlock(&list_lock);
...
synchronize_rcu();
if (atomic_dec_and_test(&el->rc))
kfree(el);
...
}
As additional examples in the kernel, the pattern in listing C is used by
reference counting of struct pid, while the pattern in listing B is used by

View File

@ -1,4 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0
==============================
Using RCU's CPU Stall Detector
==============================
This document first discusses what sorts of issues RCU's CPU stall
detector can locate, and then discusses kernel parameters and Kconfig
@ -7,39 +11,40 @@ this document explains the stall detector's "splat" format.
What Causes RCU CPU Stall Warnings?
===================================
So your kernel printed an RCU CPU stall warning. The next question is
"What caused it?" The following problems can result in RCU CPU stall
warnings:
o A CPU looping in an RCU read-side critical section.
- A CPU looping in an RCU read-side critical section.
o A CPU looping with interrupts disabled.
- A CPU looping with interrupts disabled.
o A CPU looping with preemption disabled.
- A CPU looping with preemption disabled.
o A CPU looping with bottom halves disabled.
- A CPU looping with bottom halves disabled.
o For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the kernel
- For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the kernel
without invoking schedule(). If the looping in the kernel is
really expected and desirable behavior, you might need to add
some calls to cond_resched().
o Booting Linux using a console connection that is too slow to
- Booting Linux using a console connection that is too slow to
keep up with the boot-time console-message rate. For example,
a 115Kbaud serial console can be -way- too slow to keep up
with boot-time message rates, and will frequently result in
RCU CPU stall warning messages. Especially if you have added
debug printk()s.
o Anything that prevents RCU's grace-period kthreads from running.
- Anything that prevents RCU's grace-period kthreads from running.
This can result in the "All QSes seen" console-log message.
This message will include information on when the kthread last
ran and how often it should be expected to run. It can also
result in the "rcu_.*kthread starved for" console-log message,
result in the ``rcu_.*kthread starved for`` console-log message,
which will include additional debugging information.
o A CPU-bound real-time task in a CONFIG_PREEMPT kernel, which might
- A CPU-bound real-time task in a CONFIG_PREEMPT kernel, which might
happen to preempt a low-priority task in the middle of an RCU
read-side critical section. This is especially damaging if
that low-priority task is not permitted to run on any other CPU,
@ -48,7 +53,7 @@ o A CPU-bound real-time task in a CONFIG_PREEMPT kernel, which might
While the system is in the process of running itself out of
memory, you might see stall-warning messages.
o A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that
- A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that
is running at a higher priority than the RCU softirq threads.
This will prevent RCU callbacks from ever being invoked,
and in a CONFIG_PREEMPT_RCU kernel will further prevent
@ -63,7 +68,7 @@ o A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that
can increase your system's context-switch rate and thus degrade
performance.
o A periodic interrupt whose handler takes longer than the time
- A periodic interrupt whose handler takes longer than the time
interval between successive pairs of interrupts. This can
prevent RCU's kthreads and softirq handlers from running.
Note that certain high-overhead debugging options, for example
@ -71,20 +76,27 @@ o A periodic interrupt whose handler takes longer than the time
considerably longer than normal, which can in turn result in
RCU CPU stall warnings.
o Testing a workload on a fast system, tuning the stall-warning
- Testing a workload on a fast system, tuning the stall-warning
timeout down to just barely avoid RCU CPU stall warnings, and then
running the same workload with the same stall-warning timeout on a
slow system. Note that thermal throttling and on-demand governors
can cause a single system to be sometimes fast and sometimes slow!
o A hardware or software issue shuts off the scheduler-clock
- A hardware or software issue shuts off the scheduler-clock
interrupt on a CPU that is not in dyntick-idle mode. This
problem really has happened, and seems to be most likely to
result in RCU CPU stall warnings for CONFIG_NO_HZ_COMMON=n kernels.
o A bug in the RCU implementation.
- A hardware or software issue that prevents time-based wakeups
from occurring. These issues can range from misconfigured or
buggy timer hardware through bugs in the interrupt or exception
path (whether hardware, firmware, or software) through bugs
in Linux's timer subsystem through bugs in the scheduler, and,
yes, even including bugs in RCU itself.
o A hardware failure. This is quite unlikely, but has occurred
- A bug in the RCU implementation.
- A hardware failure. This is quite unlikely, but has occurred
at least once in real life. A CPU failed in a running system,
becoming unresponsive, but not causing an immediate crash.
This resulted in a series of RCU CPU stall warnings, eventually
@ -109,6 +121,7 @@ see include/trace/events/rcu.h.
Fine-Tuning the RCU CPU Stall Detector
======================================
The rcupdate.rcu_cpu_stall_suppress module parameter disables RCU's
CPU stall detector, which detects conditions that unduly delay RCU grace
@ -118,6 +131,7 @@ The stall detector's idea of what constitutes "unduly delayed" is
controlled by a set of kernel configuration variables and cpp macros:
CONFIG_RCU_CPU_STALL_TIMEOUT
----------------------------
This kernel configuration parameter defines the period of time
that RCU will wait from the beginning of a grace period until it
@ -137,6 +151,7 @@ CONFIG_RCU_CPU_STALL_TIMEOUT
/sys/module/rcupdate/parameters/rcu_cpu_stall_suppress.
RCU_STALL_DELAY_DELTA
---------------------
Although the lockdep facility is extremely useful, it does add
some overhead. Therefore, under CONFIG_PROVE_RCU, the
@ -145,6 +160,7 @@ RCU_STALL_DELAY_DELTA
macro, not a kernel configuration parameter.)
RCU_STALL_RAT_DELAY
-------------------
The CPU stall detector tries to make the offending CPU print its
own warnings, as this often gives better-quality stack traces.
@ -155,6 +171,7 @@ RCU_STALL_RAT_DELAY
parameter.)
rcupdate.rcu_task_stall_timeout
-------------------------------
This boot/sysfs parameter controls the RCU-tasks stall warning
interval. A value of zero or less suppresses RCU-tasks stall
@ -168,9 +185,10 @@ rcupdate.rcu_task_stall_timeout
Interpreting RCU's CPU Stall-Detector "Splats"
==============================================
For non-RCU-tasks flavors of RCU, when a CPU detects that it is stalling,
it will print a message similar to the following:
it will print a message similar to the following::
INFO: rcu_sched detected stalls on CPUs/tasks:
2-...: (3 GPs behind) idle=06c/0/0 softirq=1453/1455 fqs=0
@ -223,7 +241,7 @@ an estimate of the total number of RCU callbacks queued across all CPUs
(625 in this case).
In kernels with CONFIG_RCU_FAST_NO_HZ, more information is printed
for each CPU:
for each CPU::
0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 softirq=82/543 last_accelerate: a345/d342 dyntick_enabled: 1
@ -235,7 +253,7 @@ processing is enabled.
If the grace period ends just as the stall warning starts printing,
there will be a spurious stall-warning message, which will include
the following:
the following::
INFO: Stall ended before state dump start
@ -248,7 +266,7 @@ which is overkill for this sort of problem.
If all CPUs and tasks have passed through quiescent states, but the
grace period has nevertheless failed to end, the stall-warning splat
will include something like the following:
will include something like the following::
All QSes seen, last rcu_preempt kthread activity 23807 (4297905177-4297881370), jiffies_till_next_fqs=3, root ->qsmask 0x0
@ -261,7 +279,7 @@ which is way less than 23807. Finally, the root rcu_node structure's
If the relevant grace-period kthread has been unable to run prior to
the stall warning, as was the case in the "All QSes seen" line above,
the following additional line is printed:
the following additional line is printed::
kthread starved for 23807 jiffies! g7075 f0x0 RCU_GP_WAIT_FQS(3) ->state=0x1 ->cpu=5
@ -276,6 +294,7 @@ kthread last ran on CPU 5.
Multiple Warnings From One Stall
================================
If a stall lasts long enough, multiple stall-warning messages will be
printed for it. The second and subsequent messages are printed at
@ -285,9 +304,10 @@ of the stall and the first message.
Stall Warnings for Expedited Grace Periods
==========================================
If an expedited grace period detects a stall, it will place a message
like the following in dmesg:
like the following in dmesg::
INFO: rcu_sched detected expedited stalls on CPUs/tasks: { 7-... } 21119 jiffies s: 73 root: 0x2/.

View File

@ -1,7 +1,12 @@
.. SPDX-License-Identifier: GPL-2.0
==========================
RCU Torture Test Operation
==========================
CONFIG_RCU_TORTURE_TEST
=======================
The CONFIG_RCU_TORTURE_TEST config option is available for all RCU
implementations. It creates an rcutorture kernel module that can
@ -13,9 +18,10 @@ when the module is loaded, and stops when the module is unloaded.
Module parameters are prefixed by "rcutorture." in
Documentation/admin-guide/kernel-parameters.txt.
OUTPUT
Output
======
The statistics output is as follows:
The statistics output is as follows::
rcu-torture:--- Start of test: nreaders=16 nfakewriters=4 stat_interval=30 verbose=0 test_no_idle_hz=1 shuffle_interval=3 stutter=5 irqreader=1 fqs_duration=0 fqs_holdoff=0 fqs_stutter=3 test_boost=1/0 test_boost_interval=7 test_boost_duration=4
rcu-torture: rtc: (null) ver: 155441 tfle: 0 rta: 155441 rtaf: 8884 rtf: 155440 rtmbe: 0 rtbe: 0 rtbke: 0 rtbre: 0 rtbf: 0 rtb: 0 nt: 3055767
@ -36,53 +42,53 @@ automatic determination as to whether RCU operated correctly.
The entries are as follows:
o "rtc": The hexadecimal address of the structure currently visible
* "rtc": The hexadecimal address of the structure currently visible
to readers.
o "ver": The number of times since boot that the RCU writer task
* "ver": The number of times since boot that the RCU writer task
has changed the structure visible to readers.
o "tfle": If non-zero, indicates that the "torture freelist"
* "tfle": If non-zero, indicates that the "torture freelist"
containing structures to be placed into the "rtc" area is empty.
This condition is important, since it can fool you into thinking
that RCU is working when it is not. :-/
o "rta": Number of structures allocated from the torture freelist.
* "rta": Number of structures allocated from the torture freelist.
o "rtaf": Number of allocations from the torture freelist that have
* "rtaf": Number of allocations from the torture freelist that have
failed due to the list being empty. It is not unusual for this
to be non-zero, but it is bad for it to be a large fraction of
the value indicated by "rta".
o "rtf": Number of frees into the torture freelist.
* "rtf": Number of frees into the torture freelist.
o "rtmbe": A non-zero value indicates that rcutorture believes that
* "rtmbe": A non-zero value indicates that rcutorture believes that
rcu_assign_pointer() and rcu_dereference() are not working
correctly. This value should be zero.
o "rtbe": A non-zero value indicates that one of the rcu_barrier()
* "rtbe": A non-zero value indicates that one of the rcu_barrier()
family of functions is not working correctly.
o "rtbke": rcutorture was unable to create the real-time kthreads
* "rtbke": rcutorture was unable to create the real-time kthreads
used to force RCU priority inversion. This value should be zero.
o "rtbre": Although rcutorture successfully created the kthreads
* "rtbre": Although rcutorture successfully created the kthreads
used to force RCU priority inversion, it was unable to set them
to the real-time priority level of 1. This value should be zero.
o "rtbf": The number of times that RCU priority boosting failed
* "rtbf": The number of times that RCU priority boosting failed
to resolve RCU priority inversion.
o "rtb": The number of times that rcutorture attempted to force
* "rtb": The number of times that rcutorture attempted to force
an RCU priority inversion condition. If you are testing RCU
priority boosting via the "test_boost" module parameter, this
value should be non-zero.
o "nt": The number of times rcutorture ran RCU read-side code from
* "nt": The number of times rcutorture ran RCU read-side code from
within a timer handler. This value should be non-zero only
if you specified the "irqreader" module parameter.
o "Reader Pipe": Histogram of "ages" of structures seen by readers.
* "Reader Pipe": Histogram of "ages" of structures seen by readers.
If any entries past the first two are non-zero, RCU is broken.
And rcutorture prints the error flag string "!!!" to make sure
you notice. The age of a newly allocated structure is zero,
@ -94,14 +100,14 @@ o "Reader Pipe": Histogram of "ages" of structures seen by readers.
RCU. If you want to see what it looks like when broken, break
it yourself. ;-)
o "Reader Batch": Another histogram of "ages" of structures seen
* "Reader Batch": Another histogram of "ages" of structures seen
by readers, but in terms of counter flips (or batches) rather
than in terms of grace periods. The legal number of non-zero
entries is again two. The reason for this separate view is that
it is sometimes easier to get the third entry to show up in the
"Reader Batch" list than in the "Reader Pipe" list.
o "Free-Block Circulation": Shows the number of torture structures
* "Free-Block Circulation": Shows the number of torture structures
that have reached a given point in the pipeline. The first element
should closely correspond to the number of structures allocated,
the second to the number that have been removed from reader view,
@ -112,7 +118,7 @@ o "Free-Block Circulation": Shows the number of torture structures
Different implementations of RCU can provide implementation-specific
additional information. For example, Tree SRCU provides the following
additional line:
additional line::
srcud-torture: Tree SRCU per-CPU(idx=0): 0(35,-21) 1(-4,24) 2(1,1) 3(-26,20) 4(28,-47) 5(-9,4) 6(-10,14) 7(-14,11) T(1,6)
@ -123,15 +129,15 @@ using a dynamically allocated srcu_struct (hence "srcud-" rather than
"old" and "current" values to the underlying array, and is useful for
debugging. The final "T" entry contains the totals of the counters.
USAGE ON SPECIFIC KERNEL BUILDS
Usage on Specific Kernel Builds
===============================
It is sometimes desirable to torture RCU on a specific kernel build,
for example, when preparing to put that kernel build into production.
In that case, the kernel should be built with CONFIG_RCU_TORTURE_TEST=m
so that the test can be started using modprobe and terminated using rmmod.
For example, the following script may be used to torture RCU:
For example, the following script may be used to torture RCU::
#!/bin/sh
@ -148,7 +154,8 @@ two are self-explanatory, while the last indicates that while there
were no RCU failures, CPU-hotplug problems were detected.
USAGE ON MAINLINE KERNELS
Usage on Mainline Kernels
=========================
When using rcutorture to test changes to RCU itself, it is often
necessary to build a number of kernels in order to test that change
@ -180,16 +187,16 @@ to Tree SRCU might run only the SRCU-N and SRCU-P scenarios using the
--configs argument to kvm.sh as follows: "--configs 'SRCU-N SRCU-P'".
Large systems can run multiple copies of the full set of scenarios,
for example, a system with 448 hardware threads can run five instances
of the full set concurrently. To make this happen:
of the full set concurrently. To make this happen::
kvm.sh --cpus 448 --configs '5*CFLIST'
Alternatively, such a system can run 56 concurrent instances of a single
eight-CPU scenario:
eight-CPU scenario::
kvm.sh --cpus 448 --configs '56*TREE04'
Or 28 concurrent instances of each of two eight-CPU scenarios:
Or 28 concurrent instances of each of two eight-CPU scenarios::
kvm.sh --cpus 448 --configs '28*TREE03 28*TREE04'
@ -199,14 +206,14 @@ values for memory may require disabling the callback-flooding tests
using the --bootargs parameter discussed below.
Sometimes additional debugging is useful, and in such cases the --kconfig
parameter to kvm.sh may be used, for example, "--kconfig 'CONFIG_KASAN=y'".
parameter to kvm.sh may be used, for example, ``--kconfig 'CONFIG_KASAN=y'``.
Kernel boot arguments can also be supplied, for example, to control
rcutorture's module parameters. For example, to test a change to RCU's
CPU stall-warning code, use "--bootargs 'rcutorture.stall_cpu=30'".
This will of course result in the scripting reporting a failure, namely
the resulting RCU CPU stall warning. As noted above, reducing memory may
require disabling rcutorture's callback-flooding tests:
require disabling rcutorture's callback-flooding tests::
kvm.sh --cpus 448 --configs '56*TREE04' --memory 128M \
--bootargs 'rcutorture.fwd_progress=0'
@ -225,7 +232,7 @@ is listed at the end of the kvm.sh output, which you really should redirect
to a file. The build products and console output of each run is kept in
tools/testing/selftests/rcutorture/res in timestamped directories. A
given directory can be supplied to kvm-find-errors.sh in order to have
it cycle you through summaries of errors and full error logs. For example:
it cycle you through summaries of errors and full error logs. For example::
tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh \
tools/testing/selftests/rcutorture/res/2020.01.20-15.54.23
@ -245,38 +252,42 @@ that was tested and any uncommitted changes in diff format.
The most frequently used files in each per-scenario-run directory are:
.config: This file contains the Kconfig options.
.config:
This file contains the Kconfig options.
Make.out: This contains build output for a specific scenario.
Make.out:
This contains build output for a specific scenario.
console.log: This contains the console output for a specific scenario.
console.log:
This contains the console output for a specific scenario.
This file may be examined once the kernel has booted, but
it might not exist if the build failed.
vmlinux: This contains the kernel, which can be useful with tools like
vmlinux:
This contains the kernel, which can be useful with tools like
objdump and gdb.
A number of additional files are available, but are less frequently used.
Many are intended for debugging of rcutorture itself or of its scripting.
As of v5.4, a successful run with the default set of scenarios produces
the following summary at the end of the run on a 12-CPU system:
the following summary at the end of the run on a 12-CPU system::
SRCU-N ------- 804233 GPs (148.932/s) [srcu: g10008272 f0x0 ]
SRCU-P ------- 202320 GPs (37.4667/s) [srcud: g1809476 f0x0 ]
SRCU-t ------- 1122086 GPs (207.794/s) [srcu: g0 f0x0 ]
SRCU-u ------- 1111285 GPs (205.794/s) [srcud: g1 f0x0 ]
TASKS01 ------- 19666 GPs (3.64185/s) [tasks: g0 f0x0 ]
TASKS02 ------- 20541 GPs (3.80389/s) [tasks: g0 f0x0 ]
TASKS03 ------- 19416 GPs (3.59556/s) [tasks: g0 f0x0 ]
TINY01 ------- 836134 GPs (154.84/s) [rcu: g0 f0x0 ] n_max_cbs: 34198
TINY02 ------- 850371 GPs (157.476/s) [rcu: g0 f0x0 ] n_max_cbs: 2631
TREE01 ------- 162625 GPs (30.1157/s) [rcu: g1124169 f0x0 ]
TREE02 ------- 333003 GPs (61.6672/s) [rcu: g2647753 f0x0 ] n_max_cbs: 35844
TREE03 ------- 306623 GPs (56.782/s) [rcu: g2975325 f0x0 ] n_max_cbs: 1496497
CPU count limited from 16 to 12
TREE04 ------- 246149 GPs (45.5831/s) [rcu: g1695737 f0x0 ] n_max_cbs: 434961
TREE05 ------- 314603 GPs (58.2598/s) [rcu: g2257741 f0x2 ] n_max_cbs: 193997
TREE07 ------- 167347 GPs (30.9902/s) [rcu: g1079021 f0x0 ] n_max_cbs: 478732
CPU count limited from 16 to 12
TREE09 ------- 752238 GPs (139.303/s) [rcu: g13075057 f0x0 ] n_max_cbs: 99011
SRCU-N ------- 804233 GPs (148.932/s) [srcu: g10008272 f0x0 ]
SRCU-P ------- 202320 GPs (37.4667/s) [srcud: g1809476 f0x0 ]
SRCU-t ------- 1122086 GPs (207.794/s) [srcu: g0 f0x0 ]
SRCU-u ------- 1111285 GPs (205.794/s) [srcud: g1 f0x0 ]
TASKS01 ------- 19666 GPs (3.64185/s) [tasks: g0 f0x0 ]
TASKS02 ------- 20541 GPs (3.80389/s) [tasks: g0 f0x0 ]
TASKS03 ------- 19416 GPs (3.59556/s) [tasks: g0 f0x0 ]
TINY01 ------- 836134 GPs (154.84/s) [rcu: g0 f0x0 ] n_max_cbs: 34198
TINY02 ------- 850371 GPs (157.476/s) [rcu: g0 f0x0 ] n_max_cbs: 2631
TREE01 ------- 162625 GPs (30.1157/s) [rcu: g1124169 f0x0 ]
TREE02 ------- 333003 GPs (61.6672/s) [rcu: g2647753 f0x0 ] n_max_cbs: 35844
TREE03 ------- 306623 GPs (56.782/s) [rcu: g2975325 f0x0 ] n_max_cbs: 1496497
CPU count limited from 16 to 12
TREE04 ------- 246149 GPs (45.5831/s) [rcu: g1695737 f0x0 ] n_max_cbs: 434961
TREE05 ------- 314603 GPs (58.2598/s) [rcu: g2257741 f0x2 ] n_max_cbs: 193997
TREE07 ------- 167347 GPs (30.9902/s) [rcu: g1079021 f0x0 ] n_max_cbs: 478732
CPU count limited from 16 to 12
TREE09 ------- 752238 GPs (139.303/s) [rcu: g13075057 f0x0 ] n_max_cbs: 99011

View File

@ -93,6 +93,11 @@ It exists in the sparse memory mapping model, and it is also somewhat
similar to the mem_map variable, both of them are used to translate an
address.
MAX_PHYSMEM_BITS
----------------
Defines the maximum supported physical address space.
page
----
@ -399,6 +404,17 @@ KERNELPACMASK
The mask to extract the Pointer Authentication Code from a kernel virtual
address.
TCR_EL1.T1SZ
------------
Indicates the size offset of the memory region addressed by TTBR1_EL1.
The region size is 2^(64-T1SZ) bytes; for example, T1SZ = 16 corresponds
to a 2^48-byte (256 TiB) region.
TTBR1_EL1 is the translation table base address register specified by the
ARMv8-A architecture, which is used to look up the page tables for virtual
addresses in the higher VA range (refer to the ARMv8 ARM for more details).
arm
===

View File

@ -4038,6 +4038,14 @@
latencies, which will choose a value aligned
with the appropriate hardware boundaries.
rcutree.rcu_min_cached_objs= [KNL]
Minimum number of objects which are cached and
maintained per CPU. Object size is equal
to PAGE_SIZE. The cache reduces pressure on the
page allocator and also makes the whole algorithm
behave better under low-memory conditions.
rcutree.jiffies_till_first_fqs= [KNL]
Set delay from grace-period initialization to
first attempt to force quiescent states.
@ -4258,6 +4266,20 @@
Set time (jiffies) between CPU-hotplug operations,
or zero to disable CPU-hotplug testing.
rcutorture.read_exit= [KNL]
Set the number of read-then-exit kthreads used
to test the interaction of RCU updaters and
task-exit processing.
rcutorture.read_exit_burst= [KNL]
The number of times in a given read-then-exit
episode that a set of read-then-exit kthreads
is spawned.
rcutorture.read_exit_delay= [KNL]
The delay, in seconds, between successive
read-then-exit testing episodes.
rcutorture.shuffle_interval= [KNL]
Set task-shuffle interval (s). Shuffling tasks
allows some CPUs to go into dyntick-idle mode
@ -4407,6 +4429,45 @@
reboot_cpu is s[mp]#### with #### being the processor
to be used for rebooting.
refscale.holdoff= [KNL]
Set test-start holdoff period. The purpose of
this parameter is to delay the start of the
test until boot completes in order to avoid
interference.
refscale.loops= [KNL]
Set the number of loops over the synchronization
primitive under test. Increasing this number
reduces noise due to loop start/end overhead,
but the default has already reduced the per-pass
noise to a handful of picoseconds on ca. 2020
x86 laptops.
refscale.nreaders= [KNL]
Set number of readers. The default value of -1
selects N, where N is roughly 75% of the number
of CPUs. A value of zero is an interesting choice.
refscale.nruns= [KNL]
Set number of runs, each of which is dumped onto
the console log.
refscale.readdelay= [KNL]
Set the read-side critical-section duration,
measured in microseconds.
refscale.scale_type= [KNL]
Specify the read-protection implementation to test.
refscale.shutdown= [KNL]
Shut down the system at the end of the performance
test. This defaults to 1 (shut it down) when
refscale is built into the kernel and to 0 (leave
it running) when refscale is built as a module.
refscale.verbose= [KNL]
Enable additional printk() statements.
relax_domain_level=
[KNL, SMP] Set scheduler's default relax_domain_level.
See Documentation/admin-guide/cgroup-v1/cpusets.rst.
@ -5082,6 +5143,13 @@
Prevent the CPU-hotplug component of torturing
until after init has spawned.
torture.ftrace_dump_at_shutdown= [KNL]
Dump the ftrace buffer at torture-test shutdown,
even if there were no errors. This can be a
very costly operation when many torture tests
are running concurrently, especially on systems
with rotating-rust storage.
tp720= [HW,PS2]
tpm_suspend_pcr=[HW,TPM]

View File

@ -85,21 +85,21 @@ smp_store_release() respectively. Therefore, if you find yourself only using
the Non-RMW operations of atomic_t, you do not in fact need atomic_t at all
and are doing it wrong.
A subtle detail of atomic_set{}() is that it should be observable to the RMW
ops. That is:
A note for the implementation of atomic_set{}() is that it must not break the
atomicity of the RMW ops. That is:
C atomic-set
C Atomic-RMW-ops-are-atomic-WRT-atomic_set
{
atomic_set(v, 1);
atomic_t v = ATOMIC_INIT(1);
}
P0(atomic_t *v)
{
(void)atomic_add_unless(v, 1, 0);
}
P1(atomic_t *v)
{
atomic_add_unless(v, 1, 0);
}
P2(atomic_t *v)
{
atomic_set(v, 0);
}
@ -233,19 +233,19 @@ as well. Similarly, something like:
is an ACQUIRE pattern (though very much not typical), but again the barrier is
strictly stronger than ACQUIRE. As illustrated:
C strong-acquire
C Atomic-RMW+mb__after_atomic-is-stronger-than-acquire
{
}
P1(int *x, atomic_t *y)
P0(int *x, atomic_t *y)
{
r0 = READ_ONCE(*x);
smp_rmb();
r1 = atomic_read(y);
}
P2(int *x, atomic_t *y)
P1(int *x, atomic_t *y)
{
atomic_inc(y);
smp_mb__after_atomic();
@ -253,14 +253,14 @@ strictly stronger than ACQUIRE. As illustrated:
}
exists
(r0=1 /\ r1=0)
(0:r0=1 /\ 0:r1=0)
This should not happen; but a hypothetical atomic_inc_acquire() --
(void)atomic_fetch_inc_acquire() for instance -- would allow the outcome,
because it would not order the W part of the RMW against the following
WRITE_ONCE. Thus:
P1 P2
P0 P1
t = LL.acq *y (0)
t++;

View File

@ -8,7 +8,8 @@ approach to detect races. KCSAN's primary purpose is to detect `data races`_.
Usage
-----
KCSAN requires Clang version 11 or later.
KCSAN is supported by both GCC and Clang. With GCC, version 11 or later is
required; with Clang, version 11 or later is also required.
To enable KCSAN configure the kernel with::

View File

@ -28,6 +28,16 @@ Documentation/devicetree/bindings/iommu/iommu.txt.
For arm-smmu binding, see:
Documentation/devicetree/bindings/iommu/arm,smmu.yaml.
The MSI writes are accompanied by sideband data which is derived from the ICID.
The msi-map property is used to associate the devices with both the ITS
controller and the sideband data which accompanies the writes.
For generic MSI bindings, see
Documentation/devicetree/bindings/interrupt-controller/msi.txt.
For GICv3 and GIC ITS bindings, see:
Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml.
Required properties:
- compatible
@ -49,11 +59,6 @@ Required properties:
region may not be present in some scenarios, such
as in the device tree presented to a virtual machine.
- msi-parent
Value type: <phandle>
Definition: Must be present and point to the MSI controller node
handling message interrupts for the MC.
- ranges
Value type: <prop-encoded-array>
Definition: A standard property. Defines the mapping between the child
@ -119,6 +124,28 @@ Optional properties:
associated with the listed IOMMU, with the iommu-specifier
(i - icid-base + iommu-base).
- msi-map: Maps an ICID to a GIC ITS and associated msi-specifier
data.
The property is an arbitrary number of tuples of
(icid-base,gic-its,msi-base,length).
Any ICID in the interval [icid-base, icid-base + length) is
associated with the listed GIC ITS, with the msi-specifier
(i - icid-base + msi-base).
Deprecated properties:
- msi-parent
Value type: <phandle>
Definition: Describes the MSI controller node handling message
interrupts for the MC. When there is no translation
between the ICID and deviceID this property can be used
to describe the MSI controller used by the devices on the
mc-bus.
The use of this property for mc-bus is deprecated. Please
use msi-map.
Example:
smmu: iommu@5000000 {
@ -128,13 +155,24 @@ Example:
...
};
gic: interrupt-controller@6000000 {
compatible = "arm,gic-v3";
...
}
its: gic-its@6020000 {
compatible = "arm,gic-v3-its";
msi-controller;
...
};
fsl_mc: fsl-mc@80c000000 {
compatible = "fsl,qoriq-mc";
reg = <0x00000008 0x0c000000 0 0x40>, /* MC portal base */
<0x00000000 0x08340000 0 0x40000>; /* MC control reg */
msi-parent = <&its>;
/* define map for ICIDs 23-64 */
iommu-map = <23 &smmu 23 41>;
/* define msi map for ICIDs 23-64 */
msi-map = <23 &its 23 41>;
#address-cells = <3>;
#size-cells = <1>;

View File

@ -28,7 +28,6 @@
| sh: | TODO |
| sparc: | ok |
| um: | TODO |
| unicore32: | TODO |
| x86: | TODO |
| xtensa: | TODO |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | TODO |
| sparc: | ok |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | TODO |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | ok |
| sparc: | ok |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | ok |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | TODO |
| sparc: | ok |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | ok |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | ok |
| sparc: | ok |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | ok |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | TODO |
| sparc: | TODO |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | ok |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | TODO |
| sparc: | TODO |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | TODO |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | ok |
| sparc: | TODO |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | TODO |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | ok |
| sparc: | ok |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | TODO |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | TODO |
| sparc: | TODO |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | TODO |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | ok |
| sparc: | ok |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | TODO |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | ok |
| sparc: | ok |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | TODO |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | TODO |
| sparc: | TODO |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | TODO |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | ok |
| sparc: | TODO |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | ok |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | TODO |
| sparc: | ok |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | TODO |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | TODO |
| sparc: | TODO |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | TODO |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | TODO |
| sparc: | TODO |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | ok |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | TODO |
| sparc: | TODO |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | TODO |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | ok |
| sparc: | ok |
| um: | ok |
| unicore32: | ok |
| x86: | ok |
| xtensa: | ok |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | TODO |
| sparc: | ok |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | ok |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | TODO |
| sparc: | ok |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | ok |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | ok |
| sparc: | ok |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | TODO |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | TODO |
| sparc: | TODO |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | TODO |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | TODO |
| sparc: | TODO |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | TODO |
-----------------------

View File

@ -51,7 +51,6 @@
| sh: | TODO |
| sparc: | TODO |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | TODO |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | .. |
| sparc: | TODO |
| um: | .. |
| unicore32: | .. |
| x86: | ok |
| xtensa: | .. |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | TODO |
| sparc: | TODO |
| um: | ok |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | TODO |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | ok |
| sparc: | TODO |
| um: | TODO |
| unicore32: | TODO |
| x86: | TODO |
| xtensa: | TODO |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | ok |
| sparc: | ok |
| um: | ok |
| unicore32: | ok |
| x86: | ok |
| xtensa: | ok |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | TODO |
| sparc: | ok |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | TODO |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | TODO |
| sparc: | .. |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | ok |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | ok |
| sparc: | ok |
| um: | ok |
| unicore32: | ok |
| x86: | ok |
| xtensa: | ok |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | TODO |
| sparc: | ok |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | TODO |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | TODO |
| sparc: | TODO |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | TODO |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | TODO |
| sparc: | TODO |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | TODO |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | .. |
| sparc: | ok |
| um: | .. |
| unicore32: | .. |
| x86: | ok |
| xtensa: | .. |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | TODO |
| sparc: | TODO |
| um: | .. |
| unicore32: | .. |
| x86: | ok |
| xtensa: | TODO |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | TODO |
| sparc: | TODO |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | TODO |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | ok |
| sparc: | TODO |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | TODO |
-----------------------

View File

@ -28,7 +28,6 @@
| sh: | ok |
| sparc: | ok |
| um: | TODO |
| unicore32: | TODO |
| x86: | ok |
| xtensa: | TODO |
-----------------------

View File

@ -0,0 +1,35 @@
============
LITMUS TESTS
============
Each subdirectory contains litmus tests that typically describe the
semantics of the respective kernel APIs.
For more information about how to "run" a litmus test or how to generate
a kernel test module based on a litmus test, please see
tools/memory-model/README.
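As a quick, hedged illustration (assuming herd7 is installed and the command
is run from a kernel source tree so that the linux-kernel.cfg from
tools/memory-model/ is available), a litmus test can typically be checked
with something like:

  $ cd tools/memory-model
  $ herd7 -conf linux-kernel.cfg \
        ../../Documentation/litmus-tests/atomic/Atomic-RMW-ops-are-atomic-WRT-atomic_set.litmus

herd7 then reports whether the test's "exists" clause can be satisfied under
the Linux-kernel memory model; see tools/memory-model/README for the
authoritative instructions.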
atomic (/atomic directory)
--------------------------
Atomic-RMW+mb__after_atomic-is-stronger-than-acquire.litmus
Test that an atomic RMW followed by a smp_mb__after_atomic() is
stronger than a normal acquire: both the read and write parts of
the RMW are ordered before the subsequent memory accesses.
Atomic-RMW-ops-are-atomic-WRT-atomic_set.litmus
Test that atomic_set() cannot break the atomicity of atomic RMWs.
NOTE: Requires herd7 7.56 or later, which supports "(void)expr".
RCU (/rcu directory)
--------------------
MP+onceassign+derefonce.litmus (under tools/memory-model/litmus-tests/)
Demonstrates the use of rcu_assign_pointer() and rcu_dereference() to
ensure that an RCU reader will not see pre-initialization garbage.
RCU+sync+read.litmus
RCU+sync+free.litmus
Both the above litmus tests demonstrate the RCU grace period guarantee
that an RCU read-side critical section can never span a grace period.

View File

@ -0,0 +1,32 @@
C Atomic-RMW+mb__after_atomic-is-stronger-than-acquire
(*
* Result: Never
*
* Test that an atomic RMW followed by a smp_mb__after_atomic() is
* stronger than a normal acquire: both the read and write parts of
* the RMW are ordered before the subsequent memory accesses.
*)
{
}
P0(int *x, atomic_t *y)
{
int r0;
int r1;
r0 = READ_ONCE(*x);
smp_rmb();
r1 = atomic_read(y);
}
P1(int *x, atomic_t *y)
{
atomic_inc(y);
smp_mb__after_atomic();
WRITE_ONCE(*x, 1);
}
exists
(0:r0=1 /\ 0:r1=0)

View File

@ -0,0 +1,25 @@
C Atomic-RMW-ops-are-atomic-WRT-atomic_set
(*
* Result: Never
*
* Test that atomic_set() cannot break the atomicity of atomic RMWs.
* NOTE: This requires herd7 7.56 or later which supports "(void)expr".
*)
{
atomic_t v = ATOMIC_INIT(1);
}
P0(atomic_t *v)
{
(void)atomic_add_unless(v, 1, 0);
}
P1(atomic_t *v)
{
atomic_set(v, 0);
}
exists
(v=2)

View File

@ -0,0 +1,42 @@
C RCU+sync+free
(*
* Result: Never
*
* This litmus test demonstrates that an RCU reader can never see a write that
* follows a grace period, if it did not see writes that precede that grace
* period.
*
* This is a typical pattern of RCU usage, where the write before the grace
* period assigns a pointer, and the writes following the grace period destroy
* the object that the pointer used to point to.
*
* This is one implication of the RCU grace-period guarantee, which says (among
* other things) that an RCU read-side critical section cannot span a grace period.
*)
{
int x = 1;
int *y = &x;
int z = 1;
}
P0(int *x, int *z, int **y)
{
int *r0;
int r1;
rcu_read_lock();
r0 = rcu_dereference(*y);
r1 = READ_ONCE(*r0);
rcu_read_unlock();
}
P1(int *x, int *z, int **y)
{
rcu_assign_pointer(*y, z);
synchronize_rcu();
WRITE_ONCE(*x, 0);
}
exists (0:r0=x /\ 0:r1=0)

View File

@ -0,0 +1,37 @@
C RCU+sync+read
(*
* Result: Never
*
* This litmus test demonstrates that after a grace period, an RCU updater always
* sees all stores done in prior RCU read-side critical sections. Such
* read-side critical sections would have ended before the grace period ended.
*
* This is one implication of the RCU grace-period guarantee, which says (among
* other things) that an RCU read-side critical section cannot span a grace period.
*)
{
int x = 0;
int y = 0;
}
P0(int *x, int *y)
{
rcu_read_lock();
WRITE_ONCE(*x, 1);
WRITE_ONCE(*y, 1);
rcu_read_unlock();
}
P1(int *x, int *y)
{
int r0;
int r1;
r0 = READ_ONCE(*x);
synchronize_rcu();
r1 = READ_ONCE(*y);
}
exists (1:r0=1 /\ 1:r1=0)

View File

@ -14,6 +14,7 @@ locking
mutex-design
rt-mutex-design
rt-mutex
seqlock
spinlocks
ww-mutex-design
preempt-locking

View File

@ -166,4 +166,4 @@ checked for such errors. The "rmmod" command forces a "SUCCESS",
two are self-explanatory, while the last indicates that while there
were no locking failures, CPU-hotplug problems were detected.
Also see: Documentation/RCU/torture.txt
Also see: Documentation/RCU/torture.rst

View File

@ -18,7 +18,7 @@ as an alternative to these. This new data structure provided a number
of advantages, including simpler interfaces, and at that time smaller
code (see Disadvantages).
[1] http://lwn.net/Articles/164802/
[1] https://lwn.net/Articles/164802/
Implementation
--------------

View File

@ -0,0 +1,170 @@
======================================
Sequence counters and sequential locks
======================================
Introduction
============
Sequence counters are a reader-writer consistency mechanism with
lockless readers (read-only retry loops), and no writer starvation. They
are used for data that's rarely written to (e.g. system time), where the
reader wants a consistent set of information and is willing to retry if
that information changes.
A data set is consistent when the sequence count at the beginning of the
read side critical section is even and the same sequence count value is
read again at the end of the critical section. The data in the set must
be copied out inside the read side critical section. If the sequence
count has changed between the start and the end of the critical section,
the reader must retry.
Writers increment the sequence count at the start and the end of their
critical section. After starting the critical section the sequence count
is odd and indicates to the readers that an update is in progress. At
the end of the write side critical section the sequence count becomes
even again which lets readers make progress.
A sequence counter write side critical section must never be preempted
or interrupted by read side sections. Otherwise the reader will spin for
the entire scheduler tick due to the odd sequence count value and the
interrupted writer. If that reader belongs to a real-time scheduling
class, it can spin forever and the kernel will livelock.
This mechanism cannot be used if the protected data contains pointers,
as the writer can invalidate a pointer that the reader is following.
.. _seqcount_t:
Sequence counters (``seqcount_t``)
==================================
This is the raw counting mechanism, which does not protect against
multiple writers. Write side critical sections must thus be serialized
by an external lock.
If the write serialization primitive is not implicitly disabling
preemption, preemption must be explicitly disabled before entering the
write side section. If the read section can be invoked from hardirq or
softirq contexts, interrupts or bottom halves must also be respectively
disabled before entering the write section.
If it's desired to automatically handle the sequence counter
requirements of writer serialization and non-preemptibility, use
:ref:`seqlock_t` instead.
Initialization::
/* dynamic */
seqcount_t foo_seqcount;
seqcount_init(&foo_seqcount);
/* static */
static seqcount_t foo_seqcount = SEQCNT_ZERO(foo_seqcount);
/* C99 struct init */
struct {
.seq = SEQCNT_ZERO(foo.seq),
} foo;
Write path::
/* Serialized context with disabled preemption */
write_seqcount_begin(&foo_seqcount);
/* ... [[write-side critical section]] ... */
write_seqcount_end(&foo_seqcount);
Read path::
do {
seq = read_seqcount_begin(&foo_seqcount);
/* ... [[read-side critical section]] ... */
} while (read_seqcount_retry(&foo_seqcount, seq));
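As a minimal sketch of how the two sides fit together (foo_lock, foo_data and
new_value are hypothetical names, not part of the API), a writer serialized by
a spinlock, which on non-PREEMPT_RT kernels also disables preemption as
required above, could look like::

	/* writer, serialized by a hypothetical foo_lock */
	spin_lock(&foo_lock);
	write_seqcount_begin(&foo_seqcount);
	foo_data = new_value;	/* [[write-side critical section]] */
	write_seqcount_end(&foo_seqcount);
	spin_unlock(&foo_lock);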
.. _seqlock_t:
Sequential locks (``seqlock_t``)
================================
This contains the :ref:`seqcount_t` mechanism earlier discussed, plus an
embedded spinlock for writer serialization and non-preemptibility.
If the read side section can be invoked from hardirq or softirq context,
use the write side function variants which disable interrupts or bottom
halves respectively.
Initialization::
/* dynamic */
seqlock_t foo_seqlock;
seqlock_init(&foo_seqlock);
/* static */
static DEFINE_SEQLOCK(foo_seqlock);
/* C99 struct init */
struct {
.seql = __SEQLOCK_UNLOCKED(foo.seql)
} foo;
Write path::
write_seqlock(&foo_seqlock);
/* ... [[write-side critical section]] ... */
write_sequnlock(&foo_seqlock);
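If readers can run from softirq context, the write path above would use the
bottom-halves-disabling variant instead (a sketch using the same
foo_seqlock)::

	write_seqlock_bh(&foo_seqlock);
	/* ... [[write-side critical section]] ... */
	write_sequnlock_bh(&foo_seqlock);

write_seqlock_irqsave()/write_sequnlock_irqrestore() play the same role when
readers can run from hardirq context.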
Read path, three categories:
1. Normal Sequence readers which never block a writer but must retry
if a writer is in progress, which is detected by a change in the sequence
number. Writers do not wait for a sequence reader::
do {
seq = read_seqbegin(&foo_seqlock);
/* ... [[read-side critical section]] ... */
} while (read_seqretry(&foo_seqlock, seq));
2. Locking readers which will wait if a writer or another locking reader
is in progress. A locking reader in progress will also block a writer
from entering its critical section. This read lock is
exclusive. Unlike rwlock_t, only one locking reader can acquire it::
read_seqlock_excl(&foo_seqlock);
/* ... [[read-side critical section]] ... */
read_sequnlock_excl(&foo_seqlock);
3. Conditional lockless reader (as in 1), or locking reader (as in 2),
according to a passed marker. This is used to avoid lockless-reader
starvation (too many retry loops) in case of a sharp spike in write
activity. First, a lockless read is tried (even marker passed). If
that trial fails (odd sequence counter is returned, which is used as
the next iteration marker), the lockless read is transformed to a
full locking read and no retry loop is necessary::
/* marker; even initialization */
int seq = 0;
do {
read_seqbegin_or_lock(&foo_seqlock, &seq);
/* ... [[read-side critical section]] ... */
} while (need_seqretry(&foo_seqlock, seq));
done_seqretry(&foo_seqlock, seq);
API documentation
=================
.. kernel-doc:: include/linux/seqlock.h

View File

@ -553,12 +553,12 @@ There are certain things that the Linux kernel memory barriers do not guarantee:
DATA DEPENDENCY BARRIERS (HISTORICAL)
-------------------------------------
As of v4.15 of the Linux kernel, an smp_read_barrier_depends() was
added to READ_ONCE(), which means that about the only people who
need to pay attention to this section are those working on DEC Alpha
architecture-specific code and those working on READ_ONCE() itself.
For those who need it, and for those who are interested in the history,
here is the story of data-dependency barriers.
As of v4.15 of the Linux kernel, an smp_mb() was added to READ_ONCE() for
DEC Alpha, which means that about the only people who need to pay attention
to this section are those working on DEC Alpha architecture-specific code
and those working on READ_ONCE() itself. For those who need it, and for
those who are interested in the history, here is the story of
data-dependency barriers.
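As a hedged sketch of the pattern in question (variable names are
illustrative only), a writer publishes a pointer that a reader then
dereferences through an address dependency:

	CPU 1			CPU 2
	===============		===============
	B = 4;
	<write barrier>
	WRITE_ONCE(P, &B);
				Q = READ_ONCE(P);
				D = *Q;

Because READ_ONCE() has included the required barrier on DEC Alpha since
v4.15, a reader that obtains Q == &B is guaranteed to see D == 4 without any
separate data dependency barrier.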
The usage requirements of data dependency barriers are a little subtle, and
it's not always obvious that they're needed. To illustrate, consider the
@ -2708,144 +2708,6 @@ the properties of the memory window through which devices are accessed and/or
the use of any special device communication instructions the CPU may have.
CACHE COHERENCY
---------------
Life isn't quite as simple as it may appear above, however: for while the
caches are expected to be coherent, there's no guarantee that that coherency
will be ordered. This means that while changes made on one CPU will
eventually become visible on all CPUs, there's no guarantee that they will
become apparent in the same order on those other CPUs.
Consider dealing with a system that has a pair of CPUs (1 & 2), each of which
has a pair of parallel data caches (CPU 1 has A/B, and CPU 2 has C/D):
:
: +--------+
: +---------+ | |
+--------+ : +--->| Cache A |<------->| |
| | : | +---------+ | |
| CPU 1 |<---+ | |
| | : | +---------+ | |
+--------+ : +--->| Cache B |<------->| |
: +---------+ | |
: | Memory |
: +---------+ | System |
+--------+ : +--->| Cache C |<------->| |
| | : | +---------+ | |
| CPU 2 |<---+ | |
| | : | +---------+ | |
+--------+ : +--->| Cache D |<------->| |
: +---------+ | |
: +--------+
:
Imagine the system has the following properties:
(*) an odd-numbered cache line may be in cache A, cache C or it may still be
resident in memory;
(*) an even-numbered cache line may be in cache B, cache D or it may still be
resident in memory;
(*) while the CPU core is interrogating one cache, the other cache may be
making use of the bus to access the rest of the system - perhaps to
displace a dirty cacheline or to do a speculative load;
(*) each cache has a queue of operations that need to be applied to that cache
to maintain coherency with the rest of the system;
(*) the coherency queue is not flushed by normal loads to lines already
present in the cache, even though the contents of the queue may
potentially affect those loads.
Imagine, then, that two writes are made on the first CPU, with a write barrier
between them to guarantee that they will appear to reach that CPU's caches in
the requisite order:
CPU 1 CPU 2 COMMENT
=============== =============== =======================================
u == 0, v == 1 and p == &u, q == &u
v = 2;
smp_wmb(); Make sure change to v is visible before
change to p
<A:modify v=2> v is now in cache A exclusively
p = &v;
<B:modify p=&v> p is now in cache B exclusively
The write memory barrier forces the other CPUs in the system to perceive that
the local CPU's caches have apparently been updated in the correct order. But
now imagine that the second CPU wants to read those values:
CPU 1 CPU 2 COMMENT
=============== =============== =======================================
...
q = p;
x = *q;
The above pair of reads may then fail to happen in the expected order, as the
cacheline holding p may get updated in one of the second CPU's caches while
the update to the cacheline holding v is delayed in the other of the second
CPU's caches by some other cache event:
CPU 1 CPU 2 COMMENT
=============== =============== =======================================
u == 0, v == 1 and p == &u, q == &u
v = 2;
smp_wmb();
<A:modify v=2> <C:busy>
<C:queue v=2>
p = &v; q = p;
<D:request p>
<B:modify p=&v> <D:commit p=&v>
<D:read p>
x = *q;
<C:read *q> Reads from v before v updated in cache
<C:unbusy>
<C:commit v=2>
Basically, while both cachelines will be updated on CPU 2 eventually, there's
no guarantee that, without intervention, the order of update will be the same
as that committed on CPU 1.
To intervene, we need to interpolate a data dependency barrier or a read
barrier between the loads (which as of v4.15 is supplied unconditionally
by the READ_ONCE() macro). This will force the cache to commit its
coherency queue before processing any further requests:
CPU 1 CPU 2 COMMENT
=============== =============== =======================================
u == 0, v == 1 and p == &u, q == &u
v = 2;
smp_wmb();
<A:modify v=2> <C:busy>
<C:queue v=2>
p = &v; q = p;
<D:request p>
<B:modify p=&v> <D:commit p=&v>
<D:read p>
smp_read_barrier_depends()
<C:unbusy>
<C:commit v=2>
x = *q;
<C:read *q> Reads from v after v updated in cache
This sort of problem can be encountered on DEC Alpha processors as they have a
split cache that improves performance by making better use of the data bus.
While most CPUs do imply a data dependency barrier on the read when a memory
access depends on a read, not all do, so it may not be relied on.
Other CPUs may also have split caches, but must coordinate between the various
cachelets for normal memory accesses. The semantics of the Alpha removes the
need for hardware coordination in the absence of memory barriers, which
permitted Alpha to sport higher CPU clock rates back in the day. However,
please note that (again, as of v4.15) smp_read_barrier_depends() should not
be used except in Alpha arch-specific code and within the READ_ONCE() macro.
CACHE COHERENCY VS DMA
----------------------
@ -3009,10 +2871,8 @@ caches with the memory coherence system, thus making it seem like pointer
changes vs new data occur in the right order.
The Alpha defines the Linux kernel's memory model, although as of v4.15
the Linux kernel's addition of smp_read_barrier_depends() to READ_ONCE()
greatly reduced Alpha's impact on the memory model.
See the subsection on "Cache Coherency" above.
the Linux kernel's addition of smp_mb() to READ_ONCE() on Alpha greatly
reduced its impact on the memory model.
VIRTUAL MACHINE GUESTS

View File

@ -67,7 +67,7 @@ corresponding component. The debugfs normally should be mounted to
The content of the directories are files which represent different views
to the debug log. Each component can decide which views should be
used through registering them with the function :c:func:`debug_register_view()`.
Predefined views for hex/ascii, sprintf and raw binary data are provided.
Predefined views for hex/ascii and sprintf data are provided.
It is also possible to define other views. The content of
a view can be inspected simply by reading the corresponding debugfs file.
@ -119,8 +119,6 @@ Predefined views:
extern struct debug_view debug_hex_ascii_view;
extern struct debug_view debug_raw_view;
extern struct debug_view debug_sprintf_view;
Examples
@ -129,7 +127,7 @@ Examples
.. code-block:: c
/*
* hex_ascii- + raw-view Example
* hex_ascii-view Example
*/
#include <linux/init.h>
@ -143,7 +141,6 @@ Examples
debug_info = debug_register("test", 1, 4, 4 );
debug_register_view(debug_info, &debug_hex_ascii_view);
debug_register_view(debug_info, &debug_raw_view);
debug_text_event(debug_info, 4 , "one ");
debug_int_exception(debug_info, 4, 4711);
@ -201,7 +198,7 @@ debugfs-files:
Example::
> ls /sys/kernel/debug/s390dbf/dasd
flush hex_ascii level pages raw
flush hex_ascii level pages
> cat /sys/kernel/debug/s390dbf/dasd/hex_ascii | sort -k2,2 -s
00 00974733272:680099 2 - 02 0006ad7e 07 ea 4a 90 | ....
00 00974733272:682210 2 - 02 0006ade6 46 52 45 45 | FREE
@ -298,10 +295,9 @@ order to see the debug entries well formatted.
Predefined Views
----------------
There are three predefined views: hex_ascii, raw and sprintf.
There are two predefined views: hex_ascii and sprintf.
The hex_ascii view shows the data field in hex and ascii representation
(e.g. ``45 43 4b 44 | ECKD``).
The raw view returns a bytestream as the debug areas are stored in memory.
The sprintf view formats the debug entries in the same way as the sprintf
function would do. The sprintf event/exception functions write to the
@ -334,11 +330,6 @@ The format of the hex_ascii and sprintf view is as follows:
- Return Address to caller
- data field
The format of the raw view is:
- Header as described in debug.h
- datafield
A typical line of the hex_ascii view will look like the following (first line
is only for explanation and will not be displayed when 'cating' the view)::

View File

@ -577,7 +577,7 @@ ACQUIRE 는 해당 오퍼레이션의 로드 부분에만 적용되고 RELEASE
데이터 의존성 배리어 (역사적)
-----------------------------
리눅스 커널 v4.15 기준으로, smp_read_barrier_depends() 가 READ_ONCE()
리눅스 커널 v4.15 기준으로, smp_mb() 가 DEC Alpha 용 READ_ONCE() 코드
추가되었는데, 이는 이 섹션에 주의를 기울여야 하는 사람들은 DEC Alpha 아키텍쳐
전용 코드를 만드는 사람들과 READ_ONCE() 자체를 만드는 사람들 뿐임을 의미합니다.
그런 분들을 위해, 그리고 역사에 관심 있는 분들을 위해, 여기 데이터 의존성
@ -2664,144 +2664,6 @@ CPU 코어는 프로그램의 인과성이 유지된다고만 여겨진다면
수도 있습니다.
캐시 일관성
-----------
하지만 삶은 앞에서 이야기한 것처럼 단순하지 않습니다: 캐시들은 일관적일 것으로
기대되지만, 그 일관성이 순서에도 적용될 거라는 보장은 없습니다. 한 CPU 에서
만들어진 변경 사항은 최종적으로는 시스템의 모든 CPU 에게 보여지게 되지만, 다른
CPU 들에게도 같은 순서로 보이게 될 거라는 보장은 없다는 뜻입니다.
두개의 CPU (1 & 2) 가 달려 있고, 각 CPU 에 두개의 데이터 캐시(CPU 1 은 A/B 를,
CPU 2 는 C/D 를 갖습니다)가 병렬로 연결되어 있는 시스템을 다룬다고 생각해
봅시다:
:
: +--------+
: +---------+ | |
+--------+ : +--->| Cache A |<------->| |
| | : | +---------+ | |
| CPU 1 |<---+ | |
| | : | +---------+ | |
+--------+ : +--->| Cache B |<------->| |
: +---------+ | |
: | Memory |
: +---------+ | System |
+--------+ : +--->| Cache C |<------->| |
| | : | +---------+ | |
| CPU 2 |<---+ | |
| | : | +---------+ | |
+--------+ : +--->| Cache D |<------->| |
: +---------+ | |
: +--------+
:
이 시스템이 다음과 같은 특성을 갖는다 생각해 봅시다:
(*) 홀수번 캐시라인은 캐시 A, 캐시 C 또는 메모리에 위치할 수 있음;
(*) 짝수번 캐시라인은 캐시 B, 캐시 D 또는 메모리에 위치할 수 있음;
(*) CPU 코어가 한개의 캐시에 접근하는 동안, 다른 캐시는 - 더티 캐시라인을
메모리에 내리거나 추측성 로드를 하거나 하기 위해 - 시스템의 다른 부분에
액세스 하기 위해 버스를 사용할 수 있음;
(*) 각 캐시는 시스템의 나머지 부분들과 일관성을 맞추기 위해 해당 캐시에
적용되어야 할 오퍼레이션들의 큐를 가짐;
(*) 이 일관성 큐는 캐시에 이미 존재하는 라인에 가해지는 평범한 로드에 의해서는
비워지지 않는데, 큐의 오퍼레이션들이 이 로드의 결과에 영향을 끼칠 수 있다
할지라도 그러함.
이제, 첫번째 CPU 에서 두개의 쓰기 오퍼레이션을 만드는데, 해당 CPU 의 캐시에
요청된 순서로 오퍼레이션이 도달됨을 보장하기 위해 두 오퍼레이션 사이에 쓰기
배리어를 사용하는 상황을 상상해 봅시다:
CPU 1 CPU 2 COMMENT
=============== =============== =======================================
u == 0, v == 1 and p == &u, q == &u
v = 2;
smp_wmb(); v 의 변경이 p 의 변경 전에 보일 것을
분명히 함
<A:modify v=2> v 는 이제 캐시 A 에 독점적으로 존재함
p = &v;
<B:modify p=&v> p 는 이제 캐시 B 에 독점적으로 존재함
여기서의 쓰기 메모리 배리어는 CPU 1 의 캐시가 올바른 순서로 업데이트 된 것으로
시스템의 다른 CPU 들이 인지하게 만듭니다. 하지만, 이제 두번째 CPU 가 그 값들을
읽으려 하는 상황을 생각해 봅시다:
CPU 1 CPU 2 COMMENT
=============== =============== =======================================
...
q = p;
x = *q;
위의 두개의 읽기 오퍼레이션은 예상된 순서로 일어나지 못할 수 있는데, 두번째 CPU
의 한 캐시에 다른 캐시 이벤트가 발생해 v 를 담고 있는 캐시라인의 해당 캐시에의
업데이트가 지연되는 사이, p 를 담고 있는 캐시라인은 두번째 CPU 의 다른 캐시에
업데이트 되어버렸을 수 있기 때문입니다.
CPU 1 CPU 2 COMMENT
=============== =============== =======================================
u == 0, v == 1 and p == &u, q == &u
v = 2;
smp_wmb();
<A:modify v=2> <C:busy>
<C:queue v=2>
p = &v; q = p;
<D:request p>
<B:modify p=&v> <D:commit p=&v>
<D:read p>
x = *q;
<C:read *q> 캐시에 업데이트 되기 전의 v 를 읽음
<C:unbusy>
<C:commit v=2>
기본적으로, 두개의 캐시라인 모두 CPU 2 에 최종적으로는 업데이트 될 것이지만,
별도의 개입 없이는, 업데이트의 순서가 CPU 1 에서 만들어진 순서와 동일할
것이라는 보장이 없습니다.
여기에 개입하기 위해선, 데이터 의존성 배리어나 읽기 배리어를 로드 오퍼레이션들
사이에 넣어야 합니다 (v4.15 부터는 READ_ONCE() 매크로에 의해 무조건적으로
그렇게 됩니다). 이렇게 함으로써 캐시가 다음 요청을 처리하기 전에 일관성 큐를
처리하도록 강제하게 됩니다.
CPU 1 CPU 2 COMMENT
=============== =============== =======================================
u == 0, v == 1 and p == &u, q == &u
v = 2;
smp_wmb();
<A:modify v=2> <C:busy>
<C:queue v=2>
p = &v; q = p;
<D:request p>
<B:modify p=&v> <D:commit p=&v>
<D:read p>
smp_read_barrier_depends()
<C:unbusy>
<C:commit v=2>
x = *q;
<C:read *q> 캐시에 업데이트 된 v 를 읽음
이런 부류의 문제는 DEC Alpha 계열 프로세서들에서 발견될 수 있는데, 이들은
데이터 버스를 좀 더 잘 사용해 성능을 개선할 수 있는, 분할된 캐시를 가지고 있기
때문입니다. 대부분의 CPU 는 하나의 읽기 오퍼레이션의 메모리 액세스가 다른 읽기
오퍼레이션에 의존적이라면 데이터 의존성 배리어를 내포시킵니다만, 모두가 그런건
아니기 때문에 이점에 의존해선 안됩니다.
다른 CPU 들도 분할된 캐시를 가지고 있을 수 있지만, 그런 CPU 들은 평범한 메모리
액세스를 위해서도 이 분할된 캐시들 사이의 조정을 해야만 합니다. Alpha 는 가장
약한 메모리 순서 시맨틱 (semantic) 을 선택함으로써 메모리 배리어가 명시적으로
사용되지 않았을 때에는 그런 조정이 필요하지 않게 했으며, 이는 Alpha 가 당시에
더 높은 CPU 클락 속도를 가질 수 있게 했습니다. 하지만, (다시 말하건대, v4.15
이후부터는) Alpha 아키텍쳐 전용 코드와 READ_ONCE() 매크로 내부에서를 제외하고는
smp_read_barrier_depends() 가 사용되지 않아야 함을 알아두시기 바랍니다.
캐시 일관성 VS DMA
------------------
@ -2962,10 +2824,8 @@ Alpha CPU 의 일부 버전은 분할된 데이터 캐시를 가지고 있어서
데이터의 발견을 올바른 순서로 일어나게 하기 때문입니다.
리눅스 커널의 메모리 배리어 모델은 Alpha 에 기초해서 정의되었습니다만, v4.15
부터는 리눅스 커널이 READ_ONCE() 내에 smp_read_barrier_depends() 를 추가해서
Alpha 의 메모리 모델로의 영향력이 크게 줄어들긴 했습니다.
위의 "캐시 일관성" 서브섹션을 참고하세요.
부터는 Alpha 용 READ_ONCE() 코드 내에 smp_mb() 가 추가되어서 메모리 모델로의
Alpha 의 영향력이 크게 줄어들었습니다.
가상 머신 게스트

View File

@ -9988,6 +9988,7 @@ M: Luc Maranget <luc.maranget@inria.fr>
M: "Paul E. McKenney" <paulmck@kernel.org>
R: Akira Yokosawa <akiyks@gmail.com>
R: Daniel Lustig <dlustig@nvidia.com>
R: Joel Fernandes <joel@joelfernandes.org>
L: linux-kernel@vger.kernel.org
L: linux-arch@vger.kernel.org
S: Supported
@ -9996,6 +9997,7 @@ F: Documentation/atomic_bitops.txt
F: Documentation/atomic_t.txt
F: Documentation/core-api/atomic_ops.rst
F: Documentation/core-api/refcount-vs-atomic.rst
F: Documentation/litmus-tests/
F: Documentation/memory-barriers.txt
F: tools/memory-model/
@ -13614,16 +13616,6 @@ F: drivers/block/pktcdvd.c
F: include/linux/pktcdvd.h
F: include/uapi/linux/pktcdvd.h
PKUNITY SOC DRIVERS
M: Guan Xuetao <gxt@pku.edu.cn>
S: Maintained
W: http://mprc.pku.edu.cn/~guanxuetao/linux
T: git git://github.com/gxt/linux.git
F: drivers/i2c/busses/i2c-puv3.c
F: drivers/input/serio/i8042-unicore32io.h
F: drivers/rtc/rtc-puv3.c
F: drivers/video/fbdev/fb-puv3.c
PLANTOWER PMS7003 AIR POLLUTION SENSOR DRIVER
M: Tomasz Duszynski <tduszyns@gmail.com>
S: Maintained
@ -14472,7 +14464,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev
F: Documentation/RCU/
F: include/linux/rcu*
F: kernel/rcu/
X: Documentation/RCU/torture.txt
X: Documentation/RCU/torture.rst
X: include/linux/srcu*.h
X: kernel/rcu/srcu*.c
@ -17324,7 +17316,7 @@ M: Josh Triplett <josh@joshtriplett.org>
L: linux-kernel@vger.kernel.org
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev
F: Documentation/RCU/torture.txt
F: Documentation/RCU/torture.rst
F: kernel/locking/locktorture.c
F: kernel/rcu/rcuperf.c
F: kernel/rcu/rcutorture.c
@ -17568,13 +17560,6 @@ L: linux-fsdevel@vger.kernel.org
S: Supported
F: fs/unicode/
UNICORE32 ARCHITECTURE
M: Guan Xuetao <gxt@pku.edu.cn>
S: Maintained
W: http://mprc.pku.edu.cn/~guanxuetao/linux
T: git git://github.com/gxt/linux.git
F: arch/unicore32/
UNIFDEF
M: Tony Finch <dot@dotat.at>
S: Maintained

View File

@ -16,15 +16,14 @@
/*
* To ensure dependency ordering is preserved for the _relaxed and
* _release atomics, an smp_read_barrier_depends() is unconditionally
* inserted into the _relaxed variants, which are used to build the
* barriered versions. Avoid redundant back-to-back fences in the
* _acquire and _fence versions.
* _release atomics, an smp_mb() is unconditionally inserted into the
* _relaxed variants, which are used to build the barriered versions.
* Avoid redundant back-to-back fences in the _acquire and _fence
* versions.
*/
#define __atomic_acquire_fence()
#define __atomic_post_full_fence()
#define ATOMIC_INIT(i) { (i) }
#define ATOMIC64_INIT(i) { (i) }
#define atomic_read(v) READ_ONCE((v)->counter)
@ -70,7 +69,7 @@ static inline int atomic_##op##_return_relaxed(int i, atomic_t *v) \
".previous" \
:"=&r" (temp), "=m" (v->counter), "=&r" (result) \
:"Ir" (i), "m" (v->counter) : "memory"); \
smp_read_barrier_depends(); \
smp_mb(); \
return result; \
}
@ -88,7 +87,7 @@ static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
".previous" \
:"=&r" (temp), "=m" (v->counter), "=&r" (result) \
:"Ir" (i), "m" (v->counter) : "memory"); \
smp_read_barrier_depends(); \
smp_mb(); \
return result; \
}
@ -123,7 +122,7 @@ static __inline__ s64 atomic64_##op##_return_relaxed(s64 i, atomic64_t * v) \
".previous" \
:"=&r" (temp), "=m" (v->counter), "=&r" (result) \
:"Ir" (i), "m" (v->counter) : "memory"); \
smp_read_barrier_depends(); \
smp_mb(); \
return result; \
}
@ -141,7 +140,7 @@ static __inline__ s64 atomic64_fetch_##op##_relaxed(s64 i, atomic64_t * v) \
".previous" \
:"=&r" (temp), "=m" (v->counter), "=&r" (result) \
:"Ir" (i), "m" (v->counter) : "memory"); \
smp_read_barrier_depends(); \
smp_mb(); \
return result; \
}

View File

@ -2,64 +2,15 @@
#ifndef __BARRIER_H
#define __BARRIER_H
#include <asm/compiler.h>
#define mb() __asm__ __volatile__("mb": : :"memory")
#define rmb() __asm__ __volatile__("mb": : :"memory")
#define wmb() __asm__ __volatile__("wmb": : :"memory")
/**
* read_barrier_depends - Flush all pending reads that subsequents reads
* depend on.
*
* No data-dependent reads from memory-like regions are ever reordered
* over this barrier. All reads preceding this primitive are guaranteed
* to access memory (but not necessarily other CPUs' caches) before any
* reads following this primitive that depend on the data return by
* any of the preceding reads. This primitive is much lighter weight than
* rmb() on most CPUs, and is never heavier weight than is
* rmb().
*
* These ordering constraints are respected by both the local CPU
* and the compiler.
*
* Ordering is not guaranteed by anything other than these primitives,
* not even by data dependencies. See the documentation for
* memory_barrier() for examples and URLs to more information.
*
* For example, the following code would force ordering (the initial
* value of "a" is zero, "b" is one, and "p" is "&a"):
*
* <programlisting>
* CPU 0 CPU 1
*
* b = 2;
* memory_barrier();
* p = &b; q = p;
* read_barrier_depends();
* d = *q;
* </programlisting>
*
* because the read of "*q" depends on the read of "p" and these
* two reads are separated by a read_barrier_depends(). However,
* the following code, with the same initial values for "a" and "b":
*
* <programlisting>
* CPU 0 CPU 1
*
* a = 2;
* memory_barrier();
* b = 3; y = b;
* read_barrier_depends();
* x = a;
* </programlisting>
*
* does not enforce ordering, since there is no data dependency between
* the read of "a" and the read of "b". Therefore, on some CPUs, such
* as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
* in cases like this where there are no data dependencies.
*/
#define read_barrier_depends() __asm__ __volatile__("mb": : :"memory")
#define __smp_load_acquire(p) \
({ \
compiletime_assert_atomic_type(*p); \
__READ_ONCE(*p); \
})
#ifdef CONFIG_SMP
#define __ASM_SMP_MB "\tmb\n"

View File

@ -277,9 +277,9 @@ extern inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= __DIRTY_BITS; retur
extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= __ACCESS_BITS; return pte; }
/*
* The smp_read_barrier_depends() in the following functions are required to
* order the load of *dir (the pointer in the top level page table) with any
* subsequent load of the returned pmd_t *ret (ret is data dependent on *dir).
* The smp_rmb() in the following functions are required to order the load of
* *dir (the pointer in the top level page table) with any subsequent load of
* the returned pmd_t *ret (ret is data dependent on *dir).
*
* If this ordering is not enforced, the CPU might load an older value of
* *ret, which may be uninitialized data. See mm/memory.c:__pte_alloc for
@ -293,7 +293,7 @@ extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= __ACCESS_BITS; retu
extern inline pmd_t * pmd_offset(pud_t * dir, unsigned long address)
{
pmd_t *ret = (pmd_t *) pud_page_vaddr(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1));
smp_read_barrier_depends(); /* see above */
smp_rmb(); /* see above */
return ret;
}
#define pmd_offset pmd_offset
@ -303,7 +303,7 @@ extern inline pte_t * pte_offset_kernel(pmd_t * dir, unsigned long address)
{
pte_t *ret = (pte_t *) pmd_page_vaddr(*dir)
+ ((address >> PAGE_SHIFT) & (PTRS_PER_PAGE - 1));
smp_read_barrier_depends(); /* see above */
smp_rmb(); /* see above */
return ret;
}
#define pte_offset_kernel pte_offset_kernel

View File

@ -0,0 +1,35 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2019 Google LLC.
*/
#ifndef __ASM_RWONCE_H
#define __ASM_RWONCE_H
#ifdef CONFIG_SMP
#include <asm/barrier.h>
/*
* Alpha is apparently daft enough to reorder address-dependent loads
* on some CPU implementations. Knock some common sense into it with
* a memory barrier in READ_ONCE().
*
* For the curious, more information about this unusual reordering is
* available in chapter 15 of the "perfbook":
*
* https://kernel.org/pub/linux/kernel/people/paulmck/perfbook/perfbook.html
*
*/
#define __READ_ONCE(x) \
({ \
__unqual_scalar_typeof(x) __x = \
(*(volatile typeof(__x) *)(&(x))); \
mb(); \
(typeof(x))__x; \
})
#endif /* CONFIG_SMP */
#include <asm-generic/rwonce.h>
#endif /* __ASM_RWONCE_H */

View File

@ -14,8 +14,6 @@
#include <asm/barrier.h>
#include <asm/smp.h>
#define ATOMIC_INIT(i) { (i) }
#ifndef CONFIG_ARC_PLAT_EZNPS
#define atomic_read(v) READ_ONCE((v)->counter)

View File

@ -15,8 +15,6 @@
#include <asm/barrier.h>
#include <asm/cmpxchg.h>
#define ATOMIC_INIT(i) { (i) }
#ifdef __KERNEL__
/*

View File

@ -5,7 +5,7 @@
#ifndef _ASM_ARM_PERCPU_H_
#define _ASM_ARM_PERCPU_H_
#include <asm/thread_info.h>
register unsigned long current_stack_pointer asm ("sp");
/*
* Same as asm-generic/percpu.h, except that we store the per cpu offset

View File

@ -75,11 +75,6 @@ struct thread_info {
.addr_limit = KERNEL_DS, \
}
/*
* how to get the current stack pointer in C
*/
register unsigned long current_stack_pointer asm ("sp");
/*
* how to get the thread information struct from C
*/

View File

@ -7,6 +7,7 @@
#ifndef __ASSEMBLY__
#include <asm/barrier.h>
#include <asm/errno.h>
#include <asm/unistd.h>
#include <asm/vdso/cp15.h>

View File

@ -118,6 +118,7 @@ config ARM64
select GENERIC_STRNLEN_USER
select GENERIC_TIME_VSYSCALL
select GENERIC_GETTIMEOFDAY
select GENERIC_VDSO_TIME_NS
select HANDLE_DOMAIN_IRQ
select HARDIRQS_SW_RESEND
select HAVE_PCI
@ -1327,6 +1328,8 @@ config SWP_EMULATION
ARMv8 obsoletes the use of A32 SWP/SWPB instructions such that
they are always undefined. Say Y here to enable software
emulation of these instructions for userspace using LDXR/STXR.
This feature can be controlled at runtime with the abi.swp
sysctl which is disabled by default.
In some older versions of glibc [<=2.8] SWP is used during futex
trylock() operations with the assumption that the code will not
@ -1353,7 +1356,8 @@ config CP15_BARRIER_EMULATION
Say Y here to enable software emulation of these
instructions for AArch32 userspace code. When this option is
enabled, CP15 barrier usage is traced which can help
identify software that needs updating.
identify software that needs updating. This feature can be
controlled at runtime with the abi.cp15_barrier sysctl.
If unsure, say Y
@ -1364,7 +1368,8 @@ config SETEND_EMULATION
AArch32 EL0, and is deprecated in ARMv8.
Say Y here to enable software emulation of the instruction
for AArch32 userspace code.
for AArch32 userspace code. This feature can be controlled
at runtime with the abi.setend sysctl.
Note: All the cpus on the system must have mixed endian support at EL0
for this feature to be enabled. If a new CPU - which doesn't support mixed
@ -1596,6 +1601,20 @@ config ARM64_AMU_EXTN
correctly reflect reality. Most commonly, the value read will be 0,
indicating that the counter is not enabled.
config AS_HAS_ARMV8_4
def_bool $(cc-option,-Wa$(comma)-march=armv8.4-a)
config ARM64_TLB_RANGE
bool "Enable support for tlbi range feature"
default y
depends on AS_HAS_ARMV8_4
help
ARMv8.4-TLBI provides TLBI invalidation instructions that apply to a
range of input addresses.
The feature introduces new assembly instructions, which are
supported by binutils >= 2.30.
endmenu
menu "ARMv8.5 architectural features"

View File

@ -82,11 +82,18 @@ endif
# compiler to generate them and consequently to break the single image contract
# we pass it only to the assembler. This option is utilized only in case of non
# integrated assemblers.
ifneq ($(CONFIG_AS_HAS_ARMV8_4), y)
branch-prot-flags-$(CONFIG_AS_HAS_PAC) += -Wa,-march=armv8.3-a
endif
endif
KBUILD_CFLAGS += $(branch-prot-flags-y)
ifeq ($(CONFIG_AS_HAS_ARMV8_4), y)
# make sure to pass the newest target architecture to -march.
KBUILD_CFLAGS += -Wa,-march=armv8.4-a
endif
ifeq ($(CONFIG_SHADOW_CALL_STACK), y)
KBUILD_CFLAGS += -ffixed-x18
endif

View File

@ -68,6 +68,7 @@ CONFIG_SCHED_SMT=y
CONFIG_NUMA=y
CONFIG_SECCOMP=y
CONFIG_KEXEC=y
CONFIG_KEXEC_FILE=y
CONFIG_CRASH_DUMP=y
CONFIG_XEN=y
CONFIG_COMPAT=y

View File

@ -47,20 +47,7 @@
pgprot_t __acpi_get_mem_attribute(phys_addr_t addr);
/* ACPI table mapping after acpi_permanent_mmap is set */
static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys,
acpi_size size)
{
/* For normal memory we already have a cacheable mapping. */
if (memblock_is_map_memory(phys))
return (void __iomem *)__phys_to_virt(phys);
/*
* We should still honor the memory's attribute here because
* crash dump kernel possibly excludes some ACPI (reclaim)
* regions from memblock list.
*/
return __ioremap(phys, size, __acpi_get_mem_attribute(phys));
}
void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size);
#define acpi_os_ioremap acpi_os_ioremap
typedef u64 phys_cpuid_t;

View File

@ -99,8 +99,6 @@ static inline long arch_atomic64_dec_if_positive(atomic64_t *v)
return __lse_ll_sc_body(atomic64_dec_if_positive, v);
}
#define ATOMIC_INIT(i) { (i) }
#define arch_atomic_read(v) __READ_ONCE((v)->counter)
#define arch_atomic_set(v, i) __WRITE_ONCE(((v)->counter), (i))

View File

@ -62,7 +62,9 @@
#define ARM64_HAS_GENERIC_AUTH 52
#define ARM64_HAS_32BIT_EL1 53
#define ARM64_BTI 54
#define ARM64_HAS_ARMv8_4_TTL 55
#define ARM64_HAS_TLB_RANGE 56
#define ARM64_NCAPS 55
#define ARM64_NCAPS 57
#endif /* __ASM_CPUCAPS_H */

View File

@ -692,6 +692,12 @@ static inline bool system_supports_bti(void)
return IS_ENABLED(CONFIG_ARM64_BTI) && cpus_have_const_cap(ARM64_BTI);
}
static inline bool system_supports_tlb_range(void)
{
return IS_ENABLED(CONFIG_ARM64_TLB_RANGE) &&
cpus_have_const_cap(ARM64_HAS_TLB_RANGE);
}
#define ARM64_BP_HARDEN_UNKNOWN -1
#define ARM64_BP_HARDEN_WA_NEEDED 0
#define ARM64_BP_HARDEN_NOT_REQUIRED 1
@ -774,6 +780,7 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
}
u32 get_kvm_ipa_limit(void);
void dump_cpu_features(void);
#endif /* __ASSEMBLY__ */

View File

@ -49,6 +49,8 @@ extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte, unsigned long sz);
#define set_huge_swap_pte_at set_huge_swap_pte_at
void __init arm64_hugetlb_cma_reserve(void);
#include <asm-generic/hugetlb.h>
#endif /* __ASM_HUGETLB_H */

View File

@ -95,6 +95,7 @@
#define KERNEL_HWCAP_DGH __khwcap2_feature(DGH)
#define KERNEL_HWCAP_RNG __khwcap2_feature(RNG)
#define KERNEL_HWCAP_BTI __khwcap2_feature(BTI)
/* reserved for KERNEL_HWCAP_MTE __khwcap2_feature(MTE) */
/*
* This yields a mask that user programs can use to figure out what

View File

@ -8,7 +8,7 @@
#ifndef __ASM_KERNEL_PGTABLE_H
#define __ASM_KERNEL_PGTABLE_H
#include <linux/pgtable.h>
#include <asm/pgtable-hwdef.h>
#include <asm/sparsemem.h>
/*

View File

@ -10,11 +10,8 @@
#ifndef __ASM_MEMORY_H
#define __ASM_MEMORY_H
#include <linux/compiler.h>
#include <linux/const.h>
#include <linux/sizes.h>
#include <linux/types.h>
#include <asm/bug.h>
#include <asm/page-def.h>
/*
@ -157,11 +154,15 @@
#endif
#ifndef __ASSEMBLY__
extern u64 vabits_actual;
#define PAGE_END (_PAGE_END(vabits_actual))
#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/mmdebug.h>
#include <linux/types.h>
#include <asm/bug.h>
extern u64 vabits_actual;
#define PAGE_END (_PAGE_END(vabits_actual))
extern s64 physvirt_offset;
extern s64 memstart_addr;
@ -322,6 +323,7 @@ static inline void *phys_to_virt(phys_addr_t x)
__is_lm_address(__addr) && pfn_valid(virt_to_pfn(__addr)); \
})
void dump_mem_limit(void);
#endif /* !ASSEMBLY */
/*

View File

@ -175,7 +175,7 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp)
* take CPU migration into account.
*/
#define destroy_context(mm) do { } while(0)
void check_and_switch_context(struct mm_struct *mm, unsigned int cpu);
void check_and_switch_context(struct mm_struct *mm);
#define init_new_context(tsk,mm) ({ atomic64_set(&(mm)->context.id, 0); 0; })
@ -214,8 +214,6 @@ enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
static inline void __switch_mm(struct mm_struct *next)
{
unsigned int cpu = smp_processor_id();
/*
* init_mm.pgd does not contain any user mappings and it is always
* active for kernel addresses in TTBR1. Just set the reserved TTBR0.
@ -225,7 +223,7 @@ static inline void __switch_mm(struct mm_struct *next)
return;
}
check_and_switch_context(next, cpu);
check_and_switch_context(next);
}
static inline void

View File

@ -72,6 +72,13 @@
#define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD 0x36
#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD 0x37
#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD 0x38
#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD 0x39
#define ARMV8_PMUV3_PERFCTR_OP_RETIRED 0x3A
#define ARMV8_PMUV3_PERFCTR_OP_SPEC 0x3B
#define ARMV8_PMUV3_PERFCTR_STALL 0x3C
#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND 0x3D
#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND 0x3E
#define ARMV8_PMUV3_PERFCTR_STALL_SLOT 0x3F
/* Statistical profiling extension microarchitectural events */
#define ARMV8_SPE_PERFCTR_SAMPLE_POP 0x4000
@ -79,6 +86,26 @@
#define ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE 0x4002
#define ARMV8_SPE_PERFCTR_SAMPLE_COLLISION 0x4003
/* AMUv1 architecture events */
#define ARMV8_AMU_PERFCTR_CNT_CYCLES 0x4004
#define ARMV8_AMU_PERFCTR_STALL_BACKEND_MEM 0x4005
/* long-latency read miss events */
#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_LMISS 0x4006
#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD 0x4009
#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS 0x400A
#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD 0x400B
/* additional latency from alignment events */
#define ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT 0x4020
#define ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT 0x4021
#define ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT 0x4022
/* Armv8.5 Memory Tagging Extension events */
#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED 0x4024
#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_RD 0x4025
#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR 0x4026
/* ARMv8 recommended implementation defined event types */
#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD 0x40
#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x41

View File

@ -29,7 +29,7 @@
* Size mapped by an entry at level n ( 0 <= n <= 3)
* We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits
* in the final page. The maximum number of translation levels supported by
* the architecture is 4. Hence, starting at at level n, we have further
* the architecture is 4. Hence, starting at level n, we have further
* ((4 - n) - 1) levels of translation excluding the offset within the page.
* So, the total number of bits mapped by an entry at level n is :
*
@ -82,23 +82,23 @@
* Contiguous page definitions.
*/
#ifdef CONFIG_ARM64_64K_PAGES
#define CONT_PTE_SHIFT 5
#define CONT_PMD_SHIFT 5
#define CONT_PTE_SHIFT (5 + PAGE_SHIFT)
#define CONT_PMD_SHIFT (5 + PMD_SHIFT)
#elif defined(CONFIG_ARM64_16K_PAGES)
#define CONT_PTE_SHIFT 7
#define CONT_PMD_SHIFT 5
#define CONT_PTE_SHIFT (7 + PAGE_SHIFT)
#define CONT_PMD_SHIFT (5 + PMD_SHIFT)
#else
#define CONT_PTE_SHIFT 4
#define CONT_PMD_SHIFT 4
#define CONT_PTE_SHIFT (4 + PAGE_SHIFT)
#define CONT_PMD_SHIFT (4 + PMD_SHIFT)
#endif
#define CONT_PTES (1 << CONT_PTE_SHIFT)
#define CONT_PTES (1 << (CONT_PTE_SHIFT - PAGE_SHIFT))
#define CONT_PTE_SIZE (CONT_PTES * PAGE_SIZE)
#define CONT_PTE_MASK (~(CONT_PTE_SIZE - 1))
#define CONT_PMDS (1 << CONT_PMD_SHIFT)
#define CONT_PMDS (1 << (CONT_PMD_SHIFT - PMD_SHIFT))
#define CONT_PMD_SIZE (CONT_PMDS * PMD_SIZE)
#define CONT_PMD_MASK (~(CONT_PMD_SIZE - 1))
/* the the numerical offset of the PTE within a range of CONT_PTES */
/* the numerical offset of the PTE within a range of CONT_PTES */
#define CONT_RANGE_OFFSET(addr) (((addr)>>PAGE_SHIFT)&(CONT_PTES-1))
/*
@ -178,10 +178,12 @@
#define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */
#define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */
#define PTE_S2_XN (_AT(pteval_t, 2) << 53) /* XN[1:0] */
#define PTE_S2_SW_RESVD (_AT(pteval_t, 15) << 55) /* Reserved for SW */
#define PMD_S2_RDONLY (_AT(pmdval_t, 1) << 6) /* HAP[2:1] */
#define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */
#define PMD_S2_XN (_AT(pmdval_t, 2) << 53) /* XN[1:0] */
#define PMD_S2_SW_RESVD (_AT(pmdval_t, 15) << 55) /* Reserved for SW */
#define PUD_S2_RDONLY (_AT(pudval_t, 1) << 6) /* HAP[2:1] */
#define PUD_S2_RDWR (_AT(pudval_t, 3) << 6) /* HAP[2:1] */
@ -216,6 +218,7 @@
#define TCR_TxSZ(x) (TCR_T0SZ(x) | TCR_T1SZ(x))
#define TCR_TxSZ_WIDTH 6
#define TCR_T0SZ_MASK (((UL(1) << TCR_TxSZ_WIDTH) - 1) << TCR_T0SZ_OFFSET)
#define TCR_T1SZ_MASK (((UL(1) << TCR_TxSZ_WIDTH) - 1) << TCR_T1SZ_OFFSET)
#define TCR_EPD0_SHIFT 7
#define TCR_EPD0_MASK (UL(1) << TCR_EPD0_SHIFT)

View File

@ -40,6 +40,16 @@ extern void __pmd_error(const char *file, int line, unsigned long val);
extern void __pud_error(const char *file, int line, unsigned long val);
extern void __pgd_error(const char *file, int line, unsigned long val);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
/* Set stride and tlb_level in flush_*_tlb_range */
#define flush_pmd_tlb_range(vma, addr, end) \
__flush_tlb_range(vma, addr, end, PMD_SIZE, false, 2)
#define flush_pud_tlb_range(vma, addr, end) \
__flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1)
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
* ZERO_PAGE is a global shared page that is always zero: used
* for zero-mapped memory areas etc..

View File

@ -27,7 +27,7 @@
*
* Some code sections either automatically switch back to PSR.I or explicitly
* require to not use priority masking. If bit GIC_PRIO_PSR_I_SET is included
* in the the priority mask, it indicates that PSR.I should be set and
* in the priority mask, it indicates that PSR.I should be set and
* interrupt disabling temporarily does not rely on IRQ priorities.
*/
#define GIC_PRIO_IRQON 0xe0

View File

@ -256,4 +256,13 @@ stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
return (boundary - 1 < end - 1) ? boundary : end;
}
/*
* Level values for the ARMv8.4-TTL extension, mapping PUD/PMD/PTE and
* the architectural page-table level.
*/
#define S2_NO_LEVEL_HINT 0
#define S2_PUD_LEVEL 1
#define S2_PMD_LEVEL 2
#define S2_PTE_LEVEL 3
#endif /* __ARM64_S2_PGTABLE_H_ */

View File

@ -421,9 +421,9 @@
*/
#define SYS_AMEVCNTR0_EL0(n) SYS_AM_EL0(4 + ((n) >> 3), (n) & 7)
#define SYS_AMEVTYPE0_EL0(n) SYS_AM_EL0(6 + ((n) >> 3), (n) & 7)
#define SYS_AMEVTYPER0_EL0(n) SYS_AM_EL0(6 + ((n) >> 3), (n) & 7)
#define SYS_AMEVCNTR1_EL0(n) SYS_AM_EL0(12 + ((n) >> 3), (n) & 7)
#define SYS_AMEVTYPE1_EL0(n) SYS_AM_EL0(14 + ((n) >> 3), (n) & 7)
#define SYS_AMEVTYPER1_EL0(n) SYS_AM_EL0(14 + ((n) >> 3), (n) & 7)
/* AMU v1: Fixed (architecturally defined) activity monitors */
#define SYS_AMEVCNTR0_CORE_EL0 SYS_AMEVCNTR0_EL0(0)
@ -617,6 +617,9 @@
#define ID_AA64ISAR0_SHA1_SHIFT 8
#define ID_AA64ISAR0_AES_SHIFT 4
#define ID_AA64ISAR0_TLB_RANGE_NI 0x0
#define ID_AA64ISAR0_TLB_RANGE 0x2
/* id_aa64isar1 */
#define ID_AA64ISAR1_I8MM_SHIFT 52
#define ID_AA64ISAR1_DGH_SHIFT 48
@ -706,6 +709,9 @@
#define ID_AA64ZFR0_SVEVER_SVE2 0x1
/* id_aa64mmfr0 */
#define ID_AA64MMFR0_ECV_SHIFT 60
#define ID_AA64MMFR0_FGT_SHIFT 56
#define ID_AA64MMFR0_EXS_SHIFT 44
#define ID_AA64MMFR0_TGRAN4_2_SHIFT 40
#define ID_AA64MMFR0_TGRAN64_2_SHIFT 36
#define ID_AA64MMFR0_TGRAN16_2_SHIFT 32
@ -734,6 +740,10 @@
#endif
/* id_aa64mmfr1 */
#define ID_AA64MMFR1_ETS_SHIFT 36
#define ID_AA64MMFR1_TWED_SHIFT 32
#define ID_AA64MMFR1_XNX_SHIFT 28
#define ID_AA64MMFR1_SPECSEI_SHIFT 24
#define ID_AA64MMFR1_PAN_SHIFT 20
#define ID_AA64MMFR1_LOR_SHIFT 16
#define ID_AA64MMFR1_HPD_SHIFT 12
@ -746,8 +756,15 @@
/* id_aa64mmfr2 */
#define ID_AA64MMFR2_E0PD_SHIFT 60
#define ID_AA64MMFR2_EVT_SHIFT 56
#define ID_AA64MMFR2_BBM_SHIFT 52
#define ID_AA64MMFR2_TTL_SHIFT 48
#define ID_AA64MMFR2_FWB_SHIFT 40
#define ID_AA64MMFR2_IDS_SHIFT 36
#define ID_AA64MMFR2_AT_SHIFT 32
#define ID_AA64MMFR2_ST_SHIFT 28
#define ID_AA64MMFR2_NV_SHIFT 24
#define ID_AA64MMFR2_CCIDX_SHIFT 20
#define ID_AA64MMFR2_LVA_SHIFT 16
#define ID_AA64MMFR2_IESB_SHIFT 12
#define ID_AA64MMFR2_LSM_SHIFT 8
@ -755,6 +772,7 @@
#define ID_AA64MMFR2_CNP_SHIFT 0
/* id_aa64dfr0 */
#define ID_AA64DFR0_DOUBLELOCK_SHIFT 36
#define ID_AA64DFR0_PMSVER_SHIFT 32
#define ID_AA64DFR0_CTX_CMPS_SHIFT 28
#define ID_AA64DFR0_WRPS_SHIFT 20
@ -807,18 +825,40 @@
#define ID_ISAR6_DP_SHIFT 4
#define ID_ISAR6_JSCVT_SHIFT 0
#define ID_MMFR0_INNERSHR_SHIFT 28
#define ID_MMFR0_FCSE_SHIFT 24
#define ID_MMFR0_AUXREG_SHIFT 20
#define ID_MMFR0_TCM_SHIFT 16
#define ID_MMFR0_SHARELVL_SHIFT 12
#define ID_MMFR0_OUTERSHR_SHIFT 8
#define ID_MMFR0_PMSA_SHIFT 4
#define ID_MMFR0_VMSA_SHIFT 0
#define ID_MMFR4_EVT_SHIFT 28
#define ID_MMFR4_CCIDX_SHIFT 24
#define ID_MMFR4_LSM_SHIFT 20
#define ID_MMFR4_HPDS_SHIFT 16
#define ID_MMFR4_CNP_SHIFT 12
#define ID_MMFR4_XNX_SHIFT 8
#define ID_MMFR4_AC2_SHIFT 4
#define ID_MMFR4_SPECSEI_SHIFT 0
#define ID_MMFR5_ETS_SHIFT 0
#define ID_PFR0_DIT_SHIFT 24
#define ID_PFR0_CSV2_SHIFT 16
#define ID_PFR0_STATE3_SHIFT 12
#define ID_PFR0_STATE2_SHIFT 8
#define ID_PFR0_STATE1_SHIFT 4
#define ID_PFR0_STATE0_SHIFT 0
#define ID_DFR0_PERFMON_SHIFT 24
#define ID_DFR0_MPROFDBG_SHIFT 20
#define ID_DFR0_MMAPTRC_SHIFT 16
#define ID_DFR0_COPTRC_SHIFT 12
#define ID_DFR0_MMAPDBG_SHIFT 8
#define ID_DFR0_COPSDBG_SHIFT 4
#define ID_DFR0_COPDBG_SHIFT 0
#define ID_PFR2_SSBS_SHIFT 4
#define ID_PFR2_CSV3_SHIFT 0
@ -861,6 +901,11 @@
#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN64_SUPPORTED
#endif
#define MVFR2_FPMISC_SHIFT 4
#define MVFR2_SIMDMISC_SHIFT 0
#define DCZID_DZP_SHIFT 4
#define DCZID_BS_SHIFT 0
/*
* The ZCR_ELx_LEN_* definitions intentionally include bits [8:4] which

View File

@ -21,11 +21,37 @@ static void tlb_flush(struct mmu_gather *tlb);
#include <asm-generic/tlb.h>
/*
* Get the TLBI level for arm64. The default value is 0 if more than
* one of the cleared_* fields is set, or if none of them is set.
* arm64 doesn't support p4ds yet.
*/
static inline int tlb_get_level(struct mmu_gather *tlb)
{
if (tlb->cleared_ptes && !(tlb->cleared_pmds ||
tlb->cleared_puds ||
tlb->cleared_p4ds))
return 3;
if (tlb->cleared_pmds && !(tlb->cleared_ptes ||
tlb->cleared_puds ||
tlb->cleared_p4ds))
return 2;
if (tlb->cleared_puds && !(tlb->cleared_ptes ||
tlb->cleared_pmds ||
tlb->cleared_p4ds))
return 1;
return 0;
}
static inline void tlb_flush(struct mmu_gather *tlb)
{
struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0);
bool last_level = !tlb->freed_tables;
unsigned long stride = tlb_get_unmap_size(tlb);
int tlb_level = tlb_get_level(tlb);
/*
* If we're tearing down the address space then we only care about
@ -38,7 +64,8 @@ static inline void tlb_flush(struct mmu_gather *tlb)
return;
}
__flush_tlb_range(&vma, tlb->start, tlb->end, stride, last_level);
__flush_tlb_range(&vma, tlb->start, tlb->end, stride,
last_level, tlb_level);
}
static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,

View File

@ -10,6 +10,7 @@
#ifndef __ASSEMBLY__
#include <linux/bitfield.h>
#include <linux/mm_types.h>
#include <linux/sched.h>
#include <asm/cputype.h>
@ -59,6 +60,102 @@
__ta; \
})
/*
* Get translation granule of the system, which is decided by
* PAGE_SIZE. Used by TTL.
* - 4KB : 1
* - 16KB : 2
* - 64KB : 3
*/
#define TLBI_TTL_TG_4K 1
#define TLBI_TTL_TG_16K 2
#define TLBI_TTL_TG_64K 3
static inline unsigned long get_trans_granule(void)
{
switch (PAGE_SIZE) {
case SZ_4K:
return TLBI_TTL_TG_4K;
case SZ_16K:
return TLBI_TTL_TG_16K;
case SZ_64K:
return TLBI_TTL_TG_64K;
default:
return 0;
}
}
/*
* Level-based TLBI operations.
*
* When ARMv8.4-TTL exists, TLBI operations take an additional hint for
* the level at which the invalidation must take place. If the level is
* wrong, no invalidation may take place. In the case where the level
* cannot be easily determined, a 0 value for the level parameter will
* perform a non-hinted invalidation.
*
* For Stage-2 invalidation, use the level values provided to that effect
* in asm/stage2_pgtable.h.
*/
#define TLBI_TTL_MASK GENMASK_ULL(47, 44)
#define __tlbi_level(op, addr, level) do { \
u64 arg = addr; \
\
if (cpus_have_const_cap(ARM64_HAS_ARMv8_4_TTL) && \
level) { \
u64 ttl = level & 3; \
ttl |= get_trans_granule() << 2; \
arg &= ~TLBI_TTL_MASK; \
arg |= FIELD_PREP(TLBI_TTL_MASK, ttl); \
} \
\
__tlbi(op, arg); \
} while(0)
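
As a rough sketch of what __tlbi_level() does to its operand (a userspace model assuming 4KB pages; ttl_encode(), TG_4K and TTL_MASK below are invented names, not kernel symbols): the granule code goes in bits [3:2] of the TTL value, the level in bits [1:0], and the resulting 4-bit hint is placed into bits [47:44] of the TLBI argument.

/*
 * Userspace sketch of the TTL-hint encoding performed by __tlbi_level().
 * ttl_encode(), TG_4K and TTL_MASK are illustrative names only.
 */
#include <stdint.h>
#include <stdio.h>

#define TG_4K		1ULL			/* granule code for 4KB pages */
#define TTL_MASK	(0xfULL << 44)		/* bits [47:44], as TLBI_TTL_MASK */

static uint64_t ttl_encode(uint64_t arg, unsigned int level)
{
	uint64_t ttl = (TG_4K << 2) | (level & 3);	/* TG in [3:2], level in [1:0] */

	arg &= ~TTL_MASK;
	arg |= ttl << 44;				/* place hint in bits [47:44] */
	return arg;
}

int main(void)
{
	/* A level-3 (PTE) hint on 4KB pages encodes TTL = 0b0111. */
	uint64_t arg = ttl_encode(0, 3);

	printf("TTL field = 0x%llx\n", (unsigned long long)((arg >> 44) & 0xf)); /* 0x7 */
	return 0;
}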
#define __tlbi_user_level(op, arg, level) do { \
if (arm64_kernel_unmapped_at_el0()) \
__tlbi_level(op, (arg | USER_ASID_FLAG), level); \
} while (0)
/*
* This macro creates a properly formatted VA operand for the TLB RANGE.
* The value bit assignments are:
*
* +----------+------+-------+-------+-------+----------------------+
* |   ASID   |  TG  | SCALE |  NUM  |  TTL  |        BADDR         |
* +----------+------+-------+-------+-------+----------------------+
* |63      48|47  46|45   44|43   39|38   37|36                   0|
*
* The address range is determined by the formula below:
* [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE)
*
*/
#define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl) \
({ \
unsigned long __ta = (addr) >> PAGE_SHIFT; \
__ta &= GENMASK_ULL(36, 0); \
__ta |= (unsigned long)(ttl) << 37; \
__ta |= (unsigned long)(num) << 39; \
__ta |= (unsigned long)(scale) << 44; \
__ta |= get_trans_granule() << 46; \
__ta |= (unsigned long)(asid) << 48; \
__ta; \
})
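
To make the packing above concrete, here is a hedged userspace model of __TLBI_VADDR_RANGE(); build_range_operand() and TG_4K are invented names, and 4KB pages (PAGE_SHIFT = 12, TG = 1) are assumed. It packs BADDR, TTL, NUM, SCALE, TG and ASID into one 64-bit operand exactly as the bit diagram describes.

/*
 * Userspace model of the __TLBI_VADDR_RANGE() packing. Names are
 * illustrative; 4KB pages (PAGE_SHIFT = 12, TG = 1) are assumed.
 */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define TG_4K		1ULL

static uint64_t build_range_operand(uint64_t addr, uint64_t asid,
				    uint64_t scale, uint64_t num, uint64_t ttl)
{
	uint64_t ta = (addr >> PAGE_SHIFT) & ((1ULL << 37) - 1);	/* BADDR: bits [36:0] */

	ta |= ttl   << 37;	/* TTL:   bits [38:37] */
	ta |= num   << 39;	/* NUM:   bits [43:39] */
	ta |= scale << 44;	/* SCALE: bits [45:44] */
	ta |= TG_4K << 46;	/* TG:    bits [47:46] */
	ta |= asid  << 48;	/* ASID:  bits [63:48] */
	return ta;
}

int main(void)
{
	/* scale = 0, num = 2: covers (2 + 1) * 2^1 = 6 pages from 0x400000. */
	printf("0x%016llx\n",
	       (unsigned long long)build_range_operand(0x400000, 42, 0, 2, 3));
	return 0;
}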
/* These macros are used by the TLBI RANGE feature. */
#define __TLBI_RANGE_PAGES(num, scale) \
((unsigned long)((num) + 1) << (5 * (scale) + 1))
#define MAX_TLBI_RANGE_PAGES __TLBI_RANGE_PAGES(31, 3)
/*
* Generate 'num' values from -1 to 30 with -1 rejected by the
* __flush_tlb_range() loop below.
*/
#define TLBI_RANGE_MASK GENMASK_ULL(4, 0)
#define __TLBI_RANGE_NUM(pages, scale) \
((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1)
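
The arithmetic behind these macros is easiest to see with numbers. The snippet below simply re-evaluates them in userspace (the 8GB figure assumes 4KB pages), showing both the ceiling of a single range operation and how a -1 'num' is produced and then rejected.

/* Userspace re-evaluation of the range macros above (illustrative only). */
#include <stdio.h>

#define TLBI_RANGE_MASK			0x1fUL
#define __TLBI_RANGE_PAGES(num, scale)	\
	((unsigned long)((num) + 1) << (5 * (scale) + 1))
#define MAX_TLBI_RANGE_PAGES		__TLBI_RANGE_PAGES(31, 3)
#define __TLBI_RANGE_NUM(pages, scale)	\
	((int)(((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1)

int main(void)
{
	/* Largest single range op: (31 + 1) << 16 = 2097152 pages, i.e. 8GB at 4KB. */
	printf("MAX_TLBI_RANGE_PAGES = %lu\n", MAX_TLBI_RANGE_PAGES);

	/* 512 pages, scale 0: ((512 >> 1) & 0x1f) - 1 = -1, rejected by the flush loop. */
	printf("num(512, 0) = %d\n", __TLBI_RANGE_NUM(512, 0));

	/* 512 pages, scale 1: ((512 >> 6) & 0x1f) - 1 = 7, i.e. 8 * 64 = 512 pages. */
	printf("num(512, 1) = %d\n", __TLBI_RANGE_NUM(512, 1));
	return 0;
}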
/*
* TLB Invalidation
* ================
@ -179,34 +276,83 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
static inline void __flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
unsigned long stride, bool last_level)
unsigned long stride, bool last_level,
int tlb_level)
{
int num = 0;
int scale = 0;
unsigned long asid = ASID(vma->vm_mm);
unsigned long addr;
unsigned long pages;
start = round_down(start, stride);
end = round_up(end, stride);
pages = (end - start) >> PAGE_SHIFT;
if ((end - start) >= (MAX_TLBI_OPS * stride)) {
/*
* When the CPU does not use TLB range operations, we can handle up to
* (MAX_TLBI_OPS - 1) pages;
* when it does use TLB range operations, we can handle up to
* (MAX_TLBI_RANGE_PAGES - 1) pages.
*/
if ((!system_supports_tlb_range() &&
(end - start) >= (MAX_TLBI_OPS * stride)) ||
pages >= MAX_TLBI_RANGE_PAGES) {
flush_tlb_mm(vma->vm_mm);
return;
}
/* Convert the stride into units of 4k */
stride >>= 12;
start = __TLBI_VADDR(start, asid);
end = __TLBI_VADDR(end, asid);
dsb(ishst);
for (addr = start; addr < end; addr += stride) {
if (last_level) {
__tlbi(vale1is, addr);
__tlbi_user(vale1is, addr);
} else {
__tlbi(vae1is, addr);
__tlbi_user(vae1is, addr);
/*
* When the CPU does not support TLB range operations, flush the TLB
* entries one by one at the granularity of 'stride'. If the TLB
* range ops are supported, then:
*
* 1. If 'pages' is odd, flush the first page through non-range
* operations;
*
* 2. For remaining pages: the minimum range granularity is decided
* by 'scale', so multiple range TLBI operations may be required.
* Start from scale = 0, flush the corresponding number of pages
* ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
* until no pages left.
*
* Note that certain ranges can be represented by either num = 31 and
* scale or num = 0 and scale + 1. The loop below favours the latter
* since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
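* (A worked example of this decomposition is sketched after the function below.)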
*/
while (pages > 0) {
if (!system_supports_tlb_range() ||
pages % 2 == 1) {
addr = __TLBI_VADDR(start, asid);
if (last_level) {
__tlbi_level(vale1is, addr, tlb_level);
__tlbi_user_level(vale1is, addr, tlb_level);
} else {
__tlbi_level(vae1is, addr, tlb_level);
__tlbi_user_level(vae1is, addr, tlb_level);
}
start += stride;
pages -= stride >> PAGE_SHIFT;
continue;
}
num = __TLBI_RANGE_NUM(pages, scale);
if (num >= 0) {
addr = __TLBI_VADDR_RANGE(start, asid, scale,
num, tlb_level);
if (last_level) {
__tlbi(rvale1is, addr);
__tlbi_user(rvale1is, addr);
} else {
__tlbi(rvae1is, addr);
__tlbi_user(rvae1is, addr);
}
start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
pages -= __TLBI_RANGE_PAGES(num, scale);
}
scale++;
}
dsb(ish);
}
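
As a hedged illustration of the loop above, the following standalone sketch models only the page-count decomposition, assuming TLB range operations are supported and a stride of one 4KB page; decompose() and the helper macro names are made up for the example. It prints which single-page and range operations a given page count would generate instead of issuing TLBIs.

/*
 * Userspace model of the page-count decomposition in __flush_tlb_range(),
 * assuming TLB range ops are supported and stride == PAGE_SIZE (4KB).
 * Illustrative only: it prints the operations instead of issuing TLBIs.
 */
#include <stdio.h>

#define RANGE_MASK		0x1fUL
#define RANGE_NUM(pages, scale)	\
	((int)(((pages) >> (5 * (scale) + 1)) & RANGE_MASK) - 1)
#define RANGE_PAGES(num, scale)	\
	((unsigned long)((num) + 1) << (5 * (scale) + 1))

static void decompose(unsigned long pages)
{
	int scale = 0;

	printf("%lu pages:\n", pages);
	while (pages > 0) {
		if (pages % 2 == 1) {
			/* Odd remainder: issue one non-range (single page) TLBI. */
			printf("  single-page op\n");
			pages -= 1;
			continue;
		}

		int num = RANGE_NUM(pages, scale);

		if (num >= 0) {
			printf("  range op: scale=%d num=%d (%lu pages)\n",
			       scale, num, RANGE_PAGES(num, scale));
			pages -= RANGE_PAGES(num, scale);
		}
		scale++;
	}
}

int main(void)
{
	decompose(7);	/* one single-page op, then a 6-page range op */
	decompose(512);	/* a single range op: scale=1, num=7 */
	return 0;
}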
@ -217,8 +363,9 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
/*
* We cannot use leaf-only invalidation here, since we may be invalidating
* table entries as part of collapsing hugepages or moving page tables.
* Set tlb_level to 0 because we cannot get enough information here.
*/
__flush_tlb_range(vma, start, end, PAGE_SIZE, false);
__flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0);
}
static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)


@ -19,6 +19,7 @@
#include <linux/string.h>
#include <asm/cpufeature.h>
#include <asm/mmu.h>
#include <asm/ptrace.h>
#include <asm/memory.h>
#include <asm/extable.h>


@ -12,6 +12,8 @@
*/
#define VDSO_LBASE 0x0
#define __VVAR_PAGES 2
#ifndef __ASSEMBLY__
#include <generated/vdso-offsets.h>
