From 3f225f29c69c13ce1cbdb1d607a42efeef080056 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 9 Jan 2023 18:48:00 +0100 Subject: [PATCH 01/92] arm64: Stash shadow stack pointer in the task struct on interrupt commit 59b37fe52f49955791a460752c37145f1afdcad1 upstream. Instead of reloading the shadow call stack pointer from the ordinary stack, which may be vulnerable to the kind of gadget based attacks shadow call stacks were designed to prevent, let's store a task's shadow call stack pointer in the task struct when switching to the shadow IRQ stack. Given that currently, the task_struct::scs_sp field is only used to preserve the shadow call stack pointer while a task is scheduled out or running in user space, reusing this field to preserve and restore it while running off the IRQ stack must be safe, as those occurrences are guaranteed to never overlap. (The stack switching logic only switches stacks when running from the task stack, and so the value being saved here always corresponds to the task mode shadow stack) While at it, fold a mov/add/mov sequence into a single add. Signed-off-by: Ard Biesheuvel Reviewed-by: Kees Cook Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20230109174800.3286265-3-ardb@kernel.org Signed-off-by: Catalin Marinas [ardb: v5.10 backport, which doesn't have call_on_irq_stack() yet *] Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/entry.S | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 28d4cdeee5ae..55e477f73158 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -431,9 +431,7 @@ SYM_CODE_END(__swpan_exit_el0) .macro irq_stack_entry mov x19, sp // preserve the original sp -#ifdef CONFIG_SHADOW_CALL_STACK - mov x24, scs_sp // preserve the original shadow stack -#endif + scs_save tsk // preserve the original shadow stack /* * Compare sp with the base of the task stack. 
@@ -467,9 +465,7 @@ SYM_CODE_END(__swpan_exit_el0) */ .macro irq_stack_exit mov sp, x19 -#ifdef CONFIG_SHADOW_CALL_STACK - mov scs_sp, x24 -#endif + scs_load_current .endm /* GPRs used by entry code */ From 9079ff34a1ac65c52eb825840960257b68e49d47 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 1 May 2023 17:42:06 +0200 Subject: [PATCH 02/92] debugobject: Ensure pool refill (again) commit 0af462f19e635ad522f28981238334620881badc upstream. The recent fix to ensure atomicity of lookup and allocation inadvertently broke the pool refill mechanism. Prior to that change debug_object_activate() and debug_object_assert_init() invoked debug_object_init() to set up the tracking object for statically initialized objects. That's no longer the case and debug_object_init() is now the only place which does pool refills. Depending on the number of statically initialized objects this can be enough to actually deplete the pool, which was observed by Ido via a debugobjects OOM warning. Restore the old behaviour by adding explicit refill opportunities to debug_object_activate() and debug_object_assert_init(). 
Fixes: 63a759694eed ("debugobject: Prevent init race with static objects") Reported-by: Ido Schimmel Signed-off-by: Thomas Gleixner Tested-by: Ido Schimmel Link: https://lore.kernel.org/r/871qk05a9d.ffs@tglx Signed-off-by: Greg Kroah-Hartman --- lib/debugobjects.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 8282ae37db4e..824337ec36aa 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -590,6 +590,16 @@ static struct debug_obj *lookup_object_or_alloc(void *addr, struct debug_bucket return NULL; } +static void debug_objects_fill_pool(void) +{ + /* + * On RT enabled kernels the pool refill must happen in preemptible + * context: + */ + if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible()) + fill_pool(); +} + static void __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack) { @@ -598,7 +608,7 @@ __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack struct debug_obj *obj; unsigned long flags; - fill_pool(); + debug_objects_fill_pool(); db = get_bucket((unsigned long) addr); @@ -683,6 +693,8 @@ int debug_object_activate(void *addr, const struct debug_obj_descr *descr) if (!debug_objects_enabled) return 0; + debug_objects_fill_pool(); + db = get_bucket((unsigned long) addr); raw_spin_lock_irqsave(&db->lock, flags); @@ -892,6 +904,8 @@ void debug_object_assert_init(void *addr, const struct debug_obj_descr *descr) if (!debug_objects_enabled) return; + debug_objects_fill_pool(); + db = get_bucket((unsigned long) addr); raw_spin_lock_irqsave(&db->lock, flags); From c820c05c5ff6613ad424dce1cddc459ffd2fb96b Mon Sep 17 00:00:00 2001 From: Miles Chen Date: Thu, 14 Apr 2022 17:19:38 +0800 Subject: [PATCH 03/92] sound/oss/dmasound: fix 'dmasound_setup' defined but not used commit 357ad4d898286b94aaae0cb7e3f573459e5b98b9 upstream. 
We observed: 'dmasound_setup' defined but not used error with COMPILER=gcc ARCH=m68k DEFCONFIG=allmodconfig build. Fix it by adding __maybe_unused to dmasound_setup. Error(s): sound/oss/dmasound/dmasound_core.c:1431:12: error: 'dmasound_setup' defined but not used [-Werror=unused-function] Fixes: 9dd7c46346ca ("sound/oss/dmasound: fix build when drivers are mixed =y/=m") Signed-off-by: Miles Chen Acked-by: Randy Dunlap Link: https://lore.kernel.org/r/20220414091940.2216-1-miles.chen@mediatek.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/oss/dmasound/dmasound_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/oss/dmasound/dmasound_core.c b/sound/oss/dmasound/dmasound_core.c index 7a8698ff7f54..7454b058dda5 100644 --- a/sound/oss/dmasound/dmasound_core.c +++ b/sound/oss/dmasound/dmasound_core.c @@ -1442,7 +1442,7 @@ void dmasound_deinit(void) unregister_sound_dsp(sq_unit); } -static int dmasound_setup(char *str) +static int __maybe_unused dmasound_setup(char *str) { int ints[6], size; From 14fc6af67b3f54f6d5a02e8066b4981762ea17d1 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Fri, 6 Jan 2023 17:46:18 +0100 Subject: [PATCH 04/92] arm64: dts: qcom: sdm845: correct dynamic power coefficients commit 44750f153699b6e4f851a399287e5c8df208d696 upstream. While stressing EAS on my dragonboard RB3, I have noticed that LITTLE cores were never selected as the most energy efficient CPU whatever the utilization level of the waking task. The energy model framework uses its cost field to estimate the energy with the formula: nrg = cost of the selected OPP * utilization / CPU's max capacity which ends up selecting the CPU with the lowest cost / max capacity ratio as long as the utilization fits in the OPP's capacity. 
If we compare the cost of a little OPP with similar capacity of a big OPP like : OPP(kHz) OPP capacity cost max capacity cost/max capacity LITTLE 1766400 407 351114 407 863 big 1056000 408 520267 1024 508 This can be interpreted as the LITTLE core consumes 70% more than big core for the same compute capacity. According to [1], LITTLE consumes 10% less than big core for Coremark benchmark at those OPPs. If we consider that everything else stays unchanged, the dynamic-power-coefficient of LITTLE core should be only 53% of the current value: 290 * 53% = 154 Set the dynamic-power-coefficient of CPU0-3 to 154 to fix the energy model. [1] https://github.com/kdrag0n/freqbench/tree/master/results/sdm845/main Fixes: 0e0a8e35d725 ("arm64: dts: qcom: sdm845: correct dynamic power coefficients") Signed-off-by: Vincent Guittot Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230106164618.1845281-1-vincent.guittot@linaro.org Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/sdm845.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index 28fbd728304d..71e5b9fdc9e1 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -197,7 +197,7 @@ CPU0: cpu@0 { &LITTLE_CPU_SLEEP_1 &CLUSTER_SLEEP_0>; capacity-dmips-mhz = <611>; - dynamic-power-coefficient = <290>; + dynamic-power-coefficient = <154>; qcom,freq-domain = <&cpufreq_hw 0>; operating-points-v2 = <&cpu0_opp_table>; interconnects = <&gladiator_noc MASTER_APPSS_PROC 3 &mem_noc SLAVE_EBI1 3>, @@ -222,7 +222,7 @@ CPU1: cpu@100 { &LITTLE_CPU_SLEEP_1 &CLUSTER_SLEEP_0>; capacity-dmips-mhz = <611>; - dynamic-power-coefficient = <290>; + dynamic-power-coefficient = <154>; qcom,freq-domain = <&cpufreq_hw 0>; operating-points-v2 = <&cpu0_opp_table>; interconnects = <&gladiator_noc MASTER_APPSS_PROC 3 &mem_noc SLAVE_EBI1 3>, @@ -244,7 +244,7 @@ CPU2: cpu@200 { 
&LITTLE_CPU_SLEEP_1 &CLUSTER_SLEEP_0>; capacity-dmips-mhz = <611>; - dynamic-power-coefficient = <290>; + dynamic-power-coefficient = <154>; qcom,freq-domain = <&cpufreq_hw 0>; operating-points-v2 = <&cpu0_opp_table>; interconnects = <&gladiator_noc MASTER_APPSS_PROC 3 &mem_noc SLAVE_EBI1 3>, @@ -266,7 +266,7 @@ CPU3: cpu@300 { &LITTLE_CPU_SLEEP_1 &CLUSTER_SLEEP_0>; capacity-dmips-mhz = <611>; - dynamic-power-coefficient = <290>; + dynamic-power-coefficient = <154>; qcom,freq-domain = <&cpufreq_hw 0>; operating-points-v2 = <&cpu0_opp_table>; interconnects = <&gladiator_noc MASTER_APPSS_PROC 3 &mem_noc SLAVE_EBI1 3>, From a222d2794c53f8165de20aa91b39e35e4b72bce9 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Sat, 15 May 2021 16:03:15 +0900 Subject: [PATCH 05/92] scsi: target: core: Avoid smp_processor_id() in preemptible code commit 70ca3c57ff914113f681e657634f7fbfa68e1ad1 upstream. The BUG message "BUG: using smp_processor_id() in preemptible [00000000] code" was observed for TCMU devices with kernel config DEBUG_PREEMPT. The message was observed when blktests block/005 was run on TCMU devices with fileio backend or user:zbc backend [1]. The commit 1130b499b4a7 ("scsi: target: tcm_loop: Use LIO wq cmd submission helper") triggered the symptom. The commit modified work queue to handle commands and changed 'current->nr_cpu_allowed' at smp_processor_id() call. The message was also observed at system shutdown when TCMU devices were not cleaned up [2]. The function smp_processor_id() was called in SCSI host work queue for abort handling, and triggered the BUG message. This symptom was observed regardless of the commit 1130b499b4a7 ("scsi: target: tcm_loop: Use LIO wq cmd submission helper"). To avoid the preemptible code check at smp_processor_id(), get CPU ID with raw_smp_processor_id() instead. The CPU ID is used for performance improvement then thread move to other CPU will not affect the code. 
[1] [ 56.468103] run blktests block/005 at 2021-05-12 14:16:38 [ 57.369473] check_preemption_disabled: 85 callbacks suppressed [ 57.369480] BUG: using smp_processor_id() in preemptible [00000000] code: fio/1511 [ 57.369506] BUG: using smp_processor_id() in preemptible [00000000] code: fio/1510 [ 57.369512] BUG: using smp_processor_id() in preemptible [00000000] code: fio/1506 [ 57.369552] caller is __target_init_cmd+0x157/0x170 [target_core_mod] [ 57.369606] CPU: 4 PID: 1506 Comm: fio Not tainted 5.13.0-rc1+ #34 [ 57.369613] Hardware name: System manufacturer System Product Name/PRIME Z270-A, BIOS 1302 03/15/2018 [ 57.369617] Call Trace: [ 57.369621] BUG: using smp_processor_id() in preemptible [00000000] code: fio/1507 [ 57.369628] dump_stack+0x6d/0x89 [ 57.369642] check_preemption_disabled+0xc8/0xd0 [ 57.369628] caller is __target_init_cmd+0x157/0x170 [target_core_mod] [ 57.369655] __target_init_cmd+0x157/0x170 [target_core_mod] [ 57.369695] target_init_cmd+0x76/0x90 [target_core_mod] [ 57.369732] tcm_loop_queuecommand+0x109/0x210 [tcm_loop] [ 57.369744] scsi_queue_rq+0x38e/0xc40 [ 57.369761] __blk_mq_try_issue_directly+0x109/0x1c0 [ 57.369779] blk_mq_try_issue_directly+0x43/0x90 [ 57.369790] blk_mq_submit_bio+0x4e5/0x5d0 [ 57.369812] submit_bio_noacct+0x46e/0x4e0 [ 57.369830] __blkdev_direct_IO_simple+0x1a3/0x2d0 [ 57.369859] ? set_init_blocksize.isra.0+0x60/0x60 [ 57.369880] generic_file_read_iter+0x89/0x160 [ 57.369898] blkdev_read_iter+0x44/0x60 [ 57.369906] new_sync_read+0x102/0x170 [ 57.369929] vfs_read+0xd4/0x160 [ 57.369941] __x64_sys_pread64+0x6e/0xa0 [ 57.369946] ? 
lockdep_hardirqs_on+0x79/0x100 [ 57.369958] do_syscall_64+0x3a/0x70 [ 57.369965] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 57.369973] RIP: 0033:0x7f7ed4c1399f [ 57.369979] Code: 08 89 3c 24 48 89 4c 24 18 e8 7d f3 ff ff 4c 8b 54 24 18 48 8b 54 24 10 41 89 c0 48 8b 74 24 08 8b 3c 24 b8 11 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 31 44 89 c7 48 89 04 24 e8 cd f3 ff ff 48 8b [ 57.369983] RSP: 002b:00007ffd7918c580 EFLAGS: 00000293 ORIG_RAX: 0000000000000011 [ 57.369990] RAX: ffffffffffffffda RBX: 00000000015b4540 RCX: 00007f7ed4c1399f [ 57.369993] RDX: 0000000000001000 RSI: 00000000015de000 RDI: 0000000000000009 [ 57.369996] RBP: 00000000015b4540 R08: 0000000000000000 R09: 0000000000000001 [ 57.369999] R10: 0000000000e5c000 R11: 0000000000000293 R12: 00007f7eb5269a70 [ 57.370002] R13: 0000000000000000 R14: 0000000000001000 R15: 00000000015b4568 [ 57.370031] CPU: 7 PID: 1507 Comm: fio Not tainted 5.13.0-rc1+ #34 [ 57.370036] Hardware name: System manufacturer System Product Name/PRIME Z270-A, BIOS 1302 03/15/2018 [ 57.370039] Call Trace: [ 57.370045] dump_stack+0x6d/0x89 [ 57.370056] check_preemption_disabled+0xc8/0xd0 [ 57.370068] __target_init_cmd+0x157/0x170 [target_core_mod] [ 57.370121] target_init_cmd+0x76/0x90 [target_core_mod] [ 57.370178] tcm_loop_queuecommand+0x109/0x210 [tcm_loop] [ 57.370197] scsi_queue_rq+0x38e/0xc40 [ 57.370224] __blk_mq_try_issue_directly+0x109/0x1c0 ... 
[2] [ 117.458597] BUG: using smp_processor_id() in preemptible [00000000] code: kworker/u16:8 [ 117.467279] caller is __target_init_cmd+0x157/0x170 [target_core_mod] [ 117.473893] CPU: 1 PID: 418 Comm: kworker/u16:6 Not tainted 5.13.0-rc1+ #34 [ 117.481150] Hardware name: System manufacturer System Product Name/PRIME Z270-A, BIOS 8 [ 117.481153] Workqueue: scsi_tmf_7 scmd_eh_abort_handler [ 117.481156] Call Trace: [ 117.481158] dump_stack+0x6d/0x89 [ 117.481162] check_preemption_disabled+0xc8/0xd0 [ 117.512575] target_submit_tmr+0x41/0x150 [target_core_mod] [ 117.519705] tcm_loop_issue_tmr+0xa7/0x100 [tcm_loop] [ 117.524913] tcm_loop_abort_task+0x43/0x60 [tcm_loop] [ 117.530137] scmd_eh_abort_handler+0x7b/0x230 [ 117.534681] process_one_work+0x268/0x580 [ 117.538862] worker_thread+0x55/0x3b0 [ 117.542652] ? process_one_work+0x580/0x580 [ 117.548351] kthread+0x143/0x160 [ 117.551675] ? kthread_create_worker_on_cpu+0x40/0x40 [ 117.556873] ret_from_fork+0x1f/0x30 Link: https://lore.kernel.org/r/20210515070315.215801-1-shinichiro.kawasaki@wdc.com Fixes: 1526d9f10c61 ("scsi: target: Make state_list per CPU") Cc: stable@vger.kernel.org # v5.11+ Reviewed-by: Mike Christie Signed-off-by: Shin'ichiro Kawasaki Signed-off-by: Martin K. 
Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 230fffa993c0..2e97937f005f 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1396,7 +1396,7 @@ void transport_init_se_cmd( cmd->orig_fe_lun = unpacked_lun; if (!(cmd->se_cmd_flags & SCF_USE_CPUID)) - cmd->cpuid = smp_processor_id(); + cmd->cpuid = raw_smp_processor_id(); cmd->state_active = false; } From e044a24447189419c3a7ccc5fa6da7516036dc55 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 2 May 2023 10:25:24 +0200 Subject: [PATCH 06/92] netfilter: nf_tables: deactivate anonymous set from preparation phase commit c1592a89942e9678f7d9c8030efa777c0d57edab upstream. Toggle deleted anonymous sets as inactive in the next generation, so users cannot perform any update on it. Clear the generation bitmask in case the transaction is aborted. The following KASAN splat shows a set element deletion for a bound anonymous set that has been already removed in the same transaction. [ 64.921510] ================================================================== [ 64.923123] BUG: KASAN: wild-memory-access in nf_tables_commit+0xa24/0x1490 [nf_tables] [ 64.924745] Write of size 8 at addr dead000000000122 by task test/890 [ 64.927903] CPU: 3 PID: 890 Comm: test Not tainted 6.3.0+ #253 [ 64.931120] Call Trace: [ 64.932699] [ 64.934292] dump_stack_lvl+0x33/0x50 [ 64.935908] ? nf_tables_commit+0xa24/0x1490 [nf_tables] [ 64.937551] kasan_report+0xda/0x120 [ 64.939186] ? nf_tables_commit+0xa24/0x1490 [nf_tables] [ 64.940814] nf_tables_commit+0xa24/0x1490 [nf_tables] [ 64.942452] ? __kasan_slab_alloc+0x2d/0x60 [ 64.944070] ? nf_tables_setelem_notify+0x190/0x190 [nf_tables] [ 64.945710] ? kasan_set_track+0x21/0x30 [ 64.947323] nfnetlink_rcv_batch+0x709/0xd90 [nfnetlink] [ 64.948898] ? 
nfnetlink_rcv_msg+0x480/0x480 [nfnetlink] Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- include/net/netfilter/nf_tables.h | 1 + net/netfilter/nf_tables_api.c | 12 ++++++++++++ net/netfilter/nft_dynset.c | 2 +- net/netfilter/nft_lookup.c | 2 +- net/netfilter/nft_objref.c | 2 +- 5 files changed, 16 insertions(+), 3 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e66fee99ed3e..564fbe0c865f 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -507,6 +507,7 @@ struct nft_set_binding { }; enum nft_trans_phase; +void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set); void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding, enum nft_trans_phase phase); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7bb716df7afc..fe51cedd9cc3 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4479,12 +4479,24 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, } } +void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set) +{ + if (nft_set_is_anonymous(set)) + nft_clear(ctx->net, set); + + set->use++; +} +EXPORT_SYMBOL_GPL(nf_tables_activate_set); + void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding, enum nft_trans_phase phase) { switch (phase) { case NFT_TRANS_PREPARE: + if (nft_set_is_anonymous(set)) + nft_deactivate_next(ctx->net, set); + set->use--; return; case NFT_TRANS_ABORT: diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 8c45e01fecdd..038588d4d80e 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -233,7 +233,7 @@ static void nft_dynset_activate(const struct nft_ctx *ctx, { struct nft_dynset *priv = nft_expr_priv(expr); - priv->set->use++; + nf_tables_activate_set(ctx, 
priv->set); } static void nft_dynset_destroy(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index b0f558b4fea5..8bc008ff00cb 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -132,7 +132,7 @@ static void nft_lookup_activate(const struct nft_ctx *ctx, { struct nft_lookup *priv = nft_expr_priv(expr); - priv->set->use++; + nf_tables_activate_set(ctx, priv->set); } static void nft_lookup_destroy(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index bc104d36d3bb..25157d8cc250 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -180,7 +180,7 @@ static void nft_objref_map_activate(const struct nft_ctx *ctx, { struct nft_objref_map *priv = nft_expr_priv(expr); - priv->set->use++; + nf_tables_activate_set(ctx, priv->set); } static void nft_objref_map_destroy(const struct nft_ctx *ctx, From 6a392b806f185dc122761d17138ab3bdc74bb823 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 8 Apr 2021 14:51:22 +0200 Subject: [PATCH 07/92] tty: create internal tty.h file [ Upstream commit 98602c010ceba82f2c2384122dbd07bc965fd367 ] There are a number of functions and #defines in include/linux/tty.h that do not belong there as they are private to the tty core code. Create an initial drivers/tty/tty.h file and copy the odd "tty logging" macros into it to seed the file with some initial things that we know nothing outside of the tty core should be calling. 
Cc: Tetsuo Handa Cc: Jiri Slaby Link: https://lore.kernel.org/r/20210408125134.3016837-2-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 094fb49a2d0d ("tty: Prevent writing chars during tcsetattr TCSADRAIN/FLUSH") Signed-off-by: Sasha Levin --- drivers/tty/n_tty.c | 1 + drivers/tty/pty.c | 1 + drivers/tty/tty.h | 21 +++++++++++++++++++++ drivers/tty/tty_io.c | 1 + drivers/tty/tty_jobctrl.c | 1 + drivers/tty/tty_ldisc.c | 1 + drivers/tty/tty_port.c | 1 + include/linux/tty.h | 12 ------------ 8 files changed, 27 insertions(+), 12 deletions(-) create mode 100644 drivers/tty/tty.h diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 12dde01e576b..8e7931d93543 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -49,6 +49,7 @@ #include #include #include +#include "tty.h" /* * Until this number of characters is queued in the xmit buffer, select will diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index 16498f5fba64..ca3e5a6c1a49 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -29,6 +29,7 @@ #include #include #include +#include "tty.h" #undef TTY_DEBUG_HANGUP #ifdef TTY_DEBUG_HANGUP diff --git a/drivers/tty/tty.h b/drivers/tty/tty.h new file mode 100644 index 000000000000..f4cd20261e91 --- /dev/null +++ b/drivers/tty/tty.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * TTY core internal functions + */ + +#ifndef _TTY_INTERNAL_H +#define _TTY_INTERNAL_H + +#define tty_msg(fn, tty, f, ...) \ + fn("%s %s: " f, tty_driver_name(tty), tty_name(tty), ##__VA_ARGS__) + +#define tty_debug(tty, f, ...) tty_msg(pr_debug, tty, f, ##__VA_ARGS__) +#define tty_info(tty, f, ...) tty_msg(pr_info, tty, f, ##__VA_ARGS__) +#define tty_notice(tty, f, ...) tty_msg(pr_notice, tty, f, ##__VA_ARGS__) +#define tty_warn(tty, f, ...) tty_msg(pr_warn, tty, f, ##__VA_ARGS__) +#define tty_err(tty, f, ...) tty_msg(pr_err, tty, f, ##__VA_ARGS__) + +#define tty_info_ratelimited(tty, f, ...) 
\ + tty_msg(pr_info_ratelimited, tty, f, ##__VA_ARGS__) + +#endif diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index c37d2657308c..86fbfe42ce0a 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -108,6 +108,7 @@ #include #include +#include "tty.h" #undef TTY_DEBUG_HANGUP #ifdef TTY_DEBUG_HANGUP diff --git a/drivers/tty/tty_jobctrl.c b/drivers/tty/tty_jobctrl.c index aa6d0537b379..95d67613b25b 100644 --- a/drivers/tty/tty_jobctrl.c +++ b/drivers/tty/tty_jobctrl.c @@ -11,6 +11,7 @@ #include #include #include +#include "tty.h" static int is_ignored(int sig) { diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index fe37ec331289..c23938b8628d 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -19,6 +19,7 @@ #include #include #include +#include "tty.h" #undef LDISC_DEBUG_HANGUP diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c index ea80bf872f54..cbb56f725bc4 100644 --- a/drivers/tty/tty_port.c +++ b/drivers/tty/tty_port.c @@ -18,6 +18,7 @@ #include #include #include +#include "tty.h" static int tty_port_default_receive_buf(struct tty_port *port, const unsigned char *p, diff --git a/include/linux/tty.h b/include/linux/tty.h index 5972f43b9d5a..9e3725589214 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -786,16 +786,4 @@ static inline void proc_tty_register_driver(struct tty_driver *d) {} static inline void proc_tty_unregister_driver(struct tty_driver *d) {} #endif -#define tty_msg(fn, tty, f, ...) \ - fn("%s %s: " f, tty_driver_name(tty), tty_name(tty), ##__VA_ARGS__) - -#define tty_debug(tty, f, ...) tty_msg(pr_debug, tty, f, ##__VA_ARGS__) -#define tty_info(tty, f, ...) tty_msg(pr_info, tty, f, ##__VA_ARGS__) -#define tty_notice(tty, f, ...) tty_msg(pr_notice, tty, f, ##__VA_ARGS__) -#define tty_warn(tty, f, ...) tty_msg(pr_warn, tty, f, ##__VA_ARGS__) -#define tty_err(tty, f, ...) tty_msg(pr_err, tty, f, ##__VA_ARGS__) - -#define tty_info_ratelimited(tty, f, ...) 
\ - tty_msg(pr_info_ratelimited, tty, f, ##__VA_ARGS__) - #endif From f665d81ffad7a3e9af6b22acf16a60ebbdfcef64 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 8 Apr 2021 14:51:29 +0200 Subject: [PATCH 08/92] tty: audit: move some local functions out of tty.h [ Upstream commit da5d669e00d2c437b3f508d60add417fc74f4bb6 ] The functions tty_audit_add_data() and tty_audit_tiocsti() are local to the tty core code, and do not need to be in a "kernel-wide" header file so move them to drivers/tty/tty.h Cc: Jiri Slaby Link: https://lore.kernel.org/r/20210408125134.3016837-9-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 094fb49a2d0d ("tty: Prevent writing chars during tcsetattr TCSADRAIN/FLUSH") Signed-off-by: Sasha Levin --- drivers/tty/tty.h | 14 ++++++++++++++ drivers/tty/tty_audit.c | 1 + include/linux/tty.h | 10 ---------- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/tty/tty.h b/drivers/tty/tty.h index f4cd20261e91..f131d538b62b 100644 --- a/drivers/tty/tty.h +++ b/drivers/tty/tty.h @@ -18,4 +18,18 @@ #define tty_info_ratelimited(tty, f, ...) 
\ tty_msg(pr_info_ratelimited, tty, f, ##__VA_ARGS__) +/* tty_audit.c */ +#ifdef CONFIG_AUDIT +void tty_audit_add_data(struct tty_struct *tty, const void *data, size_t size); +void tty_audit_tiocsti(struct tty_struct *tty, char ch); +#else +static inline void tty_audit_add_data(struct tty_struct *tty, const void *data, + size_t size) +{ +} +static inline void tty_audit_tiocsti(struct tty_struct *tty, char ch) +{ +} +#endif + #endif diff --git a/drivers/tty/tty_audit.c b/drivers/tty/tty_audit.c index 9f906a5b8e81..9b30edee71fe 100644 --- a/drivers/tty/tty_audit.c +++ b/drivers/tty/tty_audit.c @@ -10,6 +10,7 @@ #include #include #include +#include "tty.h" struct tty_audit_buf { struct mutex mutex; /* Protects all data below */ diff --git a/include/linux/tty.h b/include/linux/tty.h index 9e3725589214..a1a9c4b8210e 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -731,20 +731,10 @@ static inline void n_tty_init(void) { } /* tty_audit.c */ #ifdef CONFIG_AUDIT -extern void tty_audit_add_data(struct tty_struct *tty, const void *data, - size_t size); extern void tty_audit_exit(void); extern void tty_audit_fork(struct signal_struct *sig); -extern void tty_audit_tiocsti(struct tty_struct *tty, char ch); extern int tty_audit_push(void); #else -static inline void tty_audit_add_data(struct tty_struct *tty, const void *data, - size_t size) -{ -} -static inline void tty_audit_tiocsti(struct tty_struct *tty, char ch) -{ -} static inline void tty_audit_exit(void) { } From 1924d47a2809e2690fe6414d120d51b2a92ffeae Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 8 Apr 2021 14:51:30 +0200 Subject: [PATCH 09/92] tty: move some internal tty lock enums and functions out of tty.h [ Upstream commit 6c80c0b94b94192d9a34b400f8237703c6475f4d ] Move the TTY_LOCK_* enums and tty_ldisc lock functions out of the global tty.h into the local header file to clean things up. 
Cc: Jiri Slaby Link: https://lore.kernel.org/r/20210408125134.3016837-10-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 094fb49a2d0d ("tty: Prevent writing chars during tcsetattr TCSADRAIN/FLUSH") Signed-off-by: Sasha Levin --- drivers/tty/tty.h | 26 ++++++++++++++++++++++++++ drivers/tty/tty_buffer.c | 2 +- drivers/tty/tty_mutex.c | 1 + include/linux/tty.h | 26 -------------------------- 4 files changed, 28 insertions(+), 27 deletions(-) diff --git a/drivers/tty/tty.h b/drivers/tty/tty.h index f131d538b62b..552e263e02df 100644 --- a/drivers/tty/tty.h +++ b/drivers/tty/tty.h @@ -18,6 +18,32 @@ #define tty_info_ratelimited(tty, f, ...) \ tty_msg(pr_info_ratelimited, tty, f, ##__VA_ARGS__) +/* + * Lock subclasses for tty locks + * + * TTY_LOCK_NORMAL is for normal ttys and master ptys. + * TTY_LOCK_SLAVE is for slave ptys only. + * + * Lock subclasses are necessary for handling nested locking with pty pairs. + * tty locks which use nested locking: + * + * legacy_mutex - Nested tty locks are necessary for releasing pty pairs. + * The stable lock order is master pty first, then slave pty. + * termios_rwsem - The stable lock order is tty_buffer lock->termios_rwsem. + * Subclassing this lock enables the slave pty to hold its + * termios_rwsem when claiming the master tty_buffer lock. + * tty_buffer lock - slave ptys can claim nested buffer lock when handling + * signal chars. The stable lock order is slave pty, then + * master. 
+ */ +enum { + TTY_LOCK_NORMAL = 0, + TTY_LOCK_SLAVE, +}; + +int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout); +void tty_ldisc_unlock(struct tty_struct *tty); + /* tty_audit.c */ #ifdef CONFIG_AUDIT void tty_audit_add_data(struct tty_struct *tty, const void *data, size_t size); diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index 5bbc2e010b48..9f2379815557 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -17,7 +17,7 @@ #include #include #include - +#include "tty.h" #define MIN_TTYB_SIZE 256 #define TTYB_ALIGN_MASK 255 diff --git a/drivers/tty/tty_mutex.c b/drivers/tty/tty_mutex.c index 2640635ee177..393518a24cfe 100644 --- a/drivers/tty/tty_mutex.c +++ b/drivers/tty/tty_mutex.c @@ -4,6 +4,7 @@ #include #include #include +#include "tty.h" /* Legacy tty mutex glue */ diff --git a/include/linux/tty.h b/include/linux/tty.h index a1a9c4b8210e..af398d0aa9fd 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -16,30 +16,6 @@ #include -/* - * Lock subclasses for tty locks - * - * TTY_LOCK_NORMAL is for normal ttys and master ptys. - * TTY_LOCK_SLAVE is for slave ptys only. - * - * Lock subclasses are necessary for handling nested locking with pty pairs. - * tty locks which use nested locking: - * - * legacy_mutex - Nested tty locks are necessary for releasing pty pairs. - * The stable lock order is master pty first, then slave pty. - * termios_rwsem - The stable lock order is tty_buffer lock->termios_rwsem. - * Subclassing this lock enables the slave pty to hold its - * termios_rwsem when claiming the master tty_buffer lock. - * tty_buffer lock - slave ptys can claim nested buffer lock when handling - * signal chars. The stable lock order is slave pty, then - * master. - */ - -enum { - TTY_LOCK_NORMAL = 0, - TTY_LOCK_SLAVE, -}; - /* * (Note: the *_driver.minor_start values 1, 64, 128, 192 are * hardcoded at present.) 
@@ -419,8 +395,6 @@ extern const char *tty_name(const struct tty_struct *tty); extern struct tty_struct *tty_kopen(dev_t device); extern void tty_kclose(struct tty_struct *tty); extern int tty_dev_name_to_number(const char *name, dev_t *number); -extern int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout); -extern void tty_ldisc_unlock(struct tty_struct *tty); extern ssize_t redirected_tty_write(struct kiocb *, struct iov_iter *); #else static inline void tty_kref_put(struct tty_struct *tty) From 57b510c7d37717784a5c082c7ec66baecfe6dfff Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 8 Apr 2021 14:51:32 +0200 Subject: [PATCH 10/92] tty: move some tty-only functions to drivers/tty/tty.h [ Upstream commit 9f72cab1596327e1011ab4599c07b165e0fb45db ] The flow change and redirected_tty_write() logic is internal to the tty core only, so move it out of the include/linux/tty.h file. Cc: Jiri Slaby Link: https://lore.kernel.org/r/20210408125134.3016837-12-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 094fb49a2d0d ("tty: Prevent writing chars during tcsetattr TCSADRAIN/FLUSH") Signed-off-by: Sasha Levin --- drivers/tty/tty.h | 17 +++++++++++++++++ drivers/tty/tty_ioctl.c | 1 + include/linux/tty.h | 16 ---------------- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/drivers/tty/tty.h b/drivers/tty/tty.h index 552e263e02df..9eda9e5f8ad5 100644 --- a/drivers/tty/tty.h +++ b/drivers/tty/tty.h @@ -41,6 +41,21 @@ enum { TTY_LOCK_SLAVE, }; +/* Values for tty->flow_change */ +#define TTY_THROTTLE_SAFE 1 +#define TTY_UNTHROTTLE_SAFE 2 + +static inline void __tty_set_flow_change(struct tty_struct *tty, int val) +{ + tty->flow_change = val; +} + +static inline void tty_set_flow_change(struct tty_struct *tty, int val) +{ + tty->flow_change = val; + smp_mb(); +} + int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout); void tty_ldisc_unlock(struct tty_struct *tty); @@ -58,4 +73,6 @@ static inline void 
tty_audit_tiocsti(struct tty_struct *tty, char ch) } #endif +ssize_t redirected_tty_write(struct kiocb *, struct iov_iter *); + #endif diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c index 803da2d111c8..50e65784fbf7 100644 --- a/drivers/tty/tty_ioctl.c +++ b/drivers/tty/tty_ioctl.c @@ -21,6 +21,7 @@ #include #include #include +#include "tty.h" #include #include diff --git a/include/linux/tty.h b/include/linux/tty.h index af398d0aa9fd..a641fc6a7fa8 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -350,21 +350,6 @@ struct tty_file_private { #define TTY_LDISC_CHANGING 20 /* Change pending - non-block IO */ #define TTY_LDISC_HALTED 22 /* Line discipline is halted */ -/* Values for tty->flow_change */ -#define TTY_THROTTLE_SAFE 1 -#define TTY_UNTHROTTLE_SAFE 2 - -static inline void __tty_set_flow_change(struct tty_struct *tty, int val) -{ - tty->flow_change = val; -} - -static inline void tty_set_flow_change(struct tty_struct *tty, int val) -{ - tty->flow_change = val; - smp_mb(); -} - static inline bool tty_io_nonblock(struct tty_struct *tty, struct file *file) { return file->f_flags & O_NONBLOCK || @@ -395,7 +380,6 @@ extern const char *tty_name(const struct tty_struct *tty); extern struct tty_struct *tty_kopen(dev_t device); extern void tty_kclose(struct tty_struct *tty); extern int tty_dev_name_to_number(const char *name, dev_t *number); -extern ssize_t redirected_tty_write(struct kiocb *, struct iov_iter *); #else static inline void tty_kref_put(struct tty_struct *tty) { } From 6c2ee50c9012b926aa298538a347a747e5ce6ba7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 8 Apr 2021 14:51:34 +0200 Subject: [PATCH 11/92] tty: clean include/linux/tty.h up [ Upstream commit 5ffa6e344a1c92a27c242f500fc74e6eb361a4bc ] There are a lot of tty-core-only functions that are listed in include/linux/tty.h. Move them to drivers/tty/tty.h so that no one else can accidentally call them or think that they are public functions. 
Cc: Jiri Slaby Link: https://lore.kernel.org/r/20210408125134.3016837-14-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 094fb49a2d0d ("tty: Prevent writing chars during tcsetattr TCSADRAIN/FLUSH") Signed-off-by: Sasha Levin --- drivers/tty/n_gsm.c | 1 + drivers/tty/n_hdlc.c | 1 + drivers/tty/tty.h | 37 +++++++++++++++++++++++++++++++++++++ drivers/tty/tty_baudrate.c | 1 + include/linux/tty.h | 33 --------------------------------- 5 files changed, 40 insertions(+), 33 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index f5063499f9cf..23b014b8c919 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -50,6 +50,7 @@ #include #include #include +#include "tty.h" static int debug; module_param(debug, int, 0600); diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c index 48c64e68017c..697199a3ca01 100644 --- a/drivers/tty/n_hdlc.c +++ b/drivers/tty/n_hdlc.c @@ -100,6 +100,7 @@ #include #include +#include "tty.h" /* * Buffers for individual HDLC frames diff --git a/drivers/tty/tty.h b/drivers/tty/tty.h index 9eda9e5f8ad5..74ed99bc5449 100644 --- a/drivers/tty/tty.h +++ b/drivers/tty/tty.h @@ -59,6 +59,43 @@ static inline void tty_set_flow_change(struct tty_struct *tty, int val) int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout); void tty_ldisc_unlock(struct tty_struct *tty); +int __tty_check_change(struct tty_struct *tty, int sig); +int tty_check_change(struct tty_struct *tty); +void __stop_tty(struct tty_struct *tty); +void __start_tty(struct tty_struct *tty); +void tty_vhangup_session(struct tty_struct *tty); +void tty_open_proc_set_tty(struct file *filp, struct tty_struct *tty); +int tty_signal_session_leader(struct tty_struct *tty, int exit_session); +void session_clear_tty(struct pid *session); +void tty_buffer_free_all(struct tty_port *port); +void tty_buffer_flush(struct tty_struct *tty, struct tty_ldisc *ld); +void tty_buffer_init(struct tty_port *port); +void 
tty_buffer_set_lock_subclass(struct tty_port *port); +bool tty_buffer_restart_work(struct tty_port *port); +bool tty_buffer_cancel_work(struct tty_port *port); +void tty_buffer_flush_work(struct tty_port *port); +speed_t tty_termios_input_baud_rate(struct ktermios *termios); +void tty_ldisc_hangup(struct tty_struct *tty, bool reset); +int tty_ldisc_reinit(struct tty_struct *tty, int disc); +long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +long tty_jobctrl_ioctl(struct tty_struct *tty, struct tty_struct *real_tty, + struct file *file, unsigned int cmd, unsigned long arg); +void tty_default_fops(struct file_operations *fops); +struct tty_struct *alloc_tty_struct(struct tty_driver *driver, int idx); +int tty_alloc_file(struct file *file); +void tty_add_file(struct tty_struct *tty, struct file *file); +void tty_free_file(struct file *file); +int tty_release(struct inode *inode, struct file *filp); + +#define tty_is_writelocked(tty) (mutex_is_locked(&tty->atomic_write_lock)) + +int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty); +void tty_ldisc_release(struct tty_struct *tty); +int __must_check tty_ldisc_init(struct tty_struct *tty); +void tty_ldisc_deinit(struct tty_struct *tty); + +void tty_sysctl_init(void); + /* tty_audit.c */ #ifdef CONFIG_AUDIT void tty_audit_add_data(struct tty_struct *tty, const void *data, size_t size); diff --git a/drivers/tty/tty_baudrate.c b/drivers/tty/tty_baudrate.c index 84fec3c62d6a..9d0093d84e08 100644 --- a/drivers/tty/tty_baudrate.c +++ b/drivers/tty/tty_baudrate.c @@ -8,6 +8,7 @@ #include #include #include +#include "tty.h" /* diff --git a/include/linux/tty.h b/include/linux/tty.h index a641fc6a7fa8..e51d75f5165b 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -432,11 +432,7 @@ static inline struct tty_struct *tty_kref_get(struct tty_struct *tty) extern const char *tty_driver_name(const struct tty_struct *tty); extern void tty_wait_until_sent(struct tty_struct *tty, long 
timeout); -extern int __tty_check_change(struct tty_struct *tty, int sig); -extern int tty_check_change(struct tty_struct *tty); -extern void __stop_tty(struct tty_struct *tty); extern void stop_tty(struct tty_struct *tty); -extern void __start_tty(struct tty_struct *tty); extern void start_tty(struct tty_struct *tty); extern int tty_register_driver(struct tty_driver *driver); extern int tty_unregister_driver(struct tty_driver *driver); @@ -461,23 +457,11 @@ extern int tty_do_resize(struct tty_struct *tty, struct winsize *ws); extern int is_current_pgrp_orphaned(void); extern void tty_hangup(struct tty_struct *tty); extern void tty_vhangup(struct tty_struct *tty); -extern void tty_vhangup_session(struct tty_struct *tty); extern int tty_hung_up_p(struct file *filp); extern void do_SAK(struct tty_struct *tty); extern void __do_SAK(struct tty_struct *tty); -extern void tty_open_proc_set_tty(struct file *filp, struct tty_struct *tty); -extern int tty_signal_session_leader(struct tty_struct *tty, int exit_session); -extern void session_clear_tty(struct pid *session); extern void no_tty(void); -extern void tty_buffer_free_all(struct tty_port *port); -extern void tty_buffer_flush(struct tty_struct *tty, struct tty_ldisc *ld); -extern void tty_buffer_init(struct tty_port *port); -extern void tty_buffer_set_lock_subclass(struct tty_port *port); -extern bool tty_buffer_restart_work(struct tty_port *port); -extern bool tty_buffer_cancel_work(struct tty_port *port); -extern void tty_buffer_flush_work(struct tty_port *port); extern speed_t tty_termios_baud_rate(struct ktermios *termios); -extern speed_t tty_termios_input_baud_rate(struct ktermios *termios); extern void tty_termios_encode_baud_rate(struct ktermios *termios, speed_t ibaud, speed_t obaud); extern void tty_encode_baud_rate(struct tty_struct *tty, @@ -505,27 +489,16 @@ extern int tty_set_termios(struct tty_struct *tty, struct ktermios *kt); extern struct tty_ldisc *tty_ldisc_ref(struct tty_struct *); extern void 
tty_ldisc_deref(struct tty_ldisc *); extern struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *); -extern void tty_ldisc_hangup(struct tty_struct *tty, bool reset); -extern int tty_ldisc_reinit(struct tty_struct *tty, int disc); extern const struct seq_operations tty_ldiscs_seq_ops; extern void tty_wakeup(struct tty_struct *tty); extern void tty_ldisc_flush(struct tty_struct *tty); -extern long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg); extern int tty_mode_ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg); -extern long tty_jobctrl_ioctl(struct tty_struct *tty, struct tty_struct *real_tty, - struct file *file, unsigned int cmd, unsigned long arg); extern int tty_perform_flush(struct tty_struct *tty, unsigned long arg); -extern void tty_default_fops(struct file_operations *fops); -extern struct tty_struct *alloc_tty_struct(struct tty_driver *driver, int idx); -extern int tty_alloc_file(struct file *file); -extern void tty_add_file(struct tty_struct *tty, struct file *file); -extern void tty_free_file(struct file *file); extern struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx); extern void tty_release_struct(struct tty_struct *tty, int idx); -extern int tty_release(struct inode *inode, struct file *filp); extern void tty_init_termios(struct tty_struct *tty); extern void tty_save_termios(struct tty_struct *tty); extern int tty_standard_install(struct tty_driver *driver, @@ -533,8 +506,6 @@ extern int tty_standard_install(struct tty_driver *driver, extern struct mutex tty_mutex; -#define tty_is_writelocked(tty) (mutex_is_locked(&tty->atomic_write_lock)) - extern void tty_port_init(struct tty_port *port); extern void tty_port_link_device(struct tty_port *port, struct tty_driver *driver, unsigned index); @@ -672,10 +643,6 @@ static inline int tty_port_users(struct tty_port *port) extern int tty_register_ldisc(int disc, struct tty_ldisc_ops *new_ldisc); extern int tty_unregister_ldisc(int 
disc); extern int tty_set_ldisc(struct tty_struct *tty, int disc); -extern int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty); -extern void tty_ldisc_release(struct tty_struct *tty); -extern int __must_check tty_ldisc_init(struct tty_struct *tty); -extern void tty_ldisc_deinit(struct tty_struct *tty); extern int tty_ldisc_receive_buf(struct tty_ldisc *ld, const unsigned char *p, char *f, int count); From 2fcb12b3f421f3a26d912744df22dfbd771e0f1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 17 Mar 2023 13:33:17 +0200 Subject: [PATCH 12/92] tty: Prevent writing chars during tcsetattr TCSADRAIN/FLUSH MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 094fb49a2d0d6827c86d2e0840873e6db0c491d2 ] If userspace races tcsetattr() with a write, the drained condition might not be guaranteed by the kernel. There is a race window after checking Tx is empty before tty_set_termios() takes termios_rwsem for write. During that race window, more characters can be queued by a racing writer. Any ongoing transmission might produce garbage during HW's ->set_termios() call. The intent of TCSADRAIN/FLUSH seems to be preventing such a character corruption. If those flags are set, take tty's write lock to stop any writer before performing the lower layer Tx empty check and wait for the pending characters to be sent (if any). The initial wait for all-writers-done must be placed outside of tty's write lock to avoid deadlock which makes it impossible to use tty_wait_until_sent(). The write lock is retried if a racing write is detected. 
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20230317113318.31327-2-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/tty.h | 2 ++ drivers/tty/tty_io.c | 4 ++-- drivers/tty/tty_ioctl.c | 47 +++++++++++++++++++++++++++++------------ 3 files changed, 38 insertions(+), 15 deletions(-) diff --git a/drivers/tty/tty.h b/drivers/tty/tty.h index 74ed99bc5449..1908f27a795a 100644 --- a/drivers/tty/tty.h +++ b/drivers/tty/tty.h @@ -63,6 +63,8 @@ int __tty_check_change(struct tty_struct *tty, int sig); int tty_check_change(struct tty_struct *tty); void __stop_tty(struct tty_struct *tty); void __start_tty(struct tty_struct *tty); +void tty_write_unlock(struct tty_struct *tty); +int tty_write_lock(struct tty_struct *tty, int ndelay); void tty_vhangup_session(struct tty_struct *tty); void tty_open_proc_set_tty(struct file *filp, struct tty_struct *tty); int tty_signal_session_leader(struct tty_struct *tty, int exit_session); diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 86fbfe42ce0a..094e82a12d29 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -942,13 +942,13 @@ static ssize_t tty_read(struct kiocb *iocb, struct iov_iter *to) return i; } -static void tty_write_unlock(struct tty_struct *tty) +void tty_write_unlock(struct tty_struct *tty) { mutex_unlock(&tty->atomic_write_lock); wake_up_interruptible_poll(&tty->write_wait, EPOLLOUT); } -static int tty_write_lock(struct tty_struct *tty, int ndelay) +int tty_write_lock(struct tty_struct *tty, int ndelay) { if (!mutex_trylock(&tty->atomic_write_lock)) { if (ndelay) diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c index 50e65784fbf7..68b07250dcb6 100644 --- a/drivers/tty/tty_ioctl.c +++ b/drivers/tty/tty_ioctl.c @@ -398,22 +398,43 @@ static int set_termios(struct tty_struct *tty, void __user *arg, int opt) tmp_termios.c_ispeed = 
tty_termios_input_baud_rate(&tmp_termios); tmp_termios.c_ospeed = tty_termios_baud_rate(&tmp_termios); - ld = tty_ldisc_ref(tty); + if (opt & (TERMIOS_FLUSH|TERMIOS_WAIT)) { +retry_write_wait: + retval = wait_event_interruptible(tty->write_wait, !tty_chars_in_buffer(tty)); + if (retval < 0) + return retval; - if (ld != NULL) { - if ((opt & TERMIOS_FLUSH) && ld->ops->flush_buffer) - ld->ops->flush_buffer(tty); - tty_ldisc_deref(ld); + if (tty_write_lock(tty, 0) < 0) + goto retry_write_wait; + + /* Racing writer? */ + if (tty_chars_in_buffer(tty)) { + tty_write_unlock(tty); + goto retry_write_wait; + } + + ld = tty_ldisc_ref(tty); + if (ld != NULL) { + if ((opt & TERMIOS_FLUSH) && ld->ops->flush_buffer) + ld->ops->flush_buffer(tty); + tty_ldisc_deref(ld); + } + + if ((opt & TERMIOS_WAIT) && tty->ops->wait_until_sent) { + tty->ops->wait_until_sent(tty, 0); + if (signal_pending(current)) { + tty_write_unlock(tty); + return -ERESTARTSYS; + } + } + + tty_set_termios(tty, &tmp_termios); + + tty_write_unlock(tty); + } else { + tty_set_termios(tty, &tmp_termios); } - if (opt & TERMIOS_WAIT) { - tty_wait_until_sent(tty, 0); - if (signal_pending(current)) - return -ERESTARTSYS; - } - - tty_set_termios(tty, &tmp_termios); - /* FIXME: Arguably if tmp_termios == tty->termios AND the actual requested termios was not tmp_termios then we may want to return an error as no user requested change has From 6f60aae72cccb5afe78487560a485e1c75132441 Mon Sep 17 00:00:00 2001 From: Tze-nan Wu Date: Wed, 26 Apr 2023 14:20:23 +0800 Subject: [PATCH 13/92] ring-buffer: Ensure proper resetting of atomic variables in ring_buffer_reset_online_cpus [ Upstream commit 7c339fb4d8577792378136c15fde773cfb863cb8 ] In ring_buffer_reset_online_cpus, the buffer_size_kb write operation may permanently fail if the cpu_online_mask changes between two for_each_online_buffer_cpu loops. 
The number of increases and decreases on both cpu_buffer->resize_disabled and cpu_buffer->record_disabled may be inconsistent, causing some CPUs to have non-zero values for these atomic variables after the function returns. This issue can be reproduced by "echo 0 > trace" while hotplugging cpu. After reproducing success, we can find out buffer_size_kb will not be functional anymore. To prevent leaving 'resize_disabled' and 'record_disabled' non-zero after ring_buffer_reset_online_cpus returns, we ensure that each atomic variable has been set up before atomic_sub() to it. Link: https://lore.kernel.org/linux-trace-kernel/20230426062027.17451-1-Tze-nan.Wu@mediatek.com Cc: stable@vger.kernel.org Cc: Cc: npiggin@gmail.com Fixes: b23d7a5f4a07 ("ring-buffer: speed up buffer resets by avoiding synchronize_rcu for each CPU") Reviewed-by: Cheng-Jui Wang Signed-off-by: Tze-nan Wu Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/ring_buffer.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 1fe6b29366f1..f08904914166 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -5051,6 +5051,9 @@ void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu) } EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu); +/* Flag to ensure proper resetting of atomic variables */ +#define RESET_BIT (1 << 30) + /** * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer * @buffer: The ring buffer to reset a per cpu buffer of @@ -5067,20 +5070,27 @@ void ring_buffer_reset_online_cpus(struct trace_buffer *buffer) for_each_online_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; - atomic_inc(&cpu_buffer->resize_disabled); + atomic_add(RESET_BIT, &cpu_buffer->resize_disabled); atomic_inc(&cpu_buffer->record_disabled); } /* Make sure all commits have finished */ synchronize_rcu(); - for_each_online_buffer_cpu(buffer, cpu) { + 
for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; + /* + * If a CPU came online during the synchronize_rcu(), then + * ignore it. + */ + if (!(atomic_read(&cpu_buffer->resize_disabled) & RESET_BIT)) + continue; + reset_disabled_cpu_buffer(cpu_buffer); atomic_dec(&cpu_buffer->record_disabled); - atomic_dec(&cpu_buffer->resize_disabled); + atomic_sub(RESET_BIT, &cpu_buffer->resize_disabled); } mutex_unlock(&buffer->mutex); From c4e636f025a14e6d27f144fc0f16f442a4663f39 Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Tue, 28 Mar 2023 15:16:36 +0000 Subject: [PATCH 14/92] crypto: ccp - Clear PSP interrupt status register before calling handler [ Upstream commit 45121ad4a1750ca47ce3f32bd434bdb0cdbf0043 ] The PSP IRQ is edge-triggered (MSI or MSI-X) in all cases supported by the psp module so clear the interrupt status register early in the handler to prevent missed interrupts. sev_irq_handler() calls wake_up() on a wait queue, which can result in a new command being submitted from a different CPU. This then races with the clearing of isr and can result in missed interrupts. A missed interrupt results in a command waiting until it times out, which results in the psp being declared dead. This is unlikely on bare metal, but has been observed when running virtualized. In the cases where this is observed, sev->cmdresp_reg has PSP_CMDRESP_RESP set which indicates that the command was processed correctly but no interrupt was asserted. 
The full sequence of events looks like this: CPU 1: submits SEV cmd #1 CPU 1: calls wait_event_timeout() CPU 0: enters psp_irq_handler() CPU 0: calls sev_handler()->wake_up() CPU 1: wakes up; finishes processing cmd #1 CPU 1: submits SEV cmd #2 CPU 1: calls wait_event_timeout() PSP: finishes processing cmd #2; interrupt status is still set; no interrupt CPU 0: clears intsts CPU 0: exits psp_irq_handler() CPU 1: wait_event_timeout() times out; psp_dead=true Fixes: 200664d5237f ("crypto: ccp: Add Secure Encrypted Virtualization (SEV) command support") Cc: stable@vger.kernel.org Signed-off-by: Jeremi Piotrowski Acked-by: Tom Lendacky Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/ccp/psp-dev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c index ae7b44599914..4bf9eaab4456 100644 --- a/drivers/crypto/ccp/psp-dev.c +++ b/drivers/crypto/ccp/psp-dev.c @@ -42,6 +42,9 @@ static irqreturn_t psp_irq_handler(int irq, void *data) /* Read the interrupt status: */ status = ioread32(psp->io_regs + psp->vdata->intsts_reg); + /* Clear the interrupt status by writing the same value we read. */ + iowrite32(status, psp->io_regs + psp->vdata->intsts_reg); + /* invoke subdevice interrupt handlers */ if (status) { if (psp->sev_irq_handler) @@ -51,9 +54,6 @@ static irqreturn_t psp_irq_handler(int irq, void *data) psp->tee_irq_handler(irq, psp->tee_irq_data, status); } - /* Clear the interrupt status by writing the same value we read. */ - iowrite32(status, psp->io_regs + psp->vdata->intsts_reg); - return IRQ_HANDLED; } From 67fb57f24737ffdbd443eb176fe3fc344c93f491 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 20 Nov 2022 09:25:54 +0100 Subject: [PATCH 15/92] mailbox: zynq: Switch to flexible array to simplify code [ Upstream commit 043f85ce81cb1714e14d31c322c5646513dde3fb ] Using a flexible array is more straightforward. 
It - saves 1 pointer in the 'zynqmp_ipi_pdata' structure - saves an indirection when using this array - saves some LoC and avoids some always spurious pointer arithmetic Signed-off-by: Christophe JAILLET Signed-off-by: Jassi Brar Stable-dep-of: f72f805e7288 ("mailbox: zynqmp: Fix counts of child nodes") Signed-off-by: Sasha Levin --- drivers/mailbox/zynqmp-ipi-mailbox.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/mailbox/zynqmp-ipi-mailbox.c b/drivers/mailbox/zynqmp-ipi-mailbox.c index 05e36229622e..136a84ad871c 100644 --- a/drivers/mailbox/zynqmp-ipi-mailbox.c +++ b/drivers/mailbox/zynqmp-ipi-mailbox.c @@ -110,7 +110,7 @@ struct zynqmp_ipi_pdata { unsigned int method; u32 local_id; int num_mboxes; - struct zynqmp_ipi_mbox *ipi_mboxes; + struct zynqmp_ipi_mbox ipi_mboxes[]; }; static struct device_driver zynqmp_ipi_mbox_driver = { @@ -635,7 +635,7 @@ static int zynqmp_ipi_probe(struct platform_device *pdev) int num_mboxes, ret = -EINVAL; num_mboxes = of_get_child_count(np); - pdata = devm_kzalloc(dev, sizeof(*pdata) + (num_mboxes * sizeof(*mbox)), + pdata = devm_kzalloc(dev, struct_size(pdata, ipi_mboxes, num_mboxes), GFP_KERNEL); if (!pdata) return -ENOMEM; @@ -649,8 +649,6 @@ static int zynqmp_ipi_probe(struct platform_device *pdev) } pdata->num_mboxes = num_mboxes; - pdata->ipi_mboxes = (struct zynqmp_ipi_mbox *) - ((char *)pdata + sizeof(*pdata)); mbox = pdata->ipi_mboxes; for_each_available_child_of_node(np, nc) { From 26b1b0d0bebdb45ba1e33590313a4e345e1e18d8 Mon Sep 17 00:00:00 2001 From: Tanmay Shah Date: Fri, 10 Mar 2023 17:24:04 -0800 Subject: [PATCH 16/92] mailbox: zynqmp: Fix counts of child nodes [ Upstream commit f72f805e72882c361e2a612c64a6e549f3da7152 ] If child mailbox node status is disabled it causes crash in interrupt handler. Fix this by assigning only available child node during driver probe. 
Fixes: 4981b82ba2ff ("mailbox: ZynqMP IPI mailbox controller") Signed-off-by: Tanmay Shah Acked-by: Michal Simek Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230311012407.1292118-2-tanmay.shah@amd.com Signed-off-by: Mathieu Poirier Signed-off-by: Sasha Levin --- drivers/mailbox/zynqmp-ipi-mailbox.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/mailbox/zynqmp-ipi-mailbox.c b/drivers/mailbox/zynqmp-ipi-mailbox.c index 136a84ad871c..be06de791c54 100644 --- a/drivers/mailbox/zynqmp-ipi-mailbox.c +++ b/drivers/mailbox/zynqmp-ipi-mailbox.c @@ -634,7 +634,12 @@ static int zynqmp_ipi_probe(struct platform_device *pdev) struct zynqmp_ipi_mbox *mbox; int num_mboxes, ret = -EINVAL; - num_mboxes = of_get_child_count(np); + num_mboxes = of_get_available_child_count(np); + if (num_mboxes == 0) { + dev_err(dev, "mailbox nodes not available\n"); + return -EINVAL; + } + pdata = devm_kzalloc(dev, struct_size(pdata, ipi_mboxes, num_mboxes), GFP_KERNEL); if (!pdata) From fc097cfca0049c46df8f08814b3784780d483b4d Mon Sep 17 00:00:00 2001 From: Akilesh Kailash Date: Mon, 13 Sep 2021 09:26:42 +0000 Subject: [PATCH 17/92] dm verity: skip redundant verity_handle_err() on I/O errors [ Upstream commit 2c0468e054c0adb660ac055fc396622ec7235df9 ] Without FEC, dm-verity won't call verity_handle_err() when I/O fails, but with FEC enabled, it currently does even if an I/O error has occurred. If there is an I/O error and FEC correction fails, return the error instead of calling verity_handle_err() again. 
Suggested-by: Sami Tolvanen Signed-off-by: Akilesh Kailash Reviewed-by: Sami Tolvanen Signed-off-by: Mike Snitzer Stable-dep-of: e8c5d45f82ce ("dm verity: fix error handling for check_at_most_once on FEC") Signed-off-by: Sasha Levin --- drivers/md/dm-verity-target.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index c801f6b93b7b..d9c388e6ce76 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -475,6 +475,7 @@ static int verity_verify_io(struct dm_verity_io *io) struct bvec_iter start; unsigned b; struct crypto_wait wait; + struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size); for (b = 0; b < io->n_blocks; b++) { int r; @@ -529,9 +530,17 @@ static int verity_verify_io(struct dm_verity_io *io) else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA, cur_block, NULL, &start) == 0) continue; - else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, - cur_block)) - return -EIO; + else { + if (bio->bi_status) { + /* + * Error correction failed; Just return error + */ + return -EIO; + } + if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, + cur_block)) + return -EIO; + } } return 0; From 4285cc0a22d25c6f5363d9cc182d24b516dc6709 Mon Sep 17 00:00:00 2001 From: Yeongjin Gil Date: Mon, 20 Mar 2023 15:59:32 +0900 Subject: [PATCH 18/92] dm verity: fix error handling for check_at_most_once on FEC [ Upstream commit e8c5d45f82ce0c238a4817739892fe8897a3dcc3 ] In verity_end_io(), if bi_status is not BLK_STS_OK, it can be returned directly. But if FEC is configured, it is desired to correct the data page through verity_verify_io(). And the return value will be converted to blk_status and passed to verity_finish_io(). BTW, when a bit is set in v->validated_blocks, verity_verify_io() skips verification regardless of I/O error for the corresponding bio. 
In this case, the I/O error could not be returned properly, and as a result, there is a problem that abnormal data could be read for the corresponding block. To fix this problem, when an I/O error occurs, do not skip verification even if the bit related is set in v->validated_blocks. Fixes: 843f38d382b1 ("dm verity: add 'check_at_most_once' option to only validate hashes once") Cc: stable@vger.kernel.org Reviewed-by: Sungjong Seo Signed-off-by: Yeongjin Gil Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-verity-target.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index d9c388e6ce76..0c2048d2b847 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -482,7 +482,7 @@ static int verity_verify_io(struct dm_verity_io *io) sector_t cur_block = io->block + b; struct ahash_request *req = verity_io_hash_req(v, io); - if (v->validated_blocks && + if (v->validated_blocks && bio->bi_status == BLK_STS_OK && likely(test_bit(cur_block, v->validated_blocks))) { verity_bv_skip_block(v, io, &io->iter); continue; From fa19c533ab19161298f0780bcc6523af88f6fd20 Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Thu, 13 Apr 2023 11:34:22 +0800 Subject: [PATCH 19/92] scsi: qedi: Fix use after free bug in qedi_remove() [ Upstream commit c5749639f2d0a1f6cbe187d05f70c2e7c544d748 ] In qedi_probe() we call __qedi_probe() which initializes &qedi->recovery_work with qedi_recovery_handler() and &qedi->board_disable_work with qedi_board_disable_work(). When qedi_schedule_recovery_handler() is called, schedule_delayed_work() will finally start the work. In qedi_remove(), which is called to remove the driver, the following sequence may be observed: Fix this by finishing the work before cleanup in qedi_remove(). 
CPU0 CPU1 |qedi_recovery_handler qedi_remove | __qedi_remove | iscsi_host_free | scsi_host_put | //free shost | |iscsi_host_for_each_session |//use qedi->shost Cancel recovery_work and board_disable_work in __qedi_remove(). Fixes: 4b1068f5d74b ("scsi: qedi: Add MFW error recovery process") Signed-off-by: Zheng Wang Link: https://lore.kernel.org/r/20230413033422.28003-1-zyytlz.wz@163.com Acked-by: Manish Rangankar Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qedi/qedi_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 299d0369e4f0..7df0106f132e 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -2456,6 +2456,9 @@ static void __qedi_remove(struct pci_dev *pdev, int mode) qedi_ops->ll2->stop(qedi->cdev); } + cancel_delayed_work_sync(&qedi->recovery_work); + cancel_delayed_work_sync(&qedi->board_disable_work); + qedi_free_iscsi_pf_param(qedi); rval = qedi_ops->common->update_drv_state(qedi->cdev, false); From cc8efc78c3f6a6fd0f6ff2523a98048c0610499d Mon Sep 17 00:00:00 2001 From: Cosmo Chou Date: Wed, 26 Apr 2023 16:13:50 +0800 Subject: [PATCH 20/92] net/ncsi: clear Tx enable mode when handling a Config required AEN [ Upstream commit 6f75cd166a5a3c0bc50441faa8b8304f60522fdd ] ncsi_channel_is_tx() determines whether a given channel should be used for Tx or not. However, when reconfiguring the channel by handling a Configuration Required AEN, there is a misjudgment that the channel Tx has already been enabled, which results in the Enable Channel Network Tx command not being sent. Clear the channel Tx enable flag before reconfiguring the channel to avoid the misjudgment. Fixes: 8d951a75d022 ("net/ncsi: Configure multi-package, multi-channel modes with failover") Signed-off-by: Cosmo Chou Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- net/ncsi/ncsi-aen.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c index b635c194f0a8..62fb1031763d 100644 --- a/net/ncsi/ncsi-aen.c +++ b/net/ncsi/ncsi-aen.c @@ -165,6 +165,7 @@ static int ncsi_aen_handler_cr(struct ncsi_dev_priv *ndp, nc->state = NCSI_CHANNEL_INACTIVE; list_add_tail_rcu(&nc->link, &ndp->channel_queue); spin_unlock_irqrestore(&ndp->lock, flags); + nc->modes[NCSI_MODE_TX_ENABLE].enable = 0; return ncsi_process_next_channel(ndp); } From 7311c8be3755611bf6edea4dfbeb190b4bdd489f Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Wed, 26 Apr 2023 14:31:11 +0200 Subject: [PATCH 21/92] net/sched: cls_api: remove block_cb from driver_list before freeing [ Upstream commit da94a7781fc3c92e7df7832bc2746f4d39bc624e ] Error handler of tcf_block_bind() frees the whole bo->cb_list on error. However, by that time the flow_block_cb instances are already in the driver list because driver ndo_setup_tc() callback is called before that up the call chain in tcf_block_offload_cmd(). This leaves dangling pointers to freed objects in the list and causes use-after-free[0]. Fix it by also removing flow_block_cb instances from driver_list before deallocating them. [0]: [ 279.868433] ================================================================== [ 279.869964] BUG: KASAN: slab-use-after-free in flow_block_cb_setup_simple+0x631/0x7c0 [ 279.871527] Read of size 8 at addr ffff888147e2bf20 by task tc/2963 [ 279.873151] CPU: 6 PID: 2963 Comm: tc Not tainted 6.3.0-rc6+ #4 [ 279.874273] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [ 279.876295] Call Trace: [ 279.876882] [ 279.877413] dump_stack_lvl+0x33/0x50 [ 279.878198] print_report+0xc2/0x610 [ 279.878987] ? flow_block_cb_setup_simple+0x631/0x7c0 [ 279.879994] kasan_report+0xae/0xe0 [ 279.880750] ? flow_block_cb_setup_simple+0x631/0x7c0 [ 279.881744] ? 
mlx5e_tc_reoffload_flows_work+0x240/0x240 [mlx5_core] [ 279.883047] flow_block_cb_setup_simple+0x631/0x7c0 [ 279.884027] tcf_block_offload_cmd.isra.0+0x189/0x2d0 [ 279.885037] ? tcf_block_setup+0x6b0/0x6b0 [ 279.885901] ? mutex_lock+0x7d/0xd0 [ 279.886669] ? __mutex_unlock_slowpath.constprop.0+0x2d0/0x2d0 [ 279.887844] ? ingress_init+0x1c0/0x1c0 [sch_ingress] [ 279.888846] tcf_block_get_ext+0x61c/0x1200 [ 279.889711] ingress_init+0x112/0x1c0 [sch_ingress] [ 279.890682] ? clsact_init+0x2b0/0x2b0 [sch_ingress] [ 279.891701] qdisc_create+0x401/0xea0 [ 279.892485] ? qdisc_tree_reduce_backlog+0x470/0x470 [ 279.893473] tc_modify_qdisc+0x6f7/0x16d0 [ 279.894344] ? tc_get_qdisc+0xac0/0xac0 [ 279.895213] ? mutex_lock+0x7d/0xd0 [ 279.896005] ? __mutex_lock_slowpath+0x10/0x10 [ 279.896910] rtnetlink_rcv_msg+0x5fe/0x9d0 [ 279.897770] ? rtnl_calcit.isra.0+0x2b0/0x2b0 [ 279.898672] ? __sys_sendmsg+0xb5/0x140 [ 279.899494] ? do_syscall_64+0x3d/0x90 [ 279.900302] ? entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 279.901337] ? kasan_save_stack+0x2e/0x40 [ 279.902177] ? kasan_save_stack+0x1e/0x40 [ 279.903058] ? kasan_set_track+0x21/0x30 [ 279.903913] ? kasan_save_free_info+0x2a/0x40 [ 279.904836] ? ____kasan_slab_free+0x11a/0x1b0 [ 279.905741] ? kmem_cache_free+0x179/0x400 [ 279.906599] netlink_rcv_skb+0x12c/0x360 [ 279.907450] ? rtnl_calcit.isra.0+0x2b0/0x2b0 [ 279.908360] ? netlink_ack+0x1550/0x1550 [ 279.909192] ? rhashtable_walk_peek+0x170/0x170 [ 279.910135] ? kmem_cache_alloc_node+0x1af/0x390 [ 279.911086] ? _copy_from_iter+0x3d6/0xc70 [ 279.912031] netlink_unicast+0x553/0x790 [ 279.912864] ? netlink_attachskb+0x6a0/0x6a0 [ 279.913763] ? netlink_recvmsg+0x416/0xb50 [ 279.914627] netlink_sendmsg+0x7a1/0xcb0 [ 279.915473] ? netlink_unicast+0x790/0x790 [ 279.916334] ? iovec_from_user.part.0+0x4d/0x220 [ 279.917293] ? netlink_unicast+0x790/0x790 [ 279.918159] sock_sendmsg+0xc5/0x190 [ 279.918938] ____sys_sendmsg+0x535/0x6b0 [ 279.919813] ? import_iovec+0x7/0x10 [ 279.920601] ? 
kernel_sendmsg+0x30/0x30 [ 279.921423] ? __copy_msghdr+0x3c0/0x3c0 [ 279.922254] ? import_iovec+0x7/0x10 [ 279.923041] ___sys_sendmsg+0xeb/0x170 [ 279.923854] ? copy_msghdr_from_user+0x110/0x110 [ 279.924797] ? ___sys_recvmsg+0xd9/0x130 [ 279.925630] ? __perf_event_task_sched_in+0x183/0x470 [ 279.926656] ? ___sys_sendmsg+0x170/0x170 [ 279.927529] ? ctx_sched_in+0x530/0x530 [ 279.928369] ? update_curr+0x283/0x4f0 [ 279.929185] ? perf_event_update_userpage+0x570/0x570 [ 279.930201] ? __fget_light+0x57/0x520 [ 279.931023] ? __switch_to+0x53d/0xe70 [ 279.931846] ? sockfd_lookup_light+0x1a/0x140 [ 279.932761] __sys_sendmsg+0xb5/0x140 [ 279.933560] ? __sys_sendmsg_sock+0x20/0x20 [ 279.934436] ? fpregs_assert_state_consistent+0x1d/0xa0 [ 279.935490] do_syscall_64+0x3d/0x90 [ 279.936300] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 279.937311] RIP: 0033:0x7f21c814f887 [ 279.938085] Code: 0a 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10 [ 279.941448] RSP: 002b:00007fff11efd478 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 279.942964] RAX: ffffffffffffffda RBX: 0000000064401979 RCX: 00007f21c814f887 [ 279.944337] RDX: 0000000000000000 RSI: 00007fff11efd4e0 RDI: 0000000000000003 [ 279.945660] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 [ 279.947003] R10: 00007f21c8008708 R11: 0000000000000246 R12: 0000000000000001 [ 279.948345] R13: 0000000000409980 R14: 000000000047e538 R15: 0000000000485400 [ 279.949690] [ 279.950706] Allocated by task 2960: [ 279.951471] kasan_save_stack+0x1e/0x40 [ 279.952338] kasan_set_track+0x21/0x30 [ 279.953165] __kasan_kmalloc+0x77/0x90 [ 279.954006] flow_block_cb_setup_simple+0x3dd/0x7c0 [ 279.955001] tcf_block_offload_cmd.isra.0+0x189/0x2d0 [ 279.956020] tcf_block_get_ext+0x61c/0x1200 [ 279.956881] ingress_init+0x112/0x1c0 [sch_ingress] [ 279.957873] qdisc_create+0x401/0xea0 [ 
279.958656] tc_modify_qdisc+0x6f7/0x16d0 [ 279.959506] rtnetlink_rcv_msg+0x5fe/0x9d0 [ 279.960392] netlink_rcv_skb+0x12c/0x360 [ 279.961216] netlink_unicast+0x553/0x790 [ 279.962044] netlink_sendmsg+0x7a1/0xcb0 [ 279.962906] sock_sendmsg+0xc5/0x190 [ 279.963702] ____sys_sendmsg+0x535/0x6b0 [ 279.964534] ___sys_sendmsg+0xeb/0x170 [ 279.965343] __sys_sendmsg+0xb5/0x140 [ 279.966132] do_syscall_64+0x3d/0x90 [ 279.966908] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 279.968407] Freed by task 2960: [ 279.969114] kasan_save_stack+0x1e/0x40 [ 279.969929] kasan_set_track+0x21/0x30 [ 279.970729] kasan_save_free_info+0x2a/0x40 [ 279.971603] ____kasan_slab_free+0x11a/0x1b0 [ 279.972483] __kmem_cache_free+0x14d/0x280 [ 279.973337] tcf_block_setup+0x29d/0x6b0 [ 279.974173] tcf_block_offload_cmd.isra.0+0x226/0x2d0 [ 279.975186] tcf_block_get_ext+0x61c/0x1200 [ 279.976080] ingress_init+0x112/0x1c0 [sch_ingress] [ 279.977065] qdisc_create+0x401/0xea0 [ 279.977857] tc_modify_qdisc+0x6f7/0x16d0 [ 279.978695] rtnetlink_rcv_msg+0x5fe/0x9d0 [ 279.979562] netlink_rcv_skb+0x12c/0x360 [ 279.980388] netlink_unicast+0x553/0x790 [ 279.981214] netlink_sendmsg+0x7a1/0xcb0 [ 279.982043] sock_sendmsg+0xc5/0x190 [ 279.982827] ____sys_sendmsg+0x535/0x6b0 [ 279.983703] ___sys_sendmsg+0xeb/0x170 [ 279.984510] __sys_sendmsg+0xb5/0x140 [ 279.985298] do_syscall_64+0x3d/0x90 [ 279.986076] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 279.987532] The buggy address belongs to the object at ffff888147e2bf00 which belongs to the cache kmalloc-192 of size 192 [ 279.989747] The buggy address is located 32 bytes inside of freed 192-byte region [ffff888147e2bf00, ffff888147e2bfc0) [ 279.992367] The buggy address belongs to the physical page: [ 279.993430] page:00000000550f405c refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x147e2a [ 279.995182] head:00000000550f405c order:1 entire_mapcount:0 nr_pages_mapped:0 pincount:0 [ 279.996713] anon flags: 0x200000000010200(slab|head|node=0|zone=2) [ 279.997878] 
raw: 0200000000010200 ffff888100042a00 0000000000000000 dead000000000001 [ 279.999384] raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000 [ 280.000894] page dumped because: kasan: bad access detected [ 280.002386] Memory state around the buggy address: [ 280.003338] ffff888147e2be00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 280.004781] ffff888147e2be80: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 280.006224] >ffff888147e2bf00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 280.007700] ^ [ 280.008592] ffff888147e2bf80: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 280.010035] ffff888147e2c000: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 280.011564] ================================================================== Fixes: 59094b1e5094 ("net: sched: use flow block API") Signed-off-by: Vlad Buslov Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sched/cls_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index c410a736301b..53d315ed9430 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1466,6 +1466,7 @@ static int tcf_block_bind(struct tcf_block *block, err_unroll: list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) { + list_del(&block_cb->driver_list); if (i-- > 0) { list_del(&block_cb->list); tcf_block_playback_offloads(block, block_cb->cb, From d6f0687d506d74ff15701823b5223c4d07d91bc1 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 26 Apr 2023 23:00:06 -0700 Subject: [PATCH 22/92] sit: update dev->needed_headroom in ipip6_tunnel_bind_dev() [ Upstream commit c88f8d5cd95fd039cff95d682b8e71100c001df0 ] When a tunnel device is bound with the underlying device, its dev->needed_headroom needs to be updated properly. IPv4 tunnels already do the same in ip_tunnel_bind_dev(). 
Otherwise we may not have enough header room for skb, especially after commit b17f709a2401 ("gue: TX support for using remote checksum offload option"). Fixes: 32b8a8e59c9c ("sit: add IPv4 over IPv4 support") Reported-by: Palash Oswal Link: https://lore.kernel.org/netdev/CAGyP=7fDcSPKu6nttbGwt7RXzE3uyYxLjCSE97J64pRxJP8jPA@mail.gmail.com/ Cc: Kuniyuki Iwashima Cc: Eric Dumazet Signed-off-by: Cong Wang Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv6/sit.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1ce486a9bc07..9806bd56b95f 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1094,12 +1094,13 @@ static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb, static void ipip6_tunnel_bind_dev(struct net_device *dev) { + struct ip_tunnel *tunnel = netdev_priv(dev); + int t_hlen = tunnel->hlen + sizeof(struct iphdr); struct net_device *tdev = NULL; - struct ip_tunnel *tunnel; + int hlen = LL_MAX_HEADER; const struct iphdr *iph; struct flowi4 fl4; - tunnel = netdev_priv(dev); iph = &tunnel->parms.iph; if (iph->daddr) { @@ -1122,14 +1123,15 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link); if (tdev && !netif_is_l3_master(tdev)) { - int t_hlen = tunnel->hlen + sizeof(struct iphdr); int mtu; mtu = tdev->mtu - t_hlen; if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; WRITE_ONCE(dev->mtu, mtu); + hlen = tdev->hard_header_len + tdev->needed_headroom; } + dev->needed_headroom = t_hlen + hlen; } static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p, From f47f0fb5b58b92e0e256fd8e90211fe42093aaad Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Wed, 26 Apr 2023 22:28:15 +0200 Subject: [PATCH 23/92] net: dsa: mv88e6xxx: add mv88e6321 rsvd2cpu [ Upstream commit 6686317855c6997671982d4489ccdd946f644957 ] Add rsvd2cpu capability for mv88e6321 model, to 
allow proper bpdu processing. Signed-off-by: Angelo Dureghello Fixes: 51c901a775621 ("net: dsa: mv88e6xxx: distinguish Global 2 Rsvd2CPU") Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/dsa/mv88e6xxx/chip.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 0b104a90c0d8..321c821876f6 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -4182,6 +4182,7 @@ static const struct mv88e6xxx_ops mv88e6321_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6390_watchdog_ops, + .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6185_g1_vtu_getnext, .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge, From 3b7798b42e1da6bbea70e6b09c41829865014f25 Mon Sep 17 00:00:00 2001 From: Maxim Korotkov Date: Thu, 19 Jan 2023 13:44:43 +0300 Subject: [PATCH 24/92] writeback: fix call of incorrect macro [ Upstream commit 3e46c89c74f2c38e5337d2cf44b0b551adff1cb4 ] The variable 'history' is of type u16, so it may be an error that the hweight32 macro was used for it. I guess the hweight16 macro should be used. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 2a81490811d0 ("writeback: implement foreign cgroup inode detection") Signed-off-by: Maxim Korotkov Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20230119104443.3002-1-korotkov.maxim.s@gmail.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- fs/fs-writeback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 6f18459f5e38..045a3bd520ca 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -700,7 +700,7 @@ void wbc_detach_inode(struct writeback_control *wbc) * is okay. The main goal is avoiding keeping an inode on * the wrong wb for an extended period of time.
*/ - if (hweight32(history) > WB_FRN_HIST_THR_SLOTS) + if (hweight16(history) > WB_FRN_HIST_THR_SLOTS) inode_switch_wbs(inode, max_id); } From f3fae1b1c721edfadf35c85ac9cecc52d71d6f23 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 26 Apr 2023 08:52:48 +0200 Subject: [PATCH 25/92] watchdog: dw_wdt: Fix the error handling path of dw_wdt_drv_probe() [ Upstream commit 7f5390750645756bd5da2b24fac285f2654dd922 ] The commit in Fixes has only updated the remove function and missed the error handling path of the probe. Add the missing reset_control_assert() call. Fixes: 65a3b6935d92 ("watchdog: dw_wdt: get reset lines from dt") Signed-off-by: Christophe JAILLET Reviewed-by: Philipp Zabel Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/fbb650650bbb33a8fa2fd028c23157bedeed50e1.1682491863.git.christophe.jaillet@wanadoo.fr Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Sasha Levin --- drivers/watchdog/dw_wdt.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/watchdog/dw_wdt.c b/drivers/watchdog/dw_wdt.c index 32d0e1781e63..3cd118281980 100644 --- a/drivers/watchdog/dw_wdt.c +++ b/drivers/watchdog/dw_wdt.c @@ -638,7 +638,7 @@ static int dw_wdt_drv_probe(struct platform_device *pdev) ret = dw_wdt_init_timeouts(dw_wdt, dev); if (ret) - goto out_disable_clk; + goto out_assert_rst; wdd = &dw_wdt->wdd; wdd->ops = &dw_wdt_ops; @@ -669,12 +669,15 @@ static int dw_wdt_drv_probe(struct platform_device *pdev) ret = watchdog_register_device(wdd); if (ret) - goto out_disable_pclk; + goto out_assert_rst; dw_wdt_dbgfs_init(dw_wdt); return 0; +out_assert_rst: + reset_control_assert(dw_wdt->rst); + out_disable_pclk: clk_disable_unprepare(dw_wdt->pclk); From 774da70521564b3ebbc4d53402a960b1726ae94e Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Wed, 26 Apr 2023 15:19:40 +0000 Subject: [PATCH 26/92] net/sched: act_mirred: Add carrier check [ Upstream commit 526f28bd0fbdc699cda31426928802650c1528e5 ] There 
are cases where the device is administratively UP, but operationally down. For example, we have a physical device (Nvidia ConnectX-6 Dx, 25Gbps) whose cable was pulled out; here is its ip link output: 5: ens2f1: mtu 1500 qdisc mq state DOWN mode DEFAULT group default qlen 1000 link/ether b8:ce:f6:4b:68:35 brd ff:ff:ff:ff:ff:ff altname enp179s0f1np1 As you can see, it's administratively UP but operationally down. In this case, sending a packet to this port caused a nasty kernel hang (so nasty that we were unable to capture it). Aborting a transmit based on operational status (in addition to administrative status) fixes the issue. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Acked-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira v1->v2: Add fixes tag v2->v3: Remove blank line between tags + add change log, suggested by Leon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sched/act_mirred.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 24d561d8d9c9..25dad1921baf 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -244,7 +244,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, goto out; } - if (unlikely(!(dev->flags & IFF_UP))) { + if (unlikely(!(dev->flags & IFF_UP)) || !netif_carrier_ok(dev)) { net_notice_ratelimited("tc mirred to Houston: device %s is down\n", dev->name); goto out; From 9291aba0ffa0bd15fe3d2677a437328689c15c94 Mon Sep 17 00:00:00 2001 From: Andy Moreton Date: Fri, 28 Apr 2023 12:33:33 +0100 Subject: [PATCH 27/92] sfc: Fix module EEPROM reporting for QSFP modules [ Upstream commit 281900a923d4c50df109b52a22ae3cdac150159b ] The sfc driver does not report QSFP module EEPROM contents correctly as only the first page is fetched from hardware.
Commit 0e1a2a3e6e7d ("ethtool: Add SFF-8436 and SFF-8636 max EEPROM length definitions") added ETH_MODULE_SFF_8436_MAX_LEN for the overall size of the EEPROM info, so use that to report the full EEPROM contents. Fixes: 9b17010da57a ("sfc: Add ethtool -m support for QSFP modules") Signed-off-by: Andy Moreton Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/sfc/mcdi_port_common.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/sfc/mcdi_port_common.c b/drivers/net/ethernet/sfc/mcdi_port_common.c index c4fe3c48ac46..eccb97a5d938 100644 --- a/drivers/net/ethernet/sfc/mcdi_port_common.c +++ b/drivers/net/ethernet/sfc/mcdi_port_common.c @@ -974,12 +974,15 @@ static u32 efx_mcdi_phy_module_type(struct efx_nic *efx) /* A QSFP+ NIC may actually have an SFP+ module attached. * The ID is page 0, byte 0. + * QSFP28 is of type SFF_8636, however, this is treated + * the same by ethtool, so we can also treat them the same. 
*/ switch (efx_mcdi_phy_get_module_eeprom_byte(efx, 0, 0)) { - case 0x3: + case 0x3: /* SFP */ return MC_CMD_MEDIA_SFP_PLUS; - case 0xc: - case 0xd: + case 0xc: /* QSFP */ + case 0xd: /* QSFP+ */ + case 0x11: /* QSFP28 */ return MC_CMD_MEDIA_QSFP_PLUS; default: return 0; @@ -1077,7 +1080,7 @@ int efx_mcdi_phy_get_module_info(struct efx_nic *efx, struct ethtool_modinfo *mo case MC_CMD_MEDIA_QSFP_PLUS: modinfo->type = ETH_MODULE_SFF_8436; - modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN; break; default: From 15152b8a4bbb8541c8f8c18e972a7c4030da86b3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 28 Apr 2023 21:27:54 +0100 Subject: [PATCH 28/92] rxrpc: Fix hard call timeout units [ Upstream commit 0d098d83c5d9e107b2df7f5e11f81492f56d2fe7 ] The hard call timeout is specified in the RXRPC_SET_CALL_TIMEOUT cmsg in seconds, so fix the point at which sendmsg() applies it to the call to convert to jiffies from seconds, not milliseconds. Fixes: a158bdd3247b ("rxrpc: Fix timeout of a call that hasn't yet been granted a channel") Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org cc: linux-kernel@vger.kernel.org Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- net/rxrpc/sendmsg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index a670553159ab..1882fea71903 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -753,7 +753,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) fallthrough; case 1: if (p.call.timeouts.hard > 0) { - j = msecs_to_jiffies(p.call.timeouts.hard); + j = p.call.timeouts.hard * HZ; now = jiffies; j += now; WRITE_ONCE(call->expect_term_by, j); From ea7453f5e5b6305e3d0e349f7d332aca7ee73465 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Wed, 3 May 2023 12:39:43 +0530 Subject: [PATCH 29/92] octeontx2-pf: Disable packet I/O for graceful exit [ Upstream commit c926252205c424c4842dbdbe02f8e3296f623204 ] At the stage of enabling packet I/O in otx2_open, if a mailbox timeout occurs then the interface ends up in down state whereas hardware packet I/O is enabled. Hence disable packet I/O also before bailing out. Fixes: 1ea0166da050 ("octeontx2-pf: Fix the device state on error") Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: Sai Krishna Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 161174be51c3..54aeb276b9a0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1589,11 +1589,20 @@ int otx2_open(struct net_device *netdev) otx2_config_pause_frm(pf); err = otx2_rxtx_enable(pf, true); - if (err) + /* If a mbox communication error happens at this point then interface + * will end up in a state such that it is in down state but hardware + * mcam entries are enabled to receive the packets.
Hence disable the + * packet I/O. + */ + if (err == EIO) + goto err_disable_rxtx; + else if (err) goto err_tx_stop_queues; return 0; +err_disable_rxtx: + otx2_rxtx_enable(pf, false); err_tx_stop_queues: netif_tx_stop_all_queues(netdev); netif_carrier_off(netdev); From 3605b3318483714f4cda7c7b629d4463bd9ed94e Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Wed, 3 May 2023 12:39:44 +0530 Subject: [PATCH 30/92] octeontx2-vf: Detach LF resources on probe cleanup [ Upstream commit 99ae1260fdb5f15beab8a3adfb93a9041c87a2c1 ] When a VF device probe fails due to error in MSIX vector allocation then the resources NIX and NPA LFs were not detached. Fix this by detaching the LFs when MSIX vector allocation fails. Fixes: 3184fb5ba96e ("octeontx2-vf: Virtual function driver support") Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: Sai Krishna Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 67fabf265fe6..5310b71795ec 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -542,7 +542,7 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) err = otx2vf_realloc_msix_vectors(vf); if (err) - goto err_mbox_destroy; + goto err_detach_rsrc; err = otx2_set_real_num_queues(netdev, qcount, qcount); if (err) From 02359ba526ce4df4fbb1ed03684c1a15c0e6c8da Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Tue, 2 May 2023 11:47:40 -0700 Subject: [PATCH 31/92] ionic: remove noise from ethtool rxnfc error msg [ Upstream commit 3711d44fac1f80ea69ecb7315fed05b3812a7401 ] It seems that ethtool is calling into .get_rxnfc more often with ETHTOOL_GRXCLSRLCNT which ionic doesn't know about. 
We don't need to log a message about it, just return not supported. Fixes: aa3198819bea6 ("ionic: Add RSS support") Signed-off-by: Shannon Nelson Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/pensando/ionic/ionic_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c index 35c72d4a78b3..8e5b01af85ed 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -693,7 +693,7 @@ static int ionic_get_rxnfc(struct net_device *netdev, info->data = lif->nxqs; break; default: - netdev_err(netdev, "Command parameter %d is not supported\n", + netdev_dbg(netdev, "Command parameter %d is not supported\n", info->cmd); err = -EOPNOTSUPP; } From f00ef2618fa1c941981f3aafb106754377658f30 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 1 May 2023 13:28:57 -0700 Subject: [PATCH 32/92] af_packet: Don't send zero-byte data in packet_sendmsg_spkt(). [ Upstream commit 6a341729fb31b4c5df9f74f24b4b1c98410c9b87 ] syzkaller reported a warning below [0]. We can reproduce it by sending 0-byte data from the (AF_PACKET, SOCK_PACKET) socket via some devices whose dev->hard_header_len is 0. struct sockaddr_pkt addr = { .spkt_family = AF_PACKET, .spkt_device = "tun0", }; int fd; fd = socket(AF_PACKET, SOCK_PACKET, 0); sendto(fd, NULL, 0, 0, (struct sockaddr *)&addr, sizeof(addr)); We have a similar fix for the (AF_PACKET, SOCK_RAW) socket as commit dc633700f00f ("net/af_packet: check len when min_header_len equals to 0"). Let's add the same test for the SOCK_PACKET socket. 
[0]: skb_assert_len WARNING: CPU: 1 PID: 19945 at include/linux/skbuff.h:2552 skb_assert_len include/linux/skbuff.h:2552 [inline] WARNING: CPU: 1 PID: 19945 at include/linux/skbuff.h:2552 __dev_queue_xmit+0x1f26/0x31d0 net/core/dev.c:4159 Modules linked in: CPU: 1 PID: 19945 Comm: syz-executor.0 Not tainted 6.3.0-rc7-02330-gca6270c12e20 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:skb_assert_len include/linux/skbuff.h:2552 [inline] RIP: 0010:__dev_queue_xmit+0x1f26/0x31d0 net/core/dev.c:4159 Code: 89 de e8 1d a2 85 fd 84 db 75 21 e8 64 a9 85 fd 48 c7 c6 80 2a 1f 86 48 c7 c7 c0 06 1f 86 c6 05 23 cf 27 04 01 e8 fa ee 56 fd <0f> 0b e8 43 a9 85 fd 0f b6 1d 0f cf 27 04 31 ff 89 de e8 e3 a1 85 RSP: 0018:ffff8880217af6e0 EFLAGS: 00010282 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffc90001133000 RDX: 0000000000040000 RSI: ffffffff81186922 RDI: 0000000000000001 RBP: ffff8880217af8b0 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000001 R12: ffff888030045640 R13: ffff8880300456b0 R14: ffff888030045650 R15: ffff888030045718 FS: 00007fc5864da640(0000) GS:ffff88806cd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020005740 CR3: 000000003f856003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: dev_queue_xmit include/linux/netdevice.h:3085 [inline] packet_sendmsg_spkt+0xc4b/0x1230 net/packet/af_packet.c:2066 sock_sendmsg_nosec net/socket.c:724 [inline] sock_sendmsg+0x1b4/0x200 net/socket.c:747 ____sys_sendmsg+0x331/0x970 net/socket.c:2503 ___sys_sendmsg+0x11d/0x1c0 net/socket.c:2557 __sys_sendmmsg+0x18c/0x430 net/socket.c:2643 __do_sys_sendmmsg net/socket.c:2672 [inline] __se_sys_sendmmsg net/socket.c:2669 [inline] __x64_sys_sendmmsg+0x9c/0x100 net/socket.c:2669 do_syscall_x64 
arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3c/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x7fc58791de5d Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 73 9f 1b 00 f7 d8 64 89 01 48 RSP: 002b:00007fc5864d9cc8 EFLAGS: 00000246 ORIG_RAX: 0000000000000133 RAX: ffffffffffffffda RBX: 00000000004bbf80 RCX: 00007fc58791de5d RDX: 0000000000000001 RSI: 0000000020005740 RDI: 0000000000000004 RBP: 00000000004bbf80 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000000b R14: 00007fc58797e530 R15: 0000000000000000 ---[ end trace 0000000000000000 ]--- skb len=0 headroom=16 headlen=0 tailroom=304 mac=(16,0) net=(16,-1) trans=-1 shinfo(txflags=0 nr_frags=0 gso(size=0 type=0 segs=0)) csum(0x0 ip_summed=0 complete_sw=0 valid=0 level=0) hash(0x0 sw=0 l4=0) proto=0x0000 pkttype=0 iif=0 dev name=sit0 feat=0x00000006401d7869 sk family=17 type=10 proto=0 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- net/packet/af_packet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 9b6f6a5e0b14..2e766490a739 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1996,7 +1996,7 @@ static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, goto retry; } - if (!dev_validate_header(dev, skb->data, len)) { + if (!dev_validate_header(dev, skb->data, len) || !skb->len) { err = -EINVAL; goto out_unlock; } From 7f497a9451d716d8e46a3907996151a23296d49b Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Wed, 26 Apr 2023 15:54:55 -0700 Subject: [PATCH 33/92] drm/amdgpu: add a missing lock for AMDGPU_SCHED MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2397e3d8d2e120355201a8310b61929f5a8bd2c0 ] mgr->ctx_handles should be protected by mgr->lock. v2: improve commit message v3: add a Fixes tag Signed-off-by: Chia-I Wu Reviewed-by: Christian König Fixes: 52c6a62c64fa ("drm/amdgpu: add interface for editing a foreign process's priority v3") Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index 0da0a0d98672..15c0a3068eab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -66,6 +66,7 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev, { struct fd f = fdget(fd); struct amdgpu_fpriv *fpriv; + struct amdgpu_ctx_mgr *mgr; struct amdgpu_ctx *ctx; uint32_t id; int r; @@ -79,8 +80,11 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev, return r; } - idr_for_each_entry(&fpriv->ctx_mgr.ctx_handles, ctx, id) + mgr = &fpriv->ctx_mgr; + mutex_lock(&mgr->lock); + 
idr_for_each_entry(&mgr->ctx_handles, ctx, id) amdgpu_ctx_priority_override(ctx, priority); + mutex_unlock(&mgr->lock); fdput(f); return 0; From 0f1ad0ef60cc07d94256a39b0c594c0cd94f1716 Mon Sep 17 00:00:00 2001 From: Ruliang Lin Date: Thu, 4 May 2023 14:50:53 +0800 Subject: [PATCH 34/92] ALSA: caiaq: input: Add error handling for unsupported input methods in `snd_usb_caiaq_input_init` [ Upstream commit 0d727e1856ef22dd9337199430258cb64cbbc658 ] Smatch complains that: snd_usb_caiaq_input_init() warn: missing error code 'ret' This patch adds a new case to handle the situation where the device does not support any input methods in the `snd_usb_caiaq_input_init` function. It returns an `-EINVAL` error code to indicate that no input methods are supported on the device. Fixes: 523f1dce3743 ("[ALSA] Add Native Instrument usb audio device support") Signed-off-by: Ruliang Lin Reviewed-by: Dongliang Mu Acked-by: Daniel Mack Link: https://lore.kernel.org/r/20230504065054.3309-1-u202112092@hust.edu.cn Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/caiaq/input.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/caiaq/input.c b/sound/usb/caiaq/input.c index 1e2cf2f08eec..84f26dce7f5d 100644 --- a/sound/usb/caiaq/input.c +++ b/sound/usb/caiaq/input.c @@ -804,6 +804,7 @@ int snd_usb_caiaq_input_init(struct snd_usb_caiaqdev *cdev) default: /* no input methods supported on this device */ + ret = -EINVAL; goto exit_free_idev; } From 0773270b131481fb5159bc9d562c3dfac6330d22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Wed, 3 May 2023 00:09:46 +0300 Subject: [PATCH 35/92] net: dsa: mt7530: fix corrupt frames using trgmii on 40 MHz XTAL MT7621 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 37c218d8021e36e226add4bab93d071d30fe0704 ] The multi-chip module MT7530 switch with a 40 MHz oscillator on the MT7621AT, MT7621DAT, and MT7621ST SoCs forwards corrupt frames 
using trgmii. This is caused by the assumption that MT7621 SoCs have got 150 MHz PLL, hence using the ncpo1 value, 0x0780. My testing shows this value works on Unielec U7621-06, Bartel's testing shows it won't work on Hi-Link HLK-MT7621A and Netgear WAC104. All devices tested have got 40 MHz oscillators. Using the value for 125 MHz PLL, 0x0640, works on all boards at hand. The definitions for 125 MHz PLL exist on the Banana Pi BPI-R2 BSP source code whilst 150 MHz PLL don't. Forwarding frames using trgmii on the MCM MT7530 switch with a 25 MHz oscillator on the said MT7621 SoCs works fine because the ncpo1 value defined for it is for 125 MHz PLL. Change the 150 MHz PLL comment to 125 MHz PLL, and use the 125 MHz PLL ncpo1 values for both oscillator frequencies. Link: https://github.com/BPI-SINOVOIP/BPI-R2-bsp/blob/81d24bbce7d99524d0771a8bdb2d6663e4eb4faa/u-boot-mt/drivers/net/rt2880_eth.c#L2195 Fixes: 7ef6f6f8d237 ("net: dsa: mt7530: Add MT7621 TRGMII mode support") Tested-by: Bartel Eerdekens Signed-off-by: Arınç ÜNAL Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/dsa/mt7530.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 70155e996f7d..d3b42adef057 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -404,9 +404,9 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface) case PHY_INTERFACE_MODE_TRGMII: trgint = 1; if (priv->id == ID_MT7621) { - /* PLL frequency: 150MHz: 1.2GBit */ + /* PLL frequency: 125MHz: 1.0GBit */ if (xtal == HWTRAP_XTAL_40MHZ) - ncpo1 = 0x0780; + ncpo1 = 0x0640; if (xtal == HWTRAP_XTAL_25MHZ) ncpo1 = 0x0a00; } else { /* PLL frequency: 250MHz: 2.0Gbit */ From c043714ef230065fd5303307acbd2ca2627cb567 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 1 Aug 2022 14:38:59 +0800 Subject: [PATCH 36/92] virtio_net: split free_unused_bufs() [ Upstream commit 6e345f8c7cd029ad3aaece15ad4425ac26e4eb63 ] This patch separates two functions for freeing sq buf and rq buf from free_unused_bufs(). When supporting the enable/disable tx/rq queue in the future, it is necessary to support separate recovery of a sq buf or a rq buf. Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20220801063902.129329-40-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. 
Tsirkin Stable-dep-of: f8bb51043945 ("virtio_net: suppress cpu stall when free_unused_bufs") Signed-off-by: Sasha Levin --- drivers/net/virtio_net.c | 41 ++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 47c9118cc92a..75219c8f4a63 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2747,6 +2747,27 @@ static void free_receive_page_frags(struct virtnet_info *vi) put_page(vi->rq[i].alloc_frag.page); } +static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf) +{ + if (!is_xdp_frame(buf)) + dev_kfree_skb(buf); + else + xdp_return_frame(ptr_to_xdp(buf)); +} + +static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf) +{ + struct virtnet_info *vi = vq->vdev->priv; + int i = vq2rxq(vq); + + if (vi->mergeable_rx_bufs) + put_page(virt_to_head_page(buf)); + else if (vi->big_packets) + give_pages(&vi->rq[i], buf); + else + put_page(virt_to_head_page(buf)); +} + static void free_unused_bufs(struct virtnet_info *vi) { void *buf; @@ -2754,26 +2775,14 @@ static void free_unused_bufs(struct virtnet_info *vi) for (i = 0; i < vi->max_queue_pairs; i++) { struct virtqueue *vq = vi->sq[i].vq; - while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) { - if (!is_xdp_frame(buf)) - dev_kfree_skb(buf); - else - xdp_return_frame(ptr_to_xdp(buf)); - } + while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) + virtnet_sq_free_unused_buf(vq, buf); } for (i = 0; i < vi->max_queue_pairs; i++) { struct virtqueue *vq = vi->rq[i].vq; - - while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) { - if (vi->mergeable_rx_bufs) { - put_page(virt_to_head_page(buf)); - } else if (vi->big_packets) { - give_pages(&vi->rq[i], buf); - } else { - put_page(virt_to_head_page(buf)); - } - } + while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) + virtnet_rq_free_unused_buf(vq, buf); } } From b6b15de5129e9a02c59640cd43fa1955036ed674 Mon Sep 17 00:00:00 
2001 From: Wenliang Wang Date: Thu, 4 May 2023 10:27:06 +0800 Subject: [PATCH 37/92] virtio_net: suppress cpu stall when free_unused_bufs [ Upstream commit f8bb5104394560e29017c25bcade4c6b7aabd108 ] For multi-queue and large ring-size use case, the following error occurred when free_unused_bufs: rcu: INFO: rcu_sched self-detected stall on CPU. Fixes: 986a4f4d452d ("virtio_net: multiqueue support") Signed-off-by: Wenliang Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/virtio_net.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 75219c8f4a63..119a32f34b53 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2777,12 +2777,14 @@ static void free_unused_bufs(struct virtnet_info *vi) struct virtqueue *vq = vi->sq[i].vq; while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) virtnet_sq_free_unused_buf(vq, buf); + cond_resched(); } for (i = 0; i < vi->max_queue_pairs; i++) { struct virtqueue *vq = vi->rq[i].vq; while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) virtnet_rq_free_unused_buf(vq, buf); + cond_resched(); } } From 796616f216d06269aabc4e7801f251f6f9c1c364 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Thu, 4 May 2023 16:03:59 +0800 Subject: [PATCH 38/92] net: enetc: check the index of the SFI rather than the handle [ Upstream commit 299efdc2380aac588557f4d0b2ce7bee05bd0cf2 ] We should check whether the current SFI (Stream Filter Instance) table is full before creating a new SFI entry. However, the previous logic checks the handle by mistake and might lead to unpredictable behavior. Fixes: 888ae5a3952b ("net: enetc: add tc flower psfp offload driver") Signed-off-by: Wei Fang Reviewed-by: Leon Romanovsky Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/enetc/enetc_qos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index 5841721c8119..8d92dc6bc994 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -1266,7 +1266,7 @@ static int enetc_psfp_parse_clsflower(struct enetc_ndev_priv *priv, int index; index = enetc_get_free_index(priv); - if (sfi->handle < 0) { + if (index < 0) { NL_SET_ERR_MSG_MOD(extack, "No Stream Filter resource!"); err = -ENOSPC; goto free_fmi; From 1ebd0dfb271e023058211e3e6c12a73aa8253982 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Tue, 28 Mar 2023 16:59:08 +0530 Subject: [PATCH 39/92] perf vendor events power9: Remove UTF-8 characters from JSON files [ Upstream commit 5d9df8731c0941f3add30f96745a62586a0c9d52 ] Commit 3c22ba5243040c13 ("perf vendor events powerpc: Update POWER9 events") added and updated power9 PMU JSON events. However some of the JSON events which are part of other.json and pipeline.json files, contain UTF-8 characters in their brief description. Having UTF-8 characters could break the perf build on some distros. Fix this issue by removing the UTF-8 characters from other.json and pipeline.json files. 
Result without the fix: [command]# file -i pmu-events/arch/powerpc/power9/* pmu-events/arch/powerpc/power9/cache.json: application/json; charset=us-ascii pmu-events/arch/powerpc/power9/floating-point.json: application/json; charset=us-ascii pmu-events/arch/powerpc/power9/frontend.json: application/json; charset=us-ascii pmu-events/arch/powerpc/power9/marked.json: application/json; charset=us-ascii pmu-events/arch/powerpc/power9/memory.json: application/json; charset=us-ascii pmu-events/arch/powerpc/power9/metrics.json: application/json; charset=us-ascii pmu-events/arch/powerpc/power9/nest_metrics.json: application/json; charset=us-ascii pmu-events/arch/powerpc/power9/other.json: application/json; charset=utf-8 pmu-events/arch/powerpc/power9/pipeline.json: application/json; charset=utf-8 pmu-events/arch/powerpc/power9/pmc.json: application/json; charset=us-ascii pmu-events/arch/powerpc/power9/translation.json: application/json; charset=us-ascii [command]# Result with the fix: [command]# file -i pmu-events/arch/powerpc/power9/* pmu-events/arch/powerpc/power9/cache.json: application/json; charset=us-ascii pmu-events/arch/powerpc/power9/floating-point.json: application/json; charset=us-ascii pmu-events/arch/powerpc/power9/frontend.json: application/json; charset=us-ascii pmu-events/arch/powerpc/power9/marked.json: application/json; charset=us-ascii pmu-events/arch/powerpc/power9/memory.json: application/json; charset=us-ascii pmu-events/arch/powerpc/power9/metrics.json: application/json; charset=us-ascii pmu-events/arch/powerpc/power9/nest_metrics.json: application/json; charset=us-ascii pmu-events/arch/powerpc/power9/other.json: application/json; charset=us-ascii pmu-events/arch/powerpc/power9/pipeline.json: application/json; charset=us-ascii pmu-events/arch/powerpc/power9/pmc.json: application/json; charset=us-ascii pmu-events/arch/powerpc/power9/translation.json: application/json; charset=us-ascii [command]# Fixes: 3c22ba5243040c13 ("perf vendor events powerpc: 
Update POWER9 events") Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Kajol Jain Acked-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Cc: Athira Rajeev Cc: Disha Goel Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Sukadev Bhattiprolu Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/lkml/ZBxP77deq7ikTxwG@kernel.org/ Link: https://lore.kernel.org/r/20230328112908.113158-1-kjain@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/pmu-events/arch/powerpc/power9/other.json | 4 ++-- tools/perf/pmu-events/arch/powerpc/power9/pipeline.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/pmu-events/arch/powerpc/power9/other.json b/tools/perf/pmu-events/arch/powerpc/power9/other.json index 3f69422c21f9..f10bd554521a 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/other.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/other.json @@ -1417,7 +1417,7 @@ { "EventCode": "0x45054", "EventName": "PM_FMA_CMPL", - "BriefDescription": "two flops operation completed (fmadd, fnmadd, fmsub, fnmsub) Scalar instructions only. " + "BriefDescription": "two flops operation completed (fmadd, fnmadd, fmsub, fnmsub) Scalar instructions only." }, { "EventCode": "0x201E8", @@ -2017,7 +2017,7 @@ { "EventCode": "0xC0BC", "EventName": "PM_LSU_FLUSH_OTHER", - "BriefDescription": "Other LSU flushes including: Sync (sync ack from L2 caused search of LRQ for oldest snooped load, This will either signal a Precise Flush of the oldest snooped loa or a Flush Next PPC); Data Valid Flush Next (several cases of this, one example is store and reload are lined up such that a store-hit-reload scenario exists and the CDF has already launched and has gotten bad/stale data); Bad Data Valid Flush Next (might be a few cases of this, one example is a larxa (D$ hit) return data and dval but can't allocate to LMQ (LMQ full or other reason). Already gave dval but can't watch it for snoop_hit_larx. 
Need to take the “bad dval” back and flush all younger ops)" + "BriefDescription": "Other LSU flushes including: Sync (sync ack from L2 caused search of LRQ for oldest snooped load, This will either signal a Precise Flush of the oldest snooped loa or a Flush Next PPC); Data Valid Flush Next (several cases of this, one example is store and reload are lined up such that a store-hit-reload scenario exists and the CDF has already launched and has gotten bad/stale data); Bad Data Valid Flush Next (might be a few cases of this, one example is a larxa (D$ hit) return data and dval but can't allocate to LMQ (LMQ full or other reason). Already gave dval but can't watch it for snoop_hit_larx. Need to take the 'bad dval' back and flush all younger ops)" }, { "EventCode": "0x5094", diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json index d0265f255de2..723bffa41c44 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json @@ -442,7 +442,7 @@ { "EventCode": "0x4D052", "EventName": "PM_2FLOP_CMPL", - "BriefDescription": "DP vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres ,fsqrte, fneg " + "BriefDescription": "DP vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres ,fsqrte, fneg" }, { "EventCode": "0x1F142", From 604b650fb5742d8a1e654786372473ca3838f2be Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 12 Apr 2023 10:23:35 -0300 Subject: [PATCH 40/92] perf pmu: zfree() expects a pointer to a pointer to zero it after freeing its contents [ Upstream commit 57f14b5ae1a97537f2abd2828ee7212cada7036e ] An audit showed just this one problem with zfree(), fix it. 
Fixes: 9fbc61f832ebf432 ("perf pmu: Add support for PMU capabilities") Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index ac45da0302a7..d322305bc182 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1670,7 +1670,7 @@ static int perf_pmu__new_caps(struct list_head *list, char *name, char *value) return 0; free_name: - zfree(caps->name); + zfree(&caps->name); free_caps: free(caps); From be3517ae6b8093baa5c7e04cd4c8713c6e6eacaa Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 13 Apr 2023 14:46:39 +0200 Subject: [PATCH 41/92] perf map: Delete two variable initialisations before null pointer checks in sort__sym_from_cmp() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c160118a90d4acf335993d8d59b02ae2147a524e ] Addresses of two data structure members were determined before corresponding null pointer checks in the implementation of the function “sort__sym_from_cmp”. Thus avoid the risk for undefined behaviour by removing extra initialisations for the local variables “from_l” and “from_r” (also because they were already reassigned with the same value behind this pointer check). This issue was detected by using the Coccinelle software. 
Fixes: 1b9e97a2a95e4941 ("perf tools: Fix report -F symbol_from for data without branch info") Signed-off-by: Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: German Gomez Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Link: https://lore.kernel.org/cocci/54a21fea-64e3-de67-82ef-d61b90ffad05@web.de/ Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/sort.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 5e9e96452b9e..42806102010b 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -873,8 +873,7 @@ static int hist_entry__dso_to_filter(struct hist_entry *he, int type, static int64_t sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right) { - struct addr_map_symbol *from_l = &left->branch_info->from; - struct addr_map_symbol *from_r = &right->branch_info->from; + struct addr_map_symbol *from_l, *from_r; if (!left->branch_info || !right->branch_info) return cmp_null(left->branch_info, right->branch_info); From 87a1fa0ad74e844c1801515df94ff0af0a0121b8 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 17 Apr 2023 22:25:09 +0200 Subject: [PATCH 42/92] crypto: sun8i-ss - Fix a test in sun8i_ss_setup_ivs() [ Upstream commit 8fd91151ebcb21b3f2f2bf158ac6092192550b2b ] SS_ENCRYPTION is (0 << 7 = 0), so the test can never be true. Use a direct comparison to SS_ENCRYPTION instead. The same kind of test is already done the same way in sun8i_ss_run_task(). 
Fixes: 359e893e8af4 ("crypto: sun8i-ss - rework handling of IV") Signed-off-by: Christophe JAILLET Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c index 49c7a8b464dd..8a94f812e6d2 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c @@ -132,7 +132,7 @@ static int sun8i_ss_setup_ivs(struct skcipher_request *areq) } rctx->p_iv[i] = a; /* we need to setup all others IVs only in the decrypt way */ - if (rctx->op_dir & SS_ENCRYPTION) + if (rctx->op_dir == SS_ENCRYPTION) return 0; todo = min(len, sg_dma_len(sg)); len -= todo; From 7a4db11f00f3412eb4931fcb5e5cc91675b89b6e Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Thu, 27 Apr 2023 01:28:41 +0000 Subject: [PATCH 43/92] perf symbols: Fix return incorrect build_id size in elf_read_build_id() [ Upstream commit 1511e4696acb715a4fe48be89e1e691daec91c0e ] In elf_read_build_id(), if gnu build_id is found, the size of the actually copied data should be returned. If descsz is greater than BUILD_ID_SIZE, write_buildid data access may occur. 
Fixes: be96ea8ffa788dcc ("perf symbols: Fix issue with binaries using 16-bytes buildids (v2)") Reported-by: Will Ochowicz Signed-off-by: Yang Jihong Tested-by: Will Ochowicz Acked-by: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/lkml/CWLP265MB49702F7BA3D6D8F13E4B1A719C649@CWLP265MB4970.GBRP265.PROD.OUTLOOK.COM/T/ Link: https://lore.kernel.org/r/20230427012841.231729-1-yangjihong1@huawei.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/symbol-elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 5221f272f85c..b171d134ce87 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -548,7 +548,7 @@ static int elf_read_build_id(Elf *elf, void *bf, size_t size) size_t sz = min(size, descsz); memcpy(bf, ptr, sz); memset(bf + sz, 0, size - sz); - err = descsz; + err = sz; break; } } From 08fa23adbdccd1dca92e0f6a14debfc8c240ace6 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 12 Apr 2023 11:33:09 +0100 Subject: [PATCH 44/92] btrfs: fix btrfs_prev_leaf() to not return the same key twice commit 6f932d4ef007d6a4ae03badcb749fbb8f49196f6 upstream. A call to btrfs_prev_leaf() may end up returning a path that points to the same item (key) again. This happens if, during btrfs_prev_leaf(), after we release the path, a concurrent insertion happens, which moves items from a sibling into the front of the previous leaf, and an item with the computed previous key does not exist. For example, suppose we have the two following leaves: Leaf A ------------------------------------------------------------- | ... 
key (300 96 10) key (300 96 15) key (300 96 16) | ------------------------------------------------------------- slot 20 slot 21 slot 22 Leaf B ------------------------------------------------------------- | key (300 96 20) key (300 96 21) key (300 96 22) ... | ------------------------------------------------------------- slot 0 slot 1 slot 2 If we call btrfs_prev_leaf(), from btrfs_previous_item() for example, with a path pointing to leaf B and slot 0 and the following happens: 1) At btrfs_prev_leaf() we compute the previous key to search as: (300 96 19), which is a key that does not exist in the tree; 2) Then we call btrfs_release_path() at btrfs_prev_leaf(); 3) Some other task inserts a key at leaf A, that sorts before the key at slot 20, for example it has an objectid of 299. In order to make room for the new key, the key at slot 22 is moved to the front of leaf B. This happens at push_leaf_right(), called from split_leaf(). After this leaf B now looks like: -------------------------------------------------------------------------------- | key (300 96 16) key (300 96 20) key (300 96 21) key (300 96 22) ... | -------------------------------------------------------------------------------- slot 0 slot 1 slot 2 slot 3 4) At btrfs_prev_leaf() we call btrfs_search_slot() for the computed previous key: (300 96 19). Since the key does not exist, btrfs_search_slot() returns 1 and with a path pointing to leaf B and slot 1, the item with key (300 96 20); 5) This makes btrfs_prev_leaf() return a path that points to slot 1 of leaf B, the same key as before it was called, since the key at slot 0 of leaf B (300 96 16) is less than the computed previous key, which is (300 96 19); 6) As a consequence btrfs_previous_item() returns a path that points again to the item with key (300 96 20). For some users of btrfs_prev_leaf() or btrfs_previous_item() this may not be a functional problem, despite it not making sense to return a new path pointing again to the same item/key. 
However for a caller such as tree-log.c:log_dir_items(), this has a bad consequence, as it can result in not logging some dir index deletions in case the directory is being logged without holding the inode's VFS lock (logging triggered while logging a child inode for example) - for the example scenario above, in case the dir index keys 17, 18 and 19 were deleted in the current transaction. CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ctree.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 5addd1e36a8e..3e55245e54e7 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -5160,10 +5160,12 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) { struct btrfs_key key; + struct btrfs_key orig_key; struct btrfs_disk_key found_key; int ret; btrfs_item_key_to_cpu(path->nodes[0], &key, 0); + orig_key = key; if (key.offset > 0) { key.offset--; @@ -5180,8 +5182,36 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) btrfs_release_path(path); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) + if (ret <= 0) return ret; + + /* + * Previous key not found. Even if we were at slot 0 of the leaf we had + * before releasing the path and calling btrfs_search_slot(), we now may + * be in a slot pointing to the same original key - this can happen if + * after we released the path, one of more items were moved from a + * sibling leaf into the front of the leaf we had due to an insertion + * (see push_leaf_right()). + * If we hit this case and our slot is > 0 and just decrement the slot + * so that the caller does not process the same key again, which may or + * may not break the caller, depending on its logic. 
+ */ + if (path->slots[0] < btrfs_header_nritems(path->nodes[0])) { + btrfs_item_key(path->nodes[0], &found_key, path->slots[0]); + ret = comp_keys(&found_key, &orig_key); + if (ret == 0) { + if (path->slots[0] > 0) { + path->slots[0]--; + return 0; + } + /* + * At slot 0, same key as before, it means orig_key is + * the lowest, leftmost, key in the tree. We're done. + */ + return 1; + } + } + btrfs_item_key(path->nodes[0], &found_key, 0); ret = comp_keys(&found_key, &key); /* From 148b16cd30b202999ec5b534e3e5d8ab4b766f21 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 2 May 2023 16:00:06 -0400 Subject: [PATCH 45/92] btrfs: don't free qgroup space unless specified commit d246331b78cbef86237f9c22389205bc9b4e1cc1 upstream. Boris noticed in his simple quotas testing that he was getting a leak with Sweet Tea's change to subvol create that stopped doing a transaction commit. This was just a side effect of that change. In the delayed inode code we have an optimization that will free extra reservations if we think we can pack a dir item into an already modified leaf. Previously this wouldn't be triggered in the subvolume create case because we'd commit the transaction, it was still possible but much harder to trigger. It could actually be triggered if we did a mkdir && subvol create with qgroups enabled. This occurs because in btrfs_insert_delayed_dir_index(), which gets called when we're adding the dir item, we do the following: btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL); if we're able to skip reserving space. The problem here is that trans->block_rsv points at the temporary block rsv for the subvolume create, which has qgroup reservations in the block rsv. 
This is a problem because btrfs_block_rsv_release() will do the following: if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) { qgroup_to_release = block_rsv->qgroup_rsv_reserved - block_rsv->qgroup_rsv_size; block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size; } The temporary block rsv just has ->qgroup_rsv_reserved set, ->qgroup_rsv_size == 0. The optimization in btrfs_insert_delayed_dir_index() sets ->qgroup_rsv_reserved = 0. Then later on when we call btrfs_subvolume_release_metadata() which has btrfs_block_rsv_release(fs_info, rsv, (u64)-1, &qgroup_to_release); btrfs_qgroup_convert_reserved_meta(root, qgroup_to_release); qgroup_to_release is set to 0, and we do not convert the reserved metadata space. The problem here is that the block rsv code has been unconditionally messing with ->qgroup_rsv_reserved, because the main place this is used is delalloc, and any time we call btrfs_block_rsv_release() we do it with qgroup_to_release set, and thus do the proper accounting. The subvolume code is the only other code that uses the qgroup reservation stuff, but it's intermingled with the above optimization, and thus was getting its reservation freed out from underneath it and thus leaking the reserved space. The solution is to simply not mess with the qgroup reservations if we don't have qgroup_to_release set. This works with the existing code as anything that messes with the delalloc reservations always have qgroup_to_release set. This fixes the leak that Boris was observing. 
Reviewed-by: Qu Wenruo CC: stable@vger.kernel.org # 5.4+ Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/block-rsv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c index bc920afe23bf..eb41dc2f6b40 100644 --- a/fs/btrfs/block-rsv.c +++ b/fs/btrfs/block-rsv.c @@ -121,7 +121,8 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info, } else { num_bytes = 0; } - if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) { + if (qgroup_to_release_ret && + block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) { qgroup_to_release = block_rsv->qgroup_rsv_reserved - block_rsv->qgroup_rsv_size; block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size; From 829c20fd7a7b2fe440a1a5261b6f366e7398ba92 Mon Sep 17 00:00:00 2001 From: Anastasia Belova Date: Wed, 26 Apr 2023 14:53:23 +0300 Subject: [PATCH 46/92] btrfs: print-tree: parent bytenr must be aligned to sector size commit c87f318e6f47696b4040b58f460d5c17ea0280e6 upstream. Check nodesize to sectorsize in alignment check in print_extent_item. The comment states that and this is correct, similar check is done elsewhere in the functions. Found by Linux Verification Center (linuxtesting.org) with SVACE. 
Fixes: ea57788eb76d ("btrfs: require only sector size alignment for parent eb bytenr") CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Qu Wenruo Signed-off-by: Anastasia Belova Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/print-tree.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index c62771f3af8c..e98ba4e091b3 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -147,10 +147,10 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type) pr_cont("shared data backref parent %llu count %u\n", offset, btrfs_shared_data_ref_count(eb, sref)); /* - * offset is supposed to be a tree block which - * must be aligned to nodesize. + * Offset is supposed to be a tree block which must be + * aligned to sectorsize. */ - if (!IS_ALIGNED(offset, eb->fs_info->nodesize)) + if (!IS_ALIGNED(offset, eb->fs_info->sectorsize)) pr_info( "\t\t\t(parent %llu not aligned to sectorsize %u)\n", offset, eb->fs_info->sectorsize); From 8c2cdb7326f0672db21d1d114aaf7b2193ca21ae Mon Sep 17 00:00:00 2001 From: Pawel Witek Date: Fri, 5 May 2023 17:14:59 +0200 Subject: [PATCH 47/92] cifs: fix pcchunk length type in smb2_copychunk_range commit d66cde50c3c868af7abddafce701bb86e4a93039 upstream. Change type of pcchunk->Length from u32 to u64 to match smb2_copychunk_range arguments type. Fixes the problem where performing server-side copy with CIFS_IOC_COPYCHUNK_FILE ioctl resulted in incomplete copy of large files while returning -EINVAL. 
Fixes: 9bf0c9cd4314 ("CIFS: Fix SMB2/SMB3 Copy offload support (refcopy) for large files") Cc: Signed-off-by: Pawel Witek Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 120c7cb11b02..015b7b37edee 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1784,7 +1784,7 @@ smb2_copychunk_range(const unsigned int xid, pcchunk->SourceOffset = cpu_to_le64(src_off); pcchunk->TargetOffset = cpu_to_le64(dest_off); pcchunk->Length = - cpu_to_le32(min_t(u32, len, tcon->max_bytes_chunk)); + cpu_to_le32(min_t(u64, len, tcon->max_bytes_chunk)); /* Request server copy to target from src identified by key */ kfree(retbuf); From 4b87eec73e47e70bcafe98c36f32483a033e1688 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 5 May 2023 23:03:23 +0200 Subject: [PATCH 48/92] platform/x86: touchscreen_dmi: Add upside-down quirk for GDIX1002 ts on the Juno Tablet commit 6abfa99ce52f61a31bcfc2aaaae09006f5665495 upstream. The Juno Computers Juno Tablet has an upside-down mounted Goodix touchscreen. Add a quirk to invert both axes to correct for this. 
Link: https://junocomputers.com/us/product/juno-tablet/ Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20230505210323.43177-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/touchscreen_dmi.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index b96fbc8dba09..0d1356e4e2c3 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -381,6 +381,11 @@ static const struct ts_dmi_data glavey_tm800a550l_data = { .properties = glavey_tm800a550l_props, }; +static const struct ts_dmi_data gdix1002_00_upside_down_data = { + .acpi_name = "GDIX1002:00", + .properties = gdix1001_upside_down_props, +}; + static const struct property_entry gp_electronic_t701_props[] = { PROPERTY_ENTRY_U32("touchscreen-size-x", 960), PROPERTY_ENTRY_U32("touchscreen-size-y", 640), @@ -1227,6 +1232,18 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_MATCH(DMI_BIOS_VERSION, "jumperx.T87.KFBNEEA"), }, }, + { + /* Juno Tablet */ + .driver_data = (void *)&gdix1002_00_upside_down_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Default string"), + /* Both product- and board-name being "Default string" is somewhat rare */ + DMI_MATCH(DMI_PRODUCT_NAME, "Default string"), + DMI_MATCH(DMI_BOARD_NAME, "Default string"), + /* Above matches are too generic, add partial bios-version match */ + DMI_MATCH(DMI_BIOS_VERSION, "JP2V1."), + }, + }, { /* Mediacom WinPad 7.0 W700 (same hw as Wintron surftab 7") */ .driver_data = (void *)&trekstor_surftab_wintron70_data, From 73aef14407de964b2a05d08c1f431298c7c3b7b5 Mon Sep 17 00:00:00 2001 From: Andrey Avdeev Date: Sun, 30 Apr 2023 11:01:10 +0300 Subject: [PATCH 49/92] platform/x86: touchscreen_dmi: Add info for the Dexp Ursus KX210i commit 4b65f95c87c35699bc6ad540d6b9dd7f950d0924 upstream. 
Add touchscreen info for the Dexp Ursus KX210i Signed-off-by: Andrey Avdeev Link: https://lore.kernel.org/r/ZE4gRgzRQCjXFYD0@avdeevavpc Cc: stable@vger.kernel.org Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/touchscreen_dmi.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 0d1356e4e2c3..55a18cd0c298 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -327,6 +327,22 @@ static const struct ts_dmi_data dexp_ursus_7w_data = { .properties = dexp_ursus_7w_props, }; +static const struct property_entry dexp_ursus_kx210i_props[] = { + PROPERTY_ENTRY_U32("touchscreen-min-x", 5), + PROPERTY_ENTRY_U32("touchscreen-min-y", 2), + PROPERTY_ENTRY_U32("touchscreen-size-x", 1720), + PROPERTY_ENTRY_U32("touchscreen-size-y", 1137), + PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-dexp-ursus-kx210i.fw"), + PROPERTY_ENTRY_U32("silead,max-fingers", 10), + PROPERTY_ENTRY_BOOL("silead,home-button"), + { } +}; + +static const struct ts_dmi_data dexp_ursus_kx210i_data = { + .acpi_name = "MSSL1680:00", + .properties = dexp_ursus_kx210i_props, +}; + static const struct property_entry digma_citi_e200_props[] = { PROPERTY_ENTRY_U32("touchscreen-size-x", 1980), PROPERTY_ENTRY_U32("touchscreen-size-y", 1500), @@ -1123,6 +1139,14 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "7W"), }, }, + { + /* DEXP Ursus KX210i */ + .driver_data = (void *)&dexp_ursus_kx210i_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "INSYDE Corp."), + DMI_MATCH(DMI_PRODUCT_NAME, "S107I"), + }, + }, { /* Digma Citi E200 */ .driver_data = (void *)&digma_citi_e200_data, From 2d65c97777e5b4a845637800d5d7b648f5772106 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 24 Apr 2023 18:32:19 +0200 Subject: [PATCH 50/92] inotify: Avoid reporting event with invalid 
wd commit c915d8f5918bea7c3962b09b8884ca128bfd9b0c upstream. When inotify_freeing_mark() races with inotify_handle_inode_event() it can happen that inotify_handle_inode_event() sees that i_mark->wd got already reset to -1 and reports this value to userspace which can confuse the inotify listener. Avoid the problem by validating that wd is sensible (and pretend the mark got removed before the event got generated otherwise). CC: stable@vger.kernel.org Fixes: 7e790dd5fc93 ("inotify: fix error paths in inotify_update_watch") Message-Id: <20230424163219.9250-1-jack@suse.cz> Reported-by: syzbot+4a06d4373fd52f0b2f9c@syzkaller.appspotmail.com Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/notify/inotify/inotify_fsnotify.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 1901d799909b..66991c7fef9e 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -64,7 +64,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask, struct fsnotify_event *fsn_event; struct fsnotify_group *group = inode_mark->group; int ret; - int len = 0; + int len = 0, wd; int alloc_len = sizeof(struct inotify_event_info); struct mem_cgroup *old_memcg; @@ -79,6 +79,13 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask, i_mark = container_of(inode_mark, struct inotify_inode_mark, fsn_mark); + /* + * We can be racing with mark being detached. Don't report event with + * invalid wd. + */ + wd = READ_ONCE(i_mark->wd); + if (wd == -1) + return 0; /* * Whoever is interested in the event, pays for the allocation. 
Do not * trigger OOM killer in the target monitoring memcg as it may have @@ -109,7 +116,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask, fsn_event = &event->fse; fsnotify_init_event(fsn_event, 0); event->mask = mask; - event->wd = i_mark->wd; + event->wd = wd; event->sync_cookie = cookie; event->name_len = len; if (len) From bbad64abd610fb007ab0650fe3a953261c0b8998 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 5 Mar 2023 20:00:34 -0800 Subject: [PATCH 51/92] sh: math-emu: fix macro redefined warning commit 58a49ad90939386a8682e842c474a0d2c00ec39c upstream. Fix a warning that was reported by the kernel test robot: In file included from ../include/math-emu/soft-fp.h:27, from ../arch/sh/math-emu/math.c:22: ../arch/sh/include/asm/sfp-machine.h:17: warning: "__BYTE_ORDER" redefined 17 | #define __BYTE_ORDER __BIG_ENDIAN In file included from ../arch/sh/math-emu/math.c:21: ../arch/sh/math-emu/sfp-util.h:71: note: this is the location of the previous definition 71 | #define __BYTE_ORDER __LITTLE_ENDIAN Fixes: b929926f01f2 ("sh: define __BIG_ENDIAN for math-emu") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Link: lore.kernel.org/r/202111121827.6v6SXtVv-lkp@intel.com Cc: John Paul Adrian Glaubitz Cc: Yoshinori Sato Cc: Rich Felker Cc: linux-sh@vger.kernel.org Reviewed-by: Geert Uytterhoeven Cc: stable@vger.kernel.org Reviewed-by: John Paul Adrian Glaubitz Link: https://lore.kernel.org/r/20230306040037.20350-5-rdunlap@infradead.org Signed-off-by: John Paul Adrian Glaubitz Signed-off-by: Greg Kroah-Hartman --- arch/sh/math-emu/sfp-util.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/sh/math-emu/sfp-util.h b/arch/sh/math-emu/sfp-util.h index 784f541344f3..bda50762b3d3 100644 --- a/arch/sh/math-emu/sfp-util.h +++ b/arch/sh/math-emu/sfp-util.h @@ -67,7 +67,3 @@ } while (0) #define abort() return 0 - -#define __BYTE_ORDER __LITTLE_ENDIAN - - From f19bc0d2a68c9c4f0e4d177642468d3e0fbb87cf Mon Sep 17 00:00:00 2001 
From: Randy Dunlap Date: Sun, 5 Mar 2023 20:00:37 -0800 Subject: [PATCH 52/92] sh: mcount.S: fix build error when PRINTK is not enabled commit c2bd1e18c6f85c0027da2e5e7753b9bfd9f8e6dc upstream. Fix a build error in mcount.S when CONFIG_PRINTK is not enabled. Fixes this build error: sh2-linux-ld: arch/sh/lib/mcount.o: in function `stack_panic': (.text+0xec): undefined reference to `dump_stack' Fixes: e460ab27b6c3 ("sh: Fix up stack overflow check with ftrace disabled.") Signed-off-by: Randy Dunlap Cc: John Paul Adrian Glaubitz Cc: Yoshinori Sato Cc: Rich Felker Suggested-by: Geert Uytterhoeven Cc: stable@vger.kernel.org Reviewed-by: John Paul Adrian Glaubitz Link: https://lore.kernel.org/r/20230306040037.20350-8-rdunlap@infradead.org Signed-off-by: John Paul Adrian Glaubitz Signed-off-by: Greg Kroah-Hartman --- arch/sh/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sh/Kconfig.debug b/arch/sh/Kconfig.debug index 97b0e26cf05a..7bc1b10b81c9 100644 --- a/arch/sh/Kconfig.debug +++ b/arch/sh/Kconfig.debug @@ -18,7 +18,7 @@ config SH_STANDARD_BIOS config STACK_DEBUG bool "Check for stack overflows" - depends on DEBUG_KERNEL + depends on DEBUG_KERNEL && PRINTK help This option will cause messages to be printed if free stack space drops below a certain limit. Saying Y here will add overhead to From 9245f34029b7c09d40442d20f4056a2ca5b53ae5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 5 Mar 2023 20:00:33 -0800 Subject: [PATCH 53/92] sh: init: use OF_EARLY_FLATTREE for early init commit 6cba655543c7959f8a6d2979b9d40a6a66b7ed4f upstream. When CONFIG_OF_EARLY_FLATTREE and CONFIG_SH_DEVICE_TREE are not set, SH3 build fails with a call to early_init_dt_scan(), so in arch/sh/kernel/setup.c and arch/sh/kernel/head_32.S, use CONFIG_OF_EARLY_FLATTREE instead of CONFIG_OF_FLATTREE. 
Fixes this build error: ../arch/sh/kernel/setup.c: In function 'sh_fdt_init': ../arch/sh/kernel/setup.c:262:26: error: implicit declaration of function 'early_init_dt_scan' [-Werror=implicit-function-declaration] 262 | if (!dt_virt || !early_init_dt_scan(dt_virt)) { Fixes: 03767daa1387 ("sh: fix build regression with CONFIG_OF && !CONFIG_OF_FLATTREE") Fixes: eb6b6930a70f ("sh: fix memory corruption of unflattened device tree") Signed-off-by: Randy Dunlap Suggested-by: Rob Herring Cc: Frank Rowand Cc: devicetree@vger.kernel.org Cc: Rich Felker Cc: Yoshinori Sato Cc: Geert Uytterhoeven Cc: John Paul Adrian Glaubitz Cc: linux-sh@vger.kernel.org Cc: stable@vger.kernel.org Reviewed-by: John Paul Adrian Glaubitz Link: https://lore.kernel.org/r/20230306040037.20350-4-rdunlap@infradead.org Signed-off-by: John Paul Adrian Glaubitz Signed-off-by: Greg Kroah-Hartman --- arch/sh/kernel/head_32.S | 6 +++--- arch/sh/kernel/setup.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/sh/kernel/head_32.S b/arch/sh/kernel/head_32.S index 4adbd4ade319..b603b7968b38 100644 --- a/arch/sh/kernel/head_32.S +++ b/arch/sh/kernel/head_32.S @@ -64,7 +64,7 @@ ENTRY(_stext) ldc r0, r6_bank #endif -#ifdef CONFIG_OF_FLATTREE +#ifdef CONFIG_OF_EARLY_FLATTREE mov r4, r12 ! Store device tree blob pointer in r12 #endif @@ -315,7 +315,7 @@ ENTRY(_stext) 10: #endif -#ifdef CONFIG_OF_FLATTREE +#ifdef CONFIG_OF_EARLY_FLATTREE mov.l 8f, r0 ! Make flat device tree available early. 
jsr @r0 mov r12, r4 @@ -346,7 +346,7 @@ ENTRY(stack_start) 5: .long start_kernel 6: .long cpu_init 7: .long init_thread_union -#if defined(CONFIG_OF_FLATTREE) +#if defined(CONFIG_OF_EARLY_FLATTREE) 8: .long sh_fdt_init #endif diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index 4144be650d41..556e463a43d2 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -244,7 +244,7 @@ void __init __weak plat_early_device_setup(void) { } -#ifdef CONFIG_OF_FLATTREE +#ifdef CONFIG_OF_EARLY_FLATTREE void __ref sh_fdt_init(phys_addr_t dt_phys) { static int done = 0; @@ -329,7 +329,7 @@ void __init setup_arch(char **cmdline_p) /* Let earlyprintk output early console messages */ sh_early_platform_driver_probe("earlyprintk", 1, 1); -#ifdef CONFIG_OF_FLATTREE +#ifdef CONFIG_OF_EARLY_FLATTREE #ifdef CONFIG_USE_BUILTIN_DTB unflatten_and_copy_device_tree(); #else From 62fe5d74ef7f4cbab25c223f3b894c7d47e8893a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 5 Mar 2023 20:00:32 -0800 Subject: [PATCH 54/92] sh: nmi_debug: fix return value of __setup handler commit d1155e4132de712a9d3066e2667ceaad39a539c5 upstream. __setup() handlers should return 1 to obsolete_checksetup() in init/main.c to indicate that the boot option has been handled. A return of 0 causes the boot option/value to be listed as an Unknown kernel parameter and added to init's (limited) argument or environment strings. Also, error return codes don't mean anything to obsolete_checksetup() -- only non-zero (usually 1) or zero. So return 1 from nmi_debug_setup(). 
Fixes: 1e1030dccb10 ("sh: nmi_debug support.") Signed-off-by: Randy Dunlap Reported-by: Igor Zhbanov Link: lore.kernel.org/r/64644a2f-4a20-bab3-1e15-3b2cdd0defe3@omprussia.ru Cc: John Paul Adrian Glaubitz Cc: Yoshinori Sato Cc: Rich Felker Cc: linux-sh@vger.kernel.org Cc: stable@vger.kernel.org Reviewed-by: John Paul Adrian Glaubitz Link: https://lore.kernel.org/r/20230306040037.20350-3-rdunlap@infradead.org Signed-off-by: John Paul Adrian Glaubitz Signed-off-by: Greg Kroah-Hartman --- arch/sh/kernel/nmi_debug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sh/kernel/nmi_debug.c b/arch/sh/kernel/nmi_debug.c index 11777867c6f5..a212b645b4cf 100644 --- a/arch/sh/kernel/nmi_debug.c +++ b/arch/sh/kernel/nmi_debug.c @@ -49,7 +49,7 @@ static int __init nmi_debug_setup(char *str) register_die_notifier(&nmi_debug_nb); if (*str != '=') - return 0; + return 1; for (p = str + 1; *p; p = sep + 1) { sep = strchr(p, ','); @@ -70,6 +70,6 @@ static int __init nmi_debug_setup(char *str) break; } - return 0; + return 1; } __setup("nmi_debug", nmi_debug_setup); From 30e0834becd8c35781628d3d244bdcd14e57cc31 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Mon, 20 Mar 2023 16:18:22 -0600 Subject: [PATCH 55/92] remoteproc: stm32: Call of_node_put() on iteration error commit ccadca5baf5124a880f2bb50ed1ec265415f025b upstream. Function of_phandle_iterator_next() calls of_node_put() on the last device_node it iterated over, but when the loop exits prematurely it has to be called explicitly. 
Fixes: 13140de09cc2 ("remoteproc: stm32: add an ST stm32_rproc driver") Cc: stable@vger.kernel.org Signed-off-by: Mathieu Poirier Reviewed-by: Arnaud Pouliquen Link: https://lore.kernel.org/r/20230320221826.2728078-2-mathieu.poirier@linaro.org Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/remoteproc/stm32_rproc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/remoteproc/stm32_rproc.c b/drivers/remoteproc/stm32_rproc.c index d2414cc1d90d..24760d8ea637 100644 --- a/drivers/remoteproc/stm32_rproc.c +++ b/drivers/remoteproc/stm32_rproc.c @@ -231,11 +231,13 @@ static int stm32_rproc_parse_memory_regions(struct rproc *rproc) while (of_phandle_iterator_next(&it) == 0) { rmem = of_reserved_mem_lookup(it.node); if (!rmem) { + of_node_put(it.node); dev_err(dev, "unable to acquire memory-region\n"); return -EINVAL; } if (stm32_rproc_pa_to_da(rproc, rmem->base, &da) < 0) { + of_node_put(it.node); dev_err(dev, "memory region not valid %pa\n", &rmem->base); return -EINVAL; @@ -262,8 +264,10 @@ static int stm32_rproc_parse_memory_regions(struct rproc *rproc) it.node->name); } - if (!mem) + if (!mem) { + of_node_put(it.node); return -ENOMEM; + } rproc_add_carveout(rproc, mem); index++; From 777952ce11873c66a2a0bb556258dcbbde320430 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Mon, 20 Mar 2023 16:18:23 -0600 Subject: [PATCH 56/92] remoteproc: st: Call of_node_put() on iteration error commit 8a74918948b40317a5b5bab9739d13dcb5de2784 upstream. Function of_phandle_iterator_next() calls of_node_put() on the last device_node it iterated over, but when the loop exits prematurely it has to be called explicitly. 
Fixes: 3df52ed7f269 ("remoteproc: st: add reserved memory support") Cc: stable@vger.kernel.org Signed-off-by: Mathieu Poirier Reviewed-by: Arnaud Pouliquen Link: https://lore.kernel.org/r/20230320221826.2728078-3-mathieu.poirier@linaro.org Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/remoteproc/st_remoteproc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/remoteproc/st_remoteproc.c b/drivers/remoteproc/st_remoteproc.c index a3268d95a50e..e6bd3c7a950a 100644 --- a/drivers/remoteproc/st_remoteproc.c +++ b/drivers/remoteproc/st_remoteproc.c @@ -129,6 +129,7 @@ static int st_rproc_parse_fw(struct rproc *rproc, const struct firmware *fw) while (of_phandle_iterator_next(&it) == 0) { rmem = of_reserved_mem_lookup(it.node); if (!rmem) { + of_node_put(it.node); dev_err(dev, "unable to acquire memory-region\n"); return -EINVAL; } @@ -150,8 +151,10 @@ static int st_rproc_parse_fw(struct rproc *rproc, const struct firmware *fw) it.node->name); } - if (!mem) + if (!mem) { + of_node_put(it.node); return -ENOMEM; + } rproc_add_carveout(rproc, mem); index++; From fed6318e47222c88655b7c3a9b216fa94899a6e2 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 17 Feb 2023 16:06:27 +0100 Subject: [PATCH 57/92] ARM: dts: exynos: fix WM8960 clock name in Itop Elite commit 6c950c20da38debf1ed531e0b972bd8b53d1c11f upstream. The WM8960 Linux driver expects the clock to be named "mclk". Otherwise the clock will be ignored and not prepared/enabled by the driver. 
Cc: Fixes: 339b2fb36a67 ("ARM: dts: exynos: Add TOPEET itop elite based board") Link: https://lore.kernel.org/r/20230217150627.779764-3-krzysztof.kozlowski@linaro.org Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/exynos4412-itop-elite.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/exynos4412-itop-elite.dts b/arch/arm/boot/dts/exynos4412-itop-elite.dts index f6d0a5f5d339..9a2a49420d4d 100644 --- a/arch/arm/boot/dts/exynos4412-itop-elite.dts +++ b/arch/arm/boot/dts/exynos4412-itop-elite.dts @@ -179,7 +179,7 @@ codec: wm8960@1a { compatible = "wlf,wm8960"; reg = <0x1a>; clocks = <&pmu_system_controller 0>; - clock-names = "MCLK1"; + clock-names = "mclk"; wlf,shared-lrclk; #sound-dai-cells = <0>; }; From 4a638a958230748031967662f91d7d14f5077869 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 12 Feb 2023 19:58:18 +0100 Subject: [PATCH 58/92] ARM: dts: s5pv210: correct MIPI CSIS clock name commit 665b9459bb53b8f19bd1541567e1fe9782c83c4b upstream. The Samsung S5P/Exynos MIPI CSIS bindings and Linux driver expect first clock name to be "csis". Otherwise the driver fails to probe. 
Fixes: 94ad0f6d9278 ("ARM: dts: Add Device tree for s5pv210 SoC") Cc: Link: https://lore.kernel.org/r/20230212185818.43503-2-krzysztof.kozlowski@linaro.org Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/s5pv210.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/s5pv210.dtsi b/arch/arm/boot/dts/s5pv210.dtsi index eb7e3660ada7..81ab9fe9897f 100644 --- a/arch/arm/boot/dts/s5pv210.dtsi +++ b/arch/arm/boot/dts/s5pv210.dtsi @@ -583,7 +583,7 @@ csis0: csis@fa600000 { interrupts = <29>; clocks = <&clocks CLK_CSIS>, <&clocks SCLK_CSIS>; - clock-names = "clk_csis", + clock-names = "csis", "sclk_csis"; bus-width = <4>; status = "disabled"; From 957904f531fd857a92743b11fbc9c9ffdf7f3207 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 6 Apr 2023 11:18:48 -0700 Subject: [PATCH 59/92] f2fs: fix potential corruption when moving a directory commit d94772154e524b329a168678836745d2773a6e02 upstream. F2FS has the same issue in ext4_rename causing crash revealed by xfstests/generic/707. See also commit 0813299c586b ("ext4: Fix possible corruption when moving a directory") CC: stable@vger.kernel.org Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/namei.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 72b109685db4..98263180c0ea 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -969,12 +969,20 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; } + /* + * Copied from ext4_rename: we need to protect against old.inode + * directory getting converted from inline directory format into + * a normal one. 
+ */ + if (S_ISDIR(old_inode->i_mode)) + inode_lock_nested(old_inode, I_MUTEX_NONDIR2); + err = -ENOENT; old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) { if (IS_ERR(old_page)) err = PTR_ERR(old_page); - goto out; + goto out_unlock_old; } if (S_ISDIR(old_inode->i_mode)) { @@ -1082,6 +1090,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_unlock_op(sbi); + if (S_ISDIR(old_inode->i_mode)) + inode_unlock(old_inode); + if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) f2fs_sync_fs(sbi->sb, 1); @@ -1096,6 +1107,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_put_page(old_dir_page, 0); out_old: f2fs_put_page(old_page, 0); +out_unlock_old: + if (S_ISDIR(old_inode->i_mode)) + inode_unlock(old_inode); out: if (whiteout) iput(whiteout); From c85327c1e93ceef2bef9bb2d367e8d2b08b88a62 Mon Sep 17 00:00:00 2001 From: James Cowgill Date: Wed, 12 Apr 2023 17:35:07 +0000 Subject: [PATCH 60/92] drm/panel: otm8009a: Set backlight parent to panel device commit ab4f869fba6119997f7630d600049762a2b014fa upstream. This is the logical place to put the backlight device, and it also fixes a kernel crash if the MIPI host is removed. Previously the backlight device would be unregistered twice when this happened - once as a child of the MIPI host through `mipi_dsi_host_unregister`, and once when the panel device is destroyed. 
Fixes: 12a6cbd4f3f1 ("drm/panel: otm8009a: Use new backlight API") Signed-off-by: James Cowgill Cc: stable@vger.kernel.org Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20230412173450.199592-1-james.cowgill@blaize.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/panel/panel-orisetech-otm8009a.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c b/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c index 6ac1accade80..b19597b836e3 100644 --- a/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c +++ b/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c @@ -458,7 +458,7 @@ static int otm8009a_probe(struct mipi_dsi_device *dsi) DRM_MODE_CONNECTOR_DSI); ctx->bl_dev = devm_backlight_device_register(dev, dev_name(dev), - dsi->host->dev, ctx, + dev, ctx, &otm8009a_backlight_ops, NULL); if (IS_ERR(ctx->bl_dev)) { From eed63477ae065b69ee757e38a1cd48ec51de9719 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Tue, 2 May 2023 11:59:08 -0400 Subject: [PATCH 61/92] drm/amdgpu: fix an amdgpu_irq_put() issue in gmc_v9_0_hw_fini() commit 922a76ba31adf84e72bc947267385be420c689ee upstream. As made mention of in commit 08c677cb0b43 ("drm/amdgpu: fix amdgpu_irq_put call trace in gmc_v10_0_hw_fini") and commit 13af556104fa ("drm/amdgpu: fix amdgpu_irq_put call trace in gmc_v11_0_hw_fini"). It is meaningless to call amdgpu_irq_put() for gmc.ecc_irq. So, remove it from gmc_v9_0_hw_fini(). 
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2522 Fixes: 3029c855d79f ("drm/amdgpu: Fix desktop freezed after gpu-reset") Reviewed-by: Mario Limonciello Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 1673bf3bae55..945cbdbc2f99 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1686,7 +1686,6 @@ static int gmc_v9_0_hw_fini(void *handle) return 0; } - amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); return 0; From 20ca90ceda71ed90a4d6960acbe7d5e120b40c0d Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Sat, 6 May 2023 20:06:45 +0800 Subject: [PATCH 62/92] drm/amdgpu/gfx: disable gfx9 cp_ecc_error_irq only when enabling legacy gfx ras commit 4a76680311330aefe5074bed8f06afa354b85c48 upstream. gfx9 cp_ecc_error_irq is only enabled when legacy gfx ras is asserted. So in gfx_v9_0_hw_fini, interrupt disablement for cp_ecc_error_irq should be executed under such condition, otherwise, an amdgpu_irq_put calltrace will occur.
[ 7283.170322] RIP: 0010:amdgpu_irq_put+0x45/0x70 [amdgpu] [ 7283.170964] RSP: 0018:ffff9a5fc3967d00 EFLAGS: 00010246 [ 7283.170967] RAX: ffff98d88afd3040 RBX: ffff98d89da20000 RCX: 0000000000000000 [ 7283.170969] RDX: 0000000000000000 RSI: ffff98d89da2bef8 RDI: ffff98d89da20000 [ 7283.170971] RBP: ffff98d89da20000 R08: ffff98d89da2ca18 R09: 0000000000000006 [ 7283.170973] R10: ffffd5764243c008 R11: 0000000000000000 R12: 0000000000001050 [ 7283.170975] R13: ffff98d89da38978 R14: ffffffff999ae15a R15: ffff98d880130105 [ 7283.170978] FS: 0000000000000000(0000) GS:ffff98d996f00000(0000) knlGS:0000000000000000 [ 7283.170981] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 7283.170983] CR2: 00000000f7a9d178 CR3: 00000001c42ea000 CR4: 00000000003506e0 [ 7283.170986] Call Trace: [ 7283.170988] [ 7283.170989] gfx_v9_0_hw_fini+0x1c/0x6d0 [amdgpu] [ 7283.171655] amdgpu_device_ip_suspend_phase2+0x101/0x1a0 [amdgpu] [ 7283.172245] amdgpu_device_suspend+0x103/0x180 [amdgpu] [ 7283.172823] amdgpu_pmops_freeze+0x21/0x60 [amdgpu] [ 7283.173412] pci_pm_freeze+0x54/0xc0 [ 7283.173419] ? 
__pfx_pci_pm_freeze+0x10/0x10 [ 7283.173425] dpm_run_callback+0x98/0x200 [ 7283.173430] __device_suspend+0x164/0x5f0 v2: drop gfx11 as it's fixed in a different solution by retiring cp_ecc_irq funcs(Hawking) Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2522 Signed-off-by: Guchun Chen Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 38f4c7474487..629671f66b31 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3943,7 +3943,8 @@ static int gfx_v9_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); From c1420276be7a98df0074584bb9c1709cbc1a9df5 Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Sat, 6 May 2023 16:52:59 +0800 Subject: [PATCH 63/92] drm/amdgpu: disable sdma ecc irq only when sdma RAS is enabled in suspend commit 8b229ada2669b74fdae06c83fbfda5a5a99fc253 upstream. sdma_v4_0_ip is shared on a few asics, but in sdma_v4_0_hw_fini, driver unconditionally disables ecc_irq which is only enabled on those asics enabling sdma ecc. This will introduce a warning in suspend cycle on those chips with sdma ip v4.0, while without sdma ecc. So this patch correct this. 
[ 7283.166354] RIP: 0010:amdgpu_irq_put+0x45/0x70 [amdgpu] [ 7283.167001] RSP: 0018:ffff9a5fc3967d08 EFLAGS: 00010246 [ 7283.167019] RAX: ffff98d88afd3770 RBX: 0000000000000001 RCX: 0000000000000000 [ 7283.167023] RDX: 0000000000000000 RSI: ffff98d89da30390 RDI: ffff98d89da20000 [ 7283.167025] RBP: ffff98d89da20000 R08: 0000000000036838 R09: 0000000000000006 [ 7283.167028] R10: ffffd5764243c008 R11: 0000000000000000 R12: ffff98d89da30390 [ 7283.167030] R13: ffff98d89da38978 R14: ffffffff999ae15a R15: ffff98d880130105 [ 7283.167032] FS: 0000000000000000(0000) GS:ffff98d996f00000(0000) knlGS:0000000000000000 [ 7283.167036] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 7283.167039] CR2: 00000000f7a9d178 CR3: 00000001c42ea000 CR4: 00000000003506e0 [ 7283.167041] Call Trace: [ 7283.167046] [ 7283.167048] sdma_v4_0_hw_fini+0x38/0xa0 [amdgpu] [ 7283.167704] amdgpu_device_ip_suspend_phase2+0x101/0x1a0 [amdgpu] [ 7283.168296] amdgpu_device_suspend+0x103/0x180 [amdgpu] [ 7283.168875] amdgpu_pmops_freeze+0x21/0x60 [amdgpu] [ 7283.169464] pci_pm_freeze+0x54/0xc0 Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2522 Signed-off-by: Guchun Chen Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 1f2e2460e121..dbcaef3f35da 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1979,9 +1979,11 @@ static int sdma_v4_0_hw_fini(void *handle) if (amdgpu_sriov_vf(adev)) return 0; - for (i = 0; i < adev->sdma.num_instances; i++) { - amdgpu_irq_put(adev, &adev->sdma.ecc_irq, - AMDGPU_SDMA_IRQ_INSTANCE0 + i); + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) { + for (i = 0; i < adev->sdma.num_instances; i++) { + amdgpu_irq_put(adev, &adev->sdma.ecc_irq, + 
AMDGPU_SDMA_IRQ_INSTANCE0 + i); + } } sdma_v4_0_ctx_switch_enable(adev, false); From 77fd800d3f1be3fd2ad3465ad0ac774f5b8f8263 Mon Sep 17 00:00:00 2001 From: Ping Cheng Date: Sun, 9 Apr 2023 09:42:29 -0700 Subject: [PATCH 64/92] HID: wacom: Set a default resolution for older tablets commit 08a46b4190d345544d04ce4fe2e1844b772b8535 upstream. Some older tablets may not report physical maximum for X/Y coordinates. Set a default to prevent undefined resolution. Signed-off-by: Ping Cheng Link: https://lore.kernel.org/r/20230409164229.29777-1-ping.cheng@wacom.com Signed-off-by: Benjamin Tissoires Signed-off-by: Greg Kroah-Hartman --- drivers/hid/wacom_wac.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index afb94b89fc4d..b65ed38c45e6 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -1853,6 +1853,7 @@ static void wacom_map_usage(struct input_dev *input, struct hid_usage *usage, int fmax = field->logical_maximum; unsigned int equivalent_usage = wacom_equivalent_usage(usage->hid); int resolution_code = code; + int resolution = hidinput_calc_abs_res(field, resolution_code); if (equivalent_usage == HID_DG_TWIST) { resolution_code = ABS_RZ; @@ -1875,8 +1876,15 @@ static void wacom_map_usage(struct input_dev *input, struct hid_usage *usage, switch (type) { case EV_ABS: input_set_abs_params(input, code, fmin, fmax, fuzz, 0); - input_abs_set_res(input, code, - hidinput_calc_abs_res(field, resolution_code)); + + /* older tablet may miss physical usage */ + if ((code == ABS_X || code == ABS_Y) && !resolution) { + resolution = WACOM_INTUOS_RES; + hid_warn(input, + "Wacom usage (%d) missing resolution \n", + code); + } + input_abs_set_res(input, code, resolution); break; case EV_KEY: input_set_capability(input, EV_KEY, code); From 4502ebbdc0e21e44a8a706428e420ae9c1bb9bba Mon Sep 17 00:00:00 2001 From: Ping Cheng Date: Fri, 24 Feb 2023 08:26:43 -0800 Subject: [PATCH 65/92] HID: 
wacom: insert timestamp to packed Bluetooth (BT) events commit 17d793f3ed53080dab6bbeabfc82de890c901001 upstream. To fully utilize the BT polling/refresh rate, a few input events are sent together to reduce event delay. This causes issue to the timestamp generated by input_sync since all the events in the same packet would pretty much have the same timestamp. This patch inserts time interval to the events by averaging the total time used for sending the packet. This decision was mainly based on observing the actual time interval between each BT polling. The interval doesn't seem to be constant, due to the network and system environment. So, using solutions other than averaging doesn't end up with valid timestamps. Signed-off-by: Ping Cheng Reviewed-by: Jason Gerecke Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/wacom_wac.c | 26 ++++++++++++++++++++++++++ drivers/hid/wacom_wac.h | 1 + 2 files changed, 27 insertions(+) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index b65ed38c45e6..6c64165fae13 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -1265,6 +1265,9 @@ static void wacom_intuos_pro2_bt_pen(struct wacom_wac *wacom) struct input_dev *pen_input = wacom->pen_input; unsigned char *data = wacom->data; + int number_of_valid_frames = 0; + int time_interval = 15000000; + ktime_t time_packet_received = ktime_get(); int i; if (wacom->features.type == INTUOSP2_BT || @@ -1285,12 +1288,30 @@ static void wacom_intuos_pro2_bt_pen(struct wacom_wac *wacom) wacom->id[0] |= (wacom->serial[0] >> 32) & 0xFFFFF; } + /* number of valid frames */ for (i = 0; i < pen_frames; i++) { unsigned char *frame = &data[i*pen_frame_len + 1]; bool valid = frame[0] & 0x80; + + if (valid) + number_of_valid_frames++; + } + + if (number_of_valid_frames) { + if (wacom->hid_data.time_delayed) + time_interval = ktime_get() - wacom->hid_data.time_delayed; + time_interval /= number_of_valid_frames; + wacom->hid_data.time_delayed = 
time_packet_received; + } + + for (i = 0; i < number_of_valid_frames; i++) { + unsigned char *frame = &data[i*pen_frame_len + 1]; + bool valid = frame[0] & 0x80; bool prox = frame[0] & 0x40; bool range = frame[0] & 0x20; bool invert = frame[0] & 0x10; + int frames_number_reversed = number_of_valid_frames - i - 1; + int event_timestamp = time_packet_received - frames_number_reversed * time_interval; if (!valid) continue; @@ -1303,6 +1324,7 @@ static void wacom_intuos_pro2_bt_pen(struct wacom_wac *wacom) wacom->tool[0] = 0; wacom->id[0] = 0; wacom->serial[0] = 0; + wacom->hid_data.time_delayed = 0; return; } @@ -1339,6 +1361,7 @@ static void wacom_intuos_pro2_bt_pen(struct wacom_wac *wacom) get_unaligned_le16(&frame[11])); } } + if (wacom->tool[0]) { input_report_abs(pen_input, ABS_PRESSURE, get_unaligned_le16(&frame[5])); if (wacom->features.type == INTUOSP2_BT || @@ -1362,6 +1385,9 @@ static void wacom_intuos_pro2_bt_pen(struct wacom_wac *wacom) wacom->shared->stylus_in_proximity = prox; + /* add timestamp to unpack the frames */ + input_set_timestamp(pen_input, event_timestamp); + input_sync(pen_input); } } diff --git a/drivers/hid/wacom_wac.h b/drivers/hid/wacom_wac.h index ca172efcf072..88badfbae999 100644 --- a/drivers/hid/wacom_wac.h +++ b/drivers/hid/wacom_wac.h @@ -320,6 +320,7 @@ struct hid_data { int bat_connected; int ps_connected; bool pad_input_event_flag; + int time_delayed; }; struct wacom_remote_data { From 0cb6e9e7d3f1694ee55227f89a968ea4694db759 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 3 Sep 2021 09:51:36 +0200 Subject: [PATCH 66/92] KVM: x86: hyper-v: Avoid calling kvm_make_vcpus_request_mask() with vcpu_mask==NULL commit 6470accc7ba948b0b3aca22b273fe84ec638a116 upstream. In preparation to making kvm_make_vcpus_request_mask() use for_each_set_bit() switch kvm_hv_flush_tlb() to calling kvm_make_all_cpus_request() for 'all cpus' case. 
Note: kvm_make_all_cpus_request() (unlike kvm_make_vcpus_request_mask()) currently dynamically allocates cpumask on each call and this is suboptimal. Both kvm_make_all_cpus_request() and kvm_make_vcpus_request_mask() are going to be switched to using pre-allocated per-cpu masks. Reviewed-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Message-Id: <20210903075141.403071-4-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini Acked-by: Sean Christopherson Fixes: 6100066358ee ("KVM: Optimize kvm_make_vcpus_request_mask() a bit") Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/hyperv.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 09ec1cda2d68..e03e320847cd 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1562,16 +1562,19 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa, cpumask_clear(&hv_vcpu->tlb_flush); - vcpu_mask = all_cpus ? NULL : - sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, - vp_bitmap, vcpu_bitmap); - /* * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't * analyze it here, flush TLB regardless of the specified address space. */ - kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST, - NULL, vcpu_mask, &hv_vcpu->tlb_flush); + if (all_cpus) { + kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH_GUEST); + } else { + vcpu_mask = sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, + vp_bitmap, vcpu_bitmap); + + kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST, + NULL, vcpu_mask, &hv_vcpu->tlb_flush); + } ret_success: /* We always do full TLB flush, set rep_done = rep_cnt. 
*/ From 529f41f0eb1ef995bfa83c121c3cfe3a0720119a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 7 Jun 2022 10:09:03 -0400 Subject: [PATCH 67/92] KVM: x86: do not report a vCPU as preempted outside instruction boundaries commit 6cd88243c7e03845a450795e134b488fc2afb736 upstream. If a vCPU is outside guest mode and is scheduled out, it might be in the process of making a memory access. A problem occurs if another vCPU uses the PV TLB flush feature during the period when the vCPU is scheduled out, and a virtual address has already been translated but has not yet been accessed, because this is equivalent to using a stale TLB entry. To avoid this, only report a vCPU as preempted if sure that the guest is at an instruction boundary. A rescheduling request will be delivered to the host physical CPU as an external interrupt, so for simplicity consider any vmexit *not* instruction boundary except for external interrupts. It would in principle be okay to report the vCPU as preempted also if it is sleeping in kvm_vcpu_block(): a TLB flush IPI will incur the vmentry/vmexit overhead unnecessarily, and optimistic spinning is also unlikely to succeed. However, leave it for later because right now kvm_vcpu_check_block() is doing memory accesses. Even though the TLB flush issue only applies to virtual memory addresses, it's very much preferable to be conservative.
Reported-by: Jann Horn Signed-off-by: Paolo Bonzini [OP: use VCPU_STAT() for debugfs entries] Signed-off-by: Ovidiu Panait Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm_host.h | 3 +++ arch/x86/kvm/svm/svm.c | 2 ++ arch/x86/kvm/vmx/vmx.c | 1 + arch/x86/kvm/x86.c | 22 ++++++++++++++++++++++ 4 files changed, 28 insertions(+) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 660012ab7bfa..af4b4d3c6ff6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -553,6 +553,7 @@ struct kvm_vcpu_arch { u64 ia32_misc_enable_msr; u64 smbase; u64 smi_count; + bool at_instruction_boundary; bool tpr_access_reporting; bool xsaves_enabled; u64 ia32_xss; @@ -1061,6 +1062,8 @@ struct kvm_vcpu_stat { u64 req_event; u64 halt_poll_success_ns; u64 halt_poll_fail_ns; + u64 preemption_reported; + u64 preemption_other; }; struct x86_instruction_info; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 5775983fec56..7b2b61309d8a 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3983,6 +3983,8 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu) { + if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_INTR) + vcpu->arch.at_instruction_boundary = true; } static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d65cc9363567..9aedc7b06da7 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6510,6 +6510,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) return; handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc)); + vcpu->arch.at_instruction_boundary = true; } static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0ccc8d1b972c..c1351335d22f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -231,6 +231,8 @@ struct kvm_stats_debugfs_item 
debugfs_entries[] = { VCPU_STAT("l1d_flush", l1d_flush), VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns), VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns), + VCPU_STAT("preemption_reported", preemption_reported), + VCPU_STAT("preemption_other", preemption_other), VM_STAT("mmu_shadow_zapped", mmu_shadow_zapped), VM_STAT("mmu_pte_write", mmu_pte_write), VM_STAT("mmu_pde_zapped", mmu_pde_zapped), @@ -4052,6 +4054,19 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) struct kvm_host_map map; struct kvm_steal_time *st; + /* + * The vCPU can be marked preempted if and only if the VM-Exit was on + * an instruction boundary and will not trigger guest emulation of any + * kind (see vcpu_run). Vendor specific code controls (conservatively) + * when this is true, for example allowing the vCPU to be marked + * preempted if and only if the VM-Exit was due to a host interrupt. + */ + if (!vcpu->arch.at_instruction_boundary) { + vcpu->stat.preemption_other++; + return; + } + + vcpu->stat.preemption_reported++; if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return; @@ -9357,6 +9372,13 @@ static int vcpu_run(struct kvm_vcpu *vcpu) vcpu->arch.l1tf_flush_l1d = true; for (;;) { + /* + * If another guest vCPU requests a PV TLB flush in the middle + * of instruction emulation, the rest of the emulation could + * use a stale page translation. Assume that any code after + * this point can start executing an instruction. + */ + vcpu->arch.at_instruction_boundary = false; if (kvm_vcpu_running(vcpu)) { r = vcpu_enter_guest(vcpu); } else { From 5d356d902e9d5b1aaaaf2326d365340fa8a90c1b Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Mon, 16 Jan 2023 10:00:15 +0800 Subject: [PATCH 68/92] ext4: fix WARNING in mb_find_extent commit fa08a7b61dff8a4df11ff1e84abfc214b487caf7 upstream. Syzbot found the following issue: EXT4-fs: Warning: mounting with data=journal disables delayed allocation, dioread_nolock, O_DIRECT and fast_commit support! 
EXT4-fs (loop0): orphan cleanup on readonly fs ------------[ cut here ]------------ WARNING: CPU: 1 PID: 5067 at fs/ext4/mballoc.c:1869 mb_find_extent+0x8a1/0xe30 Modules linked in: CPU: 1 PID: 5067 Comm: syz-executor307 Not tainted 6.2.0-rc1-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 RIP: 0010:mb_find_extent+0x8a1/0xe30 fs/ext4/mballoc.c:1869 RSP: 0018:ffffc90003c9e098 EFLAGS: 00010293 RAX: ffffffff82405731 RBX: 0000000000000041 RCX: ffff8880783457c0 RDX: 0000000000000000 RSI: 0000000000000041 RDI: 0000000000000040 RBP: 0000000000000040 R08: ffffffff82405723 R09: ffffed10053c9402 R10: ffffed10053c9402 R11: 1ffff110053c9401 R12: 0000000000000000 R13: ffffc90003c9e538 R14: dffffc0000000000 R15: ffffc90003c9e2cc FS: 0000555556665300(0000) GS:ffff8880b9900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000056312f6796f8 CR3: 0000000022437000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ext4_mb_complex_scan_group+0x353/0x1100 fs/ext4/mballoc.c:2307 ext4_mb_regular_allocator+0x1533/0x3860 fs/ext4/mballoc.c:2735 ext4_mb_new_blocks+0xddf/0x3db0 fs/ext4/mballoc.c:5605 ext4_ext_map_blocks+0x1868/0x6880 fs/ext4/extents.c:4286 ext4_map_blocks+0xa49/0x1cc0 fs/ext4/inode.c:651 ext4_getblk+0x1b9/0x770 fs/ext4/inode.c:864 ext4_bread+0x2a/0x170 fs/ext4/inode.c:920 ext4_quota_write+0x225/0x570 fs/ext4/super.c:7105 write_blk fs/quota/quota_tree.c:64 [inline] get_free_dqblk+0x34a/0x6d0 fs/quota/quota_tree.c:130 do_insert_tree+0x26b/0x1aa0 fs/quota/quota_tree.c:340 do_insert_tree+0x722/0x1aa0 fs/quota/quota_tree.c:375 do_insert_tree+0x722/0x1aa0 fs/quota/quota_tree.c:375 do_insert_tree+0x722/0x1aa0 fs/quota/quota_tree.c:375 dq_insert_tree fs/quota/quota_tree.c:401 [inline] qtree_write_dquot+0x3b6/0x530 fs/quota/quota_tree.c:420 v2_write_dquot+0x11b/0x190 
fs/quota/quota_v2.c:358 dquot_acquire+0x348/0x670 fs/quota/dquot.c:444 ext4_acquire_dquot+0x2dc/0x400 fs/ext4/super.c:6740 dqget+0x999/0xdc0 fs/quota/dquot.c:914 __dquot_initialize+0x3d0/0xcf0 fs/quota/dquot.c:1492 ext4_process_orphan+0x57/0x2d0 fs/ext4/orphan.c:329 ext4_orphan_cleanup+0xb60/0x1340 fs/ext4/orphan.c:474 __ext4_fill_super fs/ext4/super.c:5516 [inline] ext4_fill_super+0x81cd/0x8700 fs/ext4/super.c:5644 get_tree_bdev+0x400/0x620 fs/super.c:1282 vfs_get_tree+0x88/0x270 fs/super.c:1489 do_new_mount+0x289/0xad0 fs/namespace.c:3145 do_mount fs/namespace.c:3488 [inline] __do_sys_mount fs/namespace.c:3697 [inline] __se_sys_mount+0x2d3/0x3c0 fs/namespace.c:3674 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Add some debug information: mb_find_extent: mb_find_extent block=41, order=0 needed=64 next=0 ex=0/41/1@3735929054 64 64 7 block_bitmap: ff 3f 0c 00 fc 01 00 00 d2 3d 00 00 00 00 00 00 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff Actually, blocks per group is 64, but the block bitmap indicates that the group has at least 128 blocks. Currently, ext4_validate_block_bitmap() does not check whether the bitmap bits for blocks beyond the end of the group are set. To resolve the above issue, add a check like fsck's "Padding at end of block bitmap is not set".
Cc: stable@kernel.org Reported-by: syzbot+68223fe9f6c95ad43bed@syzkaller.appspotmail.com Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20230116020015.1506120-1-yebin@huaweicloud.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/balloc.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 1afd60fcd772..50a0e90e8af9 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -303,6 +303,22 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, return desc; } +static ext4_fsblk_t ext4_valid_block_bitmap_padding(struct super_block *sb, + ext4_group_t block_group, + struct buffer_head *bh) +{ + ext4_grpblk_t next_zero_bit; + unsigned long bitmap_size = sb->s_blocksize * 8; + unsigned int offset = num_clusters_in_group(sb, block_group); + + if (bitmap_size <= offset) + return 0; + + next_zero_bit = ext4_find_next_zero_bit(bh->b_data, bitmap_size, offset); + + return (next_zero_bit < bitmap_size ? next_zero_bit : 0); +} + /* * Return the block number which was discovered to be invalid, or 0 if * the block bitmap is valid. 
@@ -401,6 +417,15 @@ static int ext4_validate_block_bitmap(struct super_block *sb, EXT4_GROUP_INFO_BBITMAP_CORRUPT); return -EFSCORRUPTED; } + blk = ext4_valid_block_bitmap_padding(sb, block_group, bh); + if (unlikely(blk != 0)) { + ext4_unlock_group(sb, block_group); + ext4_error(sb, "bg %u: block %llu: padding at end of block bitmap is not set", + block_group, blk); + ext4_mark_group_bitmap_corrupted(sb, block_group, + EXT4_GROUP_INFO_BBITMAP_CORRUPT); + return -EFSCORRUPTED; + } set_buffer_verified(bh); verified: ext4_unlock_group(sb, block_group); From 0dde3141c527b09b96bef1e7eeb18b8127810ce9 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Thu, 4 May 2023 12:15:25 +0000 Subject: [PATCH 69/92] ext4: avoid a potential slab-out-of-bounds in ext4_group_desc_csum commit 4f04351888a83e595571de672e0a4a8b74f4fb31 upstream. When modifying the block device while it is mounted by the filesystem, syzbot reported the following: BUG: KASAN: slab-out-of-bounds in crc16+0x206/0x280 lib/crc16.c:58 Read of size 1 at addr ffff888075f5c0a8 by task syz-executor.2/15586 CPU: 1 PID: 15586 Comm: syz-executor.2 Not tainted 6.2.0-rc5-syzkaller-00205-gc96618275234 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/12/2023 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1b1/0x290 lib/dump_stack.c:106 print_address_description+0x74/0x340 mm/kasan/report.c:306 print_report+0x107/0x1f0 mm/kasan/report.c:417 kasan_report+0xcd/0x100 mm/kasan/report.c:517 crc16+0x206/0x280 lib/crc16.c:58 ext4_group_desc_csum+0x81b/0xb20 fs/ext4/super.c:3187 ext4_group_desc_csum_set+0x195/0x230 fs/ext4/super.c:3210 ext4_mb_clear_bb fs/ext4/mballoc.c:6027 [inline] ext4_free_blocks+0x191a/0x2810 fs/ext4/mballoc.c:6173 ext4_remove_blocks fs/ext4/extents.c:2527 [inline] ext4_ext_rm_leaf fs/ext4/extents.c:2710 [inline] ext4_ext_remove_space+0x24ef/0x46a0 fs/ext4/extents.c:2958 ext4_ext_truncate+0x177/0x220 fs/ext4/extents.c:4416 ext4_truncate+0xa6a/0xea0 
fs/ext4/inode.c:4342 ext4_setattr+0x10c8/0x1930 fs/ext4/inode.c:5622 notify_change+0xe50/0x1100 fs/attr.c:482 do_truncate+0x200/0x2f0 fs/open.c:65 handle_truncate fs/namei.c:3216 [inline] do_open fs/namei.c:3561 [inline] path_openat+0x272b/0x2dd0 fs/namei.c:3714 do_filp_open+0x264/0x4f0 fs/namei.c:3741 do_sys_openat2+0x124/0x4e0 fs/open.c:1310 do_sys_open fs/open.c:1326 [inline] __do_sys_creat fs/open.c:1402 [inline] __se_sys_creat fs/open.c:1396 [inline] __x64_sys_creat+0x11f/0x160 fs/open.c:1396 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f72f8a8c0c9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 f1 19 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f72f97e3168 EFLAGS: 00000246 ORIG_RAX: 0000000000000055 RAX: ffffffffffffffda RBX: 00007f72f8bac050 RCX: 00007f72f8a8c0c9 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000020000280 RBP: 00007f72f8ae7ae9 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffd165348bf R14: 00007f72f97e3300 R15: 0000000000022000 Replace le16_to_cpu(sbi->s_es->s_desc_size) with sbi->s_desc_size It reduces ext4's compiled text size, and makes the code more efficient (we remove an extra indirect reference and a potential byte swap on big endian systems), and there is no downside. It also avoids the potential KASAN / syzkaller failure, as a bonus. 
Reported-by: syzbot+fc51227e7100c9294894@syzkaller.appspotmail.com Reported-by: syzbot+8785e41224a3afd04321@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=70d28d11ab14bd7938f3e088365252aa923cff42 Link: https://syzkaller.appspot.com/bug?id=b85721b38583ecc6b5e72ff524c67302abbc30f3 Link: https://lore.kernel.org/all/000000000000ece18705f3b20934@google.com/ Fixes: 717d50e4971b ("Ext4: Uninitialized Block Groups") Cc: stable@vger.kernel.org Signed-off-by: Tudor Ambarus Link: https://lore.kernel.org/r/20230504121525.3275886-1-tudor.ambarus@linaro.org Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e940fb07ef2e..ba445afbd617 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2831,11 +2831,9 @@ static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group, crc = crc16(crc, (__u8 *)gdp, offset); offset += sizeof(gdp->bg_checksum); /* skip checksum */ /* for checksum of struct ext4_group_desc do the rest...*/ - if (ext4_has_feature_64bit(sb) && - offset < le16_to_cpu(sbi->s_es->s_desc_size)) + if (ext4_has_feature_64bit(sb) && offset < sbi->s_desc_size) crc = crc16(crc, (__u8 *)gdp + offset, - le16_to_cpu(sbi->s_es->s_desc_size) - - offset); + sbi->s_desc_size - offset); out: return cpu_to_le16(crc); From 4aa7f744fa3727818991802ac58331d188389b75 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 4 May 2023 14:55:24 +0200 Subject: [PATCH 70/92] ext4: fix data races when using cached status extents commit 492888df0c7b42fc0843631168b0021bc4caee84 upstream. When using cached extent stored in extent status tree in tree->cache_es another process holding ei->i_es_lock for reading can be racing with us setting new value of tree->cache_es. If the compiler would decide to refetch tree->cache_es at an unfortunate moment, it could result in a bogus in_range() check. 
Fix the possible race by using READ_ONCE() when using tree->cache_es only under ei->i_es_lock for reading. Cc: stable@kernel.org Reported-by: syzbot+4a03518df1e31b537066@syzkaller.appspotmail.com Link: https://lore.kernel.org/all/000000000000d3b33905fa0fd4a6@google.com Suggested-by: Dmitry Vyukov Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20230504125524.10802-1-jack@suse.cz Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/extents_status.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index aa99a3659edf..fee54ab42bba 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -269,14 +269,12 @@ static void __es_find_extent_range(struct inode *inode, /* see if the extent has been cached */ es->es_lblk = es->es_len = es->es_pblk = 0; - if (tree->cache_es) { - es1 = tree->cache_es; - if (in_range(lblk, es1->es_lblk, es1->es_len)) { - es_debug("%u cached by [%u/%u) %llu %x\n", - lblk, es1->es_lblk, es1->es_len, - ext4_es_pblock(es1), ext4_es_status(es1)); - goto out; - } + es1 = READ_ONCE(tree->cache_es); + if (es1 && in_range(lblk, es1->es_lblk, es1->es_len)) { + es_debug("%u cached by [%u/%u) %llu %x\n", + lblk, es1->es_lblk, es1->es_len, + ext4_es_pblock(es1), ext4_es_status(es1)); + goto out; } es1 = __es_tree_search(&tree->root, lblk); @@ -295,7 +293,7 @@ static void __es_find_extent_range(struct inode *inode, } if (es1 && matching_fn(es1)) { - tree->cache_es = es1; + WRITE_ONCE(tree->cache_es, es1); es->es_lblk = es1->es_lblk; es->es_len = es1->es_len; es->es_pblk = es1->es_pblk; @@ -934,14 +932,12 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, /* find extent in cache firstly */ es->es_lblk = es->es_len = es->es_pblk = 0; - if (tree->cache_es) { - es1 = tree->cache_es; - if (in_range(lblk, es1->es_lblk, es1->es_len)) { - es_debug("%u cached by [%u/%u)\n", - lblk, es1->es_lblk, es1->es_len); 
- found = 1; - goto out; - } + es1 = READ_ONCE(tree->cache_es); + if (es1 && in_range(lblk, es1->es_lblk, es1->es_len)) { + es_debug("%u cached by [%u/%u)\n", + lblk, es1->es_lblk, es1->es_len); + found = 1; + goto out; } node = tree->root.rb_node; From 08838aeefa6fd310cddf8218fddda0a90fdf069e Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Fri, 5 May 2023 21:24:29 +0800 Subject: [PATCH 71/92] ext4: check iomap type only if ext4_iomap_begin() does not fail commit fa83c34e3e56b3c672af38059e066242655271b1 upstream. When ext4_iomap_overwrite_begin() calls ext4_iomap_begin() map blocks may fail for some reason (e.g. memory allocation failure, bare disk write), and later because "iomap->type ! = IOMAP_MAPPED" triggers WARN_ON(). When ext4 iomap_begin() returns an error, it is normal that the type of iomap->type may not match the expectation. Therefore, we only determine if iomap->type is as expected when ext4_iomap_begin() is executed successfully. Cc: stable@kernel.org Reported-by: syzbot+08106c4b7d60702dbc14@syzkaller.appspotmail.com Link: https://lore.kernel.org/all/00000000000015760b05f9b4eee9@google.com Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20230505132429.714648-1-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a93b93de5a60..735109b9e88d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3564,7 +3564,7 @@ static int ext4_iomap_overwrite_begin(struct inode *inode, loff_t offset, */ flags &= ~IOMAP_WRITE; ret = ext4_iomap_begin(inode, offset, length, flags, iomap, srcmap); - WARN_ON_ONCE(iomap->type != IOMAP_MAPPED); + WARN_ON_ONCE(!ret && iomap->type != IOMAP_MAPPED); return ret; } From 37c69da3adc45fc34df0b8d07a158a6fa5b2a3f7 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 5 May 2023 22:20:29 -0400 Subject: [PATCH 72/92] ext4: improve error recovery code 
paths in __ext4_remount() commit 4c0b4818b1f636bc96359f7817a2d8bab6370162 upstream. If there are failures while changing the mount options in __ext4_remount(), we need to restore the old mount options. This commit fixes two problems. The first is there is a chance that we will free the old quota file names before a potential failure leading to a use-after-free. The second problem addressed in this commit is if there is a failed read/write to read-only transition, if the quota has already been suspended, we need to re-enable quota handling. Cc: stable@kernel.org Link: https://lore.kernel.org/r/20230506142419.984260-2-tytso@mit.edu Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ba445afbd617..8694be513241 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -6028,9 +6028,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) } #ifdef CONFIG_QUOTA - /* Release old quota file names */ - for (i = 0; i < EXT4_MAXQUOTAS; i++) - kfree(old_opts.s_qf_names[i]); if (enable_quota) { if (sb_any_quota_suspended(sb)) dquot_resume(sb, -1); @@ -6040,6 +6037,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } } + /* Release old quota file names */ + for (i = 0; i < EXT4_MAXQUOTAS; i++) + kfree(old_opts.s_qf_names[i]); #endif if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks) ext4_release_system_zone(sb); @@ -6059,6 +6059,13 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) return 0; restore_opts: + /* + * If there was a failing r/w to ro transition, we may need to + * re-enable quota + */ + if ((sb->s_flags & SB_RDONLY) && !(old_sb_flags & SB_RDONLY) && + sb_any_quota_suspended(sb)) + dquot_resume(sb, -1); sb->s_flags = old_sb_flags; sbi->s_mount_opt = old_opts.s_mount_opt; sbi->s_mount_opt2 = old_opts.s_mount_opt2; From
5f8b55136ad787aed2c184f7cb3e93772ae637a3 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 6 May 2023 21:04:01 -0400 Subject: [PATCH 73/92] ext4: fix deadlock when converting an inline directory in nojournal mode commit f4ce24f54d9cca4f09a395f3eecce20d6bec4663 upstream. In no journal mode, ext4_finish_convert_inline_dir() can self-deadlock by calling ext4_handle_dirty_dirblock() when it already has taken the directory lock. There is a similar self-deadlock in ext4_convert_inline_data_nolock() for data files which we'll fix at the same time. A simple reproducer demonstrating the problem: mke2fs -Fq -t ext2 -O inline_data -b 4k /dev/vdc 64 mount -t ext4 -o dirsync /dev/vdc /vdc cd /vdc mkdir file0 cd file0 touch file0 touch file1 attr -s BurnSpaceInEA -V abcde . touch supercalifragilisticexpialidocious Cc: stable@kernel.org Link: https://lore.kernel.org/r/20230507021608.1290720-1-tytso@mit.edu Reported-by: syzbot+91dccab7c64e2850a4e5@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=ba84cc80a9491d65416bc7877e1650c87530fe8a Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inline.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 0758f606f006..fd0567d0219c 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1172,6 +1172,7 @@ static int ext4_finish_convert_inline_dir(handle_t *handle, ext4_initialize_dirent_tail(dir_block, inode->i_sb->s_blocksize); set_buffer_uptodate(dir_block); + unlock_buffer(dir_block); err = ext4_handle_dirty_dirblock(handle, inode, dir_block); if (err) return err; @@ -1245,6 +1246,7 @@ static int ext4_convert_inline_data_nolock(handle_t *handle, if (!S_ISDIR(inode->i_mode)) { memcpy(data_bh->b_data, buf, inline_size); set_buffer_uptodate(data_bh); + unlock_buffer(data_bh); error = ext4_handle_dirty_metadata(handle, inode, data_bh); } else { @@ -1252,7 +1254,6 @@ static int ext4_convert_inline_data_nolock(handle_t *handle, buf,
inline_size); } - unlock_buffer(data_bh); out_restore: if (error) ext4_restore_inline_data(handle, inode, iloc, buf, inline_size); From 4597554b4f7b29e7fd78aa449bab648f8da4ee2c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 12 May 2023 15:11:02 -0400 Subject: [PATCH 74/92] ext4: add bounds checking in get_max_inline_xattr_value_size() commit 2220eaf90992c11d888fe771055d4de330385f01 upstream. Normally the extended attributes in the inode body would have been checked when the inode is first opened, but if someone is writing to the block device while the file system is mounted, it's possible for the inode table to get corrupted. Add bounds checking to avoid reading beyond the end of allocated memory if this happens. Reported-by: syzbot+1966db24521e5f6e23f7@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?extid=1966db24521e5f6e23f7 Cc: stable@kernel.org Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inline.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index fd0567d0219c..d0bd1046f4a0 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -32,6 +32,7 @@ static int get_max_inline_xattr_value_size(struct inode *inode, struct ext4_xattr_ibody_header *header; struct ext4_xattr_entry *entry; struct ext4_inode *raw_inode; + void *end; int free, min_offs; if (!EXT4_INODE_HAS_XATTR_SPACE(inode)) @@ -55,14 +56,23 @@ static int get_max_inline_xattr_value_size(struct inode *inode, raw_inode = ext4_raw_inode(iloc); header = IHDR(inode, raw_inode); entry = IFIRST(header); + end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; /* Compute min_offs. 
*/ - for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) { + while (!IS_LAST_ENTRY(entry)) { + void *next = EXT4_XATTR_NEXT(entry); + + if (next >= end) { + EXT4_ERROR_INODE(inode, + "corrupt xattr in inline inode"); + return 0; + } if (!entry->e_value_inum && entry->e_value_size) { size_t offs = le16_to_cpu(entry->e_value_offs); if (offs < min_offs) min_offs = offs; } + entry = next; } free = min_offs - ((void *)entry - (void *)IFIRST(header)) - sizeof(__u32); From d88fe8e6112696238deeaf09820db183663eabca Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 12 May 2023 15:16:27 -0400 Subject: [PATCH 75/92] ext4: bail out of ext4_xattr_ibody_get() fails for any reason commit 2a534e1d0d1591e951f9ece2fb460b2ff92edabd upstream. In ext4_update_inline_data(), if ext4_xattr_ibody_get() fails for any reason, it's best if we just fail as opposed to stumbling on, especially if the failure is EFSCORRUPTED. Cc: stable@kernel.org Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index d0bd1046f4a0..979935c078fb 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -358,7 +358,7 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode, error = ext4_xattr_ibody_get(inode, i.name_index, i.name, value, len); - if (error == -ENODATA) + if (error < 0) goto out; BUFFER_TRACE(is.iloc.bh, "get_write_access"); From b0fc279de4bf17e1710bb7e83906538ff8f11111 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 29 Apr 2023 16:14:46 -0400 Subject: [PATCH 76/92] ext4: remove a BUG_ON in ext4_mb_release_group_pa() commit 463808f237cf73e98a1a45ff7460c2406a150a0b upstream. If a malicious fuzzer overwrites the ext4 superblock while it is mounted such that the s_first_data_block is set to a very large number, the calculation of the block group can underflow, and trigger a BUG_ON check. 
Change this to be an ext4_warning so that we don't crash the kernel. Cc: stable@kernel.org Link: https://lore.kernel.org/r/20230430154311.579720-3-tytso@mit.edu Reported-by: syzbot+e2efa3efc15a1c9e95c3@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=69b28112e098b070f639efb356393af3ffec4220 Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/mballoc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 843840c2aced..a7c42e4bfc5e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4250,7 +4250,11 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, trace_ext4_mb_release_group_pa(sb, pa); BUG_ON(pa->pa_deleted == 0); ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); - BUG_ON(group != e4b->bd_group && pa->pa_len != 0); + if (unlikely(group != e4b->bd_group && pa->pa_len != 0)) { + ext4_warning(sb, "bad group: expected %u, group %u, pa_start %llu", + e4b->bd_group, group, pa->pa_pstart); + return 0; + } mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len); From 1a8822343e67432b658145d2760a524c884da9d4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 30 Apr 2023 03:04:13 -0400 Subject: [PATCH 77/92] ext4: fix invalid free tracking in ext4_xattr_move_to_block() commit b87c7cdf2bed4928b899e1ce91ef0d147017ba45 upstream. In ext4_xattr_move_to_block(), the value of the extended attribute which we need to move to an external block may be allocated by kvmalloc() if the value is stored in an external inode. So at the end of the function the code tried to check if this was the case by testing entry->e_value_inum. However, at this point, the pointer to the xattr entry is no longer valid, because it was removed from the original location where it had been stored. 
So we could end up calling kvfree() on a pointer which was not allocated by kvmalloc(); or we could also potentially leak memory by not freeing the buffer when it should be freed. Fix this by storing whether it should be freed in a separate variable. Cc: stable@kernel.org Link: https://lore.kernel.org/r/20230430160426.581366-1-tytso@mit.edu Link: https://syzkaller.appspot.com/bug?id=5c2aee8256e30b55ccf57312c16d88417adbd5e1 Link: https://syzkaller.appspot.com/bug?id=41a6b5d4917c0412eb3b3c3c604965bed7d7420b Reported-by: syzbot+64b645917ce07d89bde5@syzkaller.appspotmail.com Reported-by: syzbot+0d042627c4f2ad332195@syzkaller.appspotmail.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 28fa9a64dc4b..abcba0255109 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2554,6 +2554,7 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode, .in_inode = !!entry->e_value_inum, }; struct ext4_xattr_ibody_header *header = IHDR(inode, raw_inode); + int needs_kvfree = 0; int error; is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS); @@ -2576,7 +2577,7 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode, error = -ENOMEM; goto out; } - + needs_kvfree = 1; error = ext4_xattr_inode_get(inode, entry, buffer, value_size); if (error) goto out; @@ -2615,7 +2616,7 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode, out: kfree(b_entry_name); - if (entry->e_value_inum && buffer) + if (needs_kvfree && buffer) kvfree(buffer); if (is) brelse(is->iloc.bh); From ef77d602e306489fc067a283306d752707ac75e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 17 Mar 2023 13:33:18 +0200 Subject: [PATCH 78/92] serial: 8250: Fix serial8250_tx_empty() race with DMA Tx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
commit 146a37e05d620cef4ad430e5d1c9c077fe6fa76f upstream. There's a potential race before THRE/TEMT deasserts when DMA Tx is starting up (or the next batch of continuous Tx is being submitted). This can lead to misdetecting Tx empty condition. It is entirely normal for THRE/TEMT to be set for some time after the DMA Tx had been setup in serial8250_tx_dma(). As Tx side is definitely not empty at that point, it seems incorrect for serial8250_tx_empty() claim Tx is empty. Fix the race by also checking in serial8250_tx_empty() whether there's DMA Tx active. Note: This fix only addresses in-kernel race mainly to make using TCSADRAIN/FLUSH robust. Userspace can still cause other races but they seem userspace concurrency control problems. Fixes: 9ee4b83e51f74 ("serial: 8250: Add support for dmaengine") Cc: stable@vger.kernel.org Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20230317113318.31327-3-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250.h | 12 ++++++++++++ drivers/tty/serial/8250/8250_port.c | 12 +++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h index b6dc9003b8c4..0771cd226581 100644 --- a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h @@ -330,6 +330,13 @@ extern int serial8250_rx_dma(struct uart_8250_port *); extern void serial8250_rx_dma_flush(struct uart_8250_port *); extern int serial8250_request_dma(struct uart_8250_port *); extern void serial8250_release_dma(struct uart_8250_port *); + +static inline bool serial8250_tx_dma_running(struct uart_8250_port *p) +{ + struct uart_8250_dma *dma = p->dma; + + return dma && dma->tx_running; +} #else static inline int serial8250_tx_dma(struct uart_8250_port *p) { @@ -345,6 +352,11 @@ static inline int serial8250_request_dma(struct uart_8250_port *p) return -1; } static inline void serial8250_release_dma(struct 
uart_8250_port *p) { } + +static inline bool serial8250_tx_dma_running(struct uart_8250_port *p) +{ + return false; +} #endif static inline int ns16550a_goto_highspeed(struct uart_8250_port *up) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 1f9e4b87387b..b19908779e3b 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1971,19 +1971,25 @@ static int serial8250_tx_threshold_handle_irq(struct uart_port *port) static unsigned int serial8250_tx_empty(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); + unsigned int result = 0; unsigned long flags; unsigned int lsr; serial8250_rpm_get(up); spin_lock_irqsave(&port->lock, flags); - lsr = serial_port_in(port, UART_LSR); - up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; + if (!serial8250_tx_dma_running(up)) { + lsr = serial_port_in(port, UART_LSR); + up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; + + if ((lsr & BOTH_EMPTY) == BOTH_EMPTY) + result = TIOCSER_TEMT; + } spin_unlock_irqrestore(&port->lock, flags); serial8250_rpm_put(up); - return (lsr & BOTH_EMPTY) == BOTH_EMPTY ? TIOCSER_TEMT : 0; + return result; } unsigned int serial8250_do_get_mctrl(struct uart_port *port) From 01c0002ec7bdac9e49458aea6737a9edc0d220da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=B6hmwalder?= Date: Wed, 3 May 2023 14:19:37 +0200 Subject: [PATCH 79/92] drbd: correctly submit flush bio on barrier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3899d94e3831ee07ea6821c032dc297aec80586a upstream. When we receive a flush command (or "barrier" in DRBD), we currently use a REQ_OP_FLUSH with the REQ_PREFLUSH flag set. The correct way to submit a flush bio is by using a REQ_OP_WRITE without any data, and set the REQ_PREFLUSH flag. 
Since commit b4a6bb3a67aa ("block: add a sanity check for non-write flush/fua bios"), this triggers a warning in the block layer, but this has been broken for quite some time before that. So use the correct set of flags to actually make the flush happen. Cc: Christoph Hellwig Cc: stable@vger.kernel.org Fixes: f9ff0da56437 ("drbd: allow parallel flushes for multi-volume resources") Reported-by: Thomas Voegtle Signed-off-by: Christoph Böhmwalder Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20230503121937.17232-1-christoph.boehmwalder@linbit.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/drbd/drbd_receiver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index dc333dbe5232..405e09575f08 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1299,7 +1299,7 @@ static void submit_one_flush(struct drbd_device *device, struct issue_flush_cont bio_set_dev(bio, device->ldev->backing_bdev); bio->bi_private = octx; bio->bi_end_io = one_flush_endio; - bio->bi_opf = REQ_OP_FLUSH | REQ_PREFLUSH; + bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; device->flush_jif = jiffies; set_bit(FLUSH_PENDING, &device->flags); From 4b19cbdb1dd39874fc530250c588041609bdc11d Mon Sep 17 00:00:00 2001 From: Rishabh Bhatnagar Date: Wed, 10 May 2023 18:15:39 +0000 Subject: [PATCH 80/92] KVM: x86: Ensure PV TLB flush tracepoint reflects KVM behavior From: Lai Jiangshan commit af3511ff7fa2107d6410831f3d71030f5e8d2b25 upstream. In record_steal_time(), st->preempted is read twice, and trace_kvm_pv_tlb_flush() might output an inconsistent result if kvm_vcpu_flush_tlb_guest() sees a different st->preempted later. It is a very trivial problem that hardly causes any actual harm, and it can be avoided by resetting and reading st->preempted atomically via xchg(). 
Signed-off-by: Lai Jiangshan Message-Id: <20210531174628.10265-1-jiangshanlai@gmail.com> Signed-off-by: Paolo Bonzini Signed-off-by: Rishabh Bhatnagar Tested-by: Allen Pais Acked-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c1351335d22f..d80c89d1971f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3041,9 +3041,11 @@ static void record_steal_time(struct kvm_vcpu *vcpu) * expensive IPIs. */ if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) { + u8 st_preempted = xchg(&st->preempted, 0); + trace_kvm_pv_tlb_flush(vcpu->vcpu_id, - st->preempted & KVM_VCPU_FLUSH_TLB); - if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB) + st_preempted & KVM_VCPU_FLUSH_TLB); + if (st_preempted & KVM_VCPU_FLUSH_TLB) kvm_vcpu_flush_tlb_guest(vcpu); } else { st->preempted = 0; From ebd3010d42bad1deefab2990f909d96a774f2921 Mon Sep 17 00:00:00 2001 From: Rishabh Bhatnagar Date: Wed, 10 May 2023 18:15:40 +0000 Subject: [PATCH 81/92] KVM: x86: Fix recording of guest steal time / preempted status From: David Woodhouse commit 7e2175ebd695f17860c5bd4ad7616cce12ed4591 upstream. In commit b043138246a4 ("x86/KVM: Make sure KVM_VCPU_FLUSH_TLB flag is not missed") we switched to using a gfn_to_pfn_cache for accessing the guest steal time structure in order to allow for an atomic xchg of the preempted field. This has a couple of problems. Firstly, kvm_map_gfn() doesn't work at all for IOMEM pages when the atomic flag is set, which it is in kvm_steal_time_set_preempted(). So a guest vCPU using an IOMEM page for its steal time would never have its preempted field set. Secondly, the gfn_to_pfn_cache is not invalidated in all cases where it should have been. There are two stages to the GFN->PFN conversion; first the GFN is converted to a userspace HVA, and then that HVA is looked up in the process page tables to find the underlying host PFN. 
Correct invalidation of the latter would require being hooked up to the MMU notifiers, but that doesn't happen---so it just keeps mapping and unmapping the *wrong* PFN after the userspace page tables change. In the !IOMEM case at least the stale page *is* pinned all the time it's cached, so it won't be freed and reused by anyone else while still receiving the steal time updates. The map/unmap dance only takes care of the KVM administrivia such as marking the page dirty. Until the gfn_to_pfn cache handles the remapping automatically by integrating with the MMU notifiers, we might as well not get a kernel mapping of it, and use the perfectly serviceable userspace HVA that we already have. We just need to implement the atomic xchg on the userspace address with appropriate exception handling, which is fairly trivial. Cc: stable@vger.kernel.org Fixes: b043138246a4 ("x86/KVM: Make sure KVM_VCPU_FLUSH_TLB flag is not missed") Signed-off-by: David Woodhouse Message-Id: <3645b9b889dac6438394194bb5586a46b68d581f.camel@infradead.org> [I didn't entirely agree with David's assessment of the usefulness of the gfn_to_pfn cache, and integrated the outcome of the discussion in the above commit message. 
- Paolo] Signed-off-by: Paolo Bonzini [risbhat@amazon.com: Use the older mark_page_dirty_in_slot api without kvm argument] Signed-off-by: Rishabh Bhatnagar Tested-by: Allen Pais Acked-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/x86.c | 105 +++++++++++++++++++++++--------- 2 files changed, 76 insertions(+), 31 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index af4b4d3c6ff6..3e9f1c820edb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -664,7 +664,7 @@ struct kvm_vcpu_arch { u8 preempted; u64 msr_val; u64 last_steal; - struct gfn_to_pfn_cache cache; + struct gfn_to_hva_cache cache; } st; u64 l1_tsc_offset; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d80c89d1971f..119447e31956 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3022,53 +3022,92 @@ static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu) static void record_steal_time(struct kvm_vcpu *vcpu) { - struct kvm_host_map map; - struct kvm_steal_time *st; + struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache; + struct kvm_steal_time __user *st; + struct kvm_memslots *slots; + u64 steal; + u32 version; if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return; - /* -EAGAIN is returned in atomic context so we can just return. */ - if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, - &map, &vcpu->arch.st.cache, false)) + if (WARN_ON_ONCE(current->mm != vcpu->kvm->mm)) return; - st = map.hva + - offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS); + slots = kvm_memslots(vcpu->kvm); + + if (unlikely(slots->generation != ghc->generation || + kvm_is_error_hva(ghc->hva) || !ghc->memslot)) { + gfn_t gfn = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS; + + /* We rely on the fact that it fits in a single page. 
*/ + BUILD_BUG_ON((sizeof(*st) - 1) & KVM_STEAL_VALID_BITS); + + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gfn, sizeof(*st)) || + kvm_is_error_hva(ghc->hva) || !ghc->memslot) + return; + } + + st = (struct kvm_steal_time __user *)ghc->hva; + if (!user_access_begin(st, sizeof(*st))) + return; /* * Doing a TLB flush here, on the guest's behalf, can avoid * expensive IPIs. */ if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) { - u8 st_preempted = xchg(&st->preempted, 0); + u8 st_preempted = 0; + int err = -EFAULT; + + asm volatile("1: xchgb %0, %2\n" + "xor %1, %1\n" + "2:\n" + _ASM_EXTABLE_UA(1b, 2b) + : "+r" (st_preempted), + "+&r" (err) + : "m" (st->preempted)); + if (err) + goto out; + + user_access_end(); + + vcpu->arch.st.preempted = 0; trace_kvm_pv_tlb_flush(vcpu->vcpu_id, st_preempted & KVM_VCPU_FLUSH_TLB); if (st_preempted & KVM_VCPU_FLUSH_TLB) kvm_vcpu_flush_tlb_guest(vcpu); + + if (!user_access_begin(st, sizeof(*st))) + goto dirty; } else { - st->preempted = 0; + unsafe_put_user(0, &st->preempted, out); + vcpu->arch.st.preempted = 0; } - vcpu->arch.st.preempted = 0; + unsafe_get_user(version, &st->version, out); + if (version & 1) + version += 1; /* first time write, random junk */ - if (st->version & 1) - st->version += 1; /* first time write, random junk */ - - st->version += 1; + version += 1; + unsafe_put_user(version, &st->version, out); smp_wmb(); - st->steal += current->sched_info.run_delay - + unsafe_get_user(steal, &st->steal, out); + steal += current->sched_info.run_delay - vcpu->arch.st.last_steal; vcpu->arch.st.last_steal = current->sched_info.run_delay; + unsafe_put_user(steal, &st->steal, out); - smp_wmb(); + version += 1; + unsafe_put_user(version, &st->version, out); - st->version += 1; - - kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false); + out: + user_access_end(); + dirty: + mark_page_dirty_in_slot(ghc->memslot, gpa_to_gfn(ghc->gpa)); } int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) @@ -4053,8 
+4092,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) { - struct kvm_host_map map; - struct kvm_steal_time *st; + struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache; + struct kvm_steal_time __user *st; + struct kvm_memslots *slots; + static const u8 preempted = KVM_VCPU_PREEMPTED; /* * The vCPU can be marked preempted if and only if the VM-Exit was on @@ -4075,16 +4116,23 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) if (vcpu->arch.st.preempted) return; - if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map, - &vcpu->arch.st.cache, true)) + /* This happens on process exit */ + if (unlikely(current->mm != vcpu->kvm->mm)) return; - st = map.hva + - offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS); + slots = kvm_memslots(vcpu->kvm); - st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED; + if (unlikely(slots->generation != ghc->generation || + kvm_is_error_hva(ghc->hva) || !ghc->memslot)) + return; - kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true); + st = (struct kvm_steal_time __user *)ghc->hva; + BUILD_BUG_ON(sizeof(st->preempted) != sizeof(preempted)); + + if (!copy_to_user_nofault(&st->preempted, &preempted, sizeof(preempted))) + vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED; + + mark_page_dirty_in_slot(ghc->memslot, gpa_to_gfn(ghc->gpa)); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -10266,11 +10314,8 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { - struct gfn_to_pfn_cache *cache = &vcpu->arch.st.cache; int idx; - kvm_release_pfn(cache->pfn, cache->dirty, cache); - kvmclock_reset(vcpu); kvm_x86_ops.vcpu_free(vcpu); From 8e39c2f407aff06a5968fc5e923e0b080f78f869 Mon Sep 17 00:00:00 2001 From: Rishabh Bhatnagar Date: Wed, 10 May 2023 18:15:41 +0000 Subject: [PATCH 82/92] KVM: Fix steal time asm constraints From: David Woodhouse commit 
964b7aa0b040bdc6ec1c543ee620cda3f8b4c68a upstream. In 64-bit mode, x86 instruction encoding allows us to use the low 8 bits of any GPR as an 8-bit operand. In 32-bit mode, however, we can only use the [abcd] registers. For which, GCC has the "q" constraint instead of the less restrictive "r". Also fix st->preempted, which is an input/output operand rather than an input. Fixes: 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time / preempted status") Reported-by: kernel test robot Signed-off-by: David Woodhouse Message-Id: <89bf72db1b859990355f9c40713a34e0d2d86c98.camel@infradead.org> Signed-off-by: Paolo Bonzini Signed-off-by: Rishabh Bhatnagar Tested-by: Allen Pais Acked-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 119447e31956..40162a8757b2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3064,9 +3064,9 @@ static void record_steal_time(struct kvm_vcpu *vcpu) "xor %1, %1\n" "2:\n" _ASM_EXTABLE_UA(1b, 2b) - : "+r" (st_preempted), - "+&r" (err) - : "m" (st->preempted)); + : "+q" (st_preempted), + "+&r" (err), + "+m" (st->preempted)); if (err) goto out; From 029662004359364428d6cca688acb0441189af1b Mon Sep 17 00:00:00 2001 From: Rishabh Bhatnagar Date: Wed, 10 May 2023 18:15:42 +0000 Subject: [PATCH 83/92] KVM: x86: Remove obsolete disabling of page faults in kvm_arch_vcpu_put() From: Sean Christopherson commit 19979fba9bfaeab427a8e106d915f0627c952828 upstream. Remove the disabling of page faults across kvm_steal_time_set_preempted() as KVM now accesses the steal time struct (shared with the guest) via a cached mapping (see commit b043138246a4, "x86/KVM: Make sure KVM_VCPU_FLUSH_TLB flag is not missed".) The cache lookup is flagged as atomic, thus it would be a bug if KVM tried to resolve a new pfn, i.e. we want the splat that would be reached via might_fault(). 
Signed-off-by: Sean Christopherson Message-Id: <20210123000334.3123628-2-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Rishabh Bhatnagar Tested-by: Allen Pais Acked-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 40162a8757b2..3881bf7d1ac4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4142,15 +4142,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) if (vcpu->preempted) vcpu->arch.preempted_in_kernel = !kvm_x86_ops.get_cpl(vcpu); - /* - * Disable page faults because we're in atomic context here. - * kvm_write_guest_offset_cached() would call might_fault() - * that relies on pagefault_disable() to tell if there's a - * bug. NOTE: the write to guest memory may not go through if - * during postcopy live migration or if there's heavy guest - * paging. - */ - pagefault_disable(); /* * kvm_memslots() will be called by * kvm_write_guest_offset_cached() so take the srcu lock. @@ -4158,7 +4149,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) idx = srcu_read_lock(&vcpu->kvm->srcu); kvm_steal_time_set_preempted(vcpu); srcu_read_unlock(&vcpu->kvm->srcu, idx); - pagefault_enable(); kvm_x86_ops.vcpu_put(vcpu); vcpu->arch.last_host_tsc = rdtsc(); /* From e10a73f5380958629b4ae8d2ecad29f3e82a1b7f Mon Sep 17 00:00:00 2001 From: Rishabh Bhatnagar Date: Wed, 10 May 2023 18:15:43 +0000 Subject: [PATCH 84/92] KVM: x86: do not set st->preempted when going back to user space From: Paolo Bonzini commit 54aa83c90198e68eee8b0850c749bc70efb548da upstream. Similar to the Xen path, only change the vCPU's reported state if the vCPU was actually preempted. 
The reason for KVM's behavior is that for example optimistic spinning might not be a good idea if the guest is doing repeated exits to userspace; however, it is confusing and unlikely to make a difference, because well-tuned guests will hardly ever exit KVM_RUN in the first place. Suggested-by: Sean Christopherson Signed-off-by: Paolo Bonzini [risbhat@amazon.com: Don't check for xen msr as support is not available and skip the SEV-ES condition] Signed-off-by: Rishabh Bhatnagar Tested-by: Allen Pais Acked-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3881bf7d1ac4..116a225fb26e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4139,16 +4139,18 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { int idx; - if (vcpu->preempted) + if (vcpu->preempted) { vcpu->arch.preempted_in_kernel = !kvm_x86_ops.get_cpl(vcpu); - /* - * kvm_memslots() will be called by - * kvm_write_guest_offset_cached() so take the srcu lock. - */ - idx = srcu_read_lock(&vcpu->kvm->srcu); - kvm_steal_time_set_preempted(vcpu); - srcu_read_unlock(&vcpu->kvm->srcu, idx); + /* + * Take the srcu lock as memslots will be accessed to check the gfn + * cache generation against the memslots generation. + */ + idx = srcu_read_lock(&vcpu->kvm->srcu); + kvm_steal_time_set_preempted(vcpu); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + } + kvm_x86_ops.vcpu_put(vcpu); vcpu->arch.last_host_tsc = rdtsc(); /* From 4bffae22bec7e035e9d5a0c0db7286b6bd258f56 Mon Sep 17 00:00:00 2001 From: Rishabh Bhatnagar Date: Wed, 10 May 2023 18:15:45 +0000 Subject: [PATCH 85/92] KVM: x86: revalidate steal time cache if MSR value changes From: Paolo Bonzini commit 901d3765fa804ce42812f1d5b1f3de2dfbb26723 upstream. 
Commit 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time / preempted status", 2021-11-11) open coded the previous call to kvm_map_gfn, but in doing so it dropped the comparison between the cached guest physical address and the one in the MSR. This causes an incorrect cache hit if the guest modifies the steal time address while the memslots remain the same. This can happen with kexec, in which case the steal time data is written at the address used by the old kernel instead of the new one. While at it, rename the variable from gfn to gpa since it is a plain physical address and not a right-shifted one. Reported-by: Dave Young Reported-by: Xiaoying Yan Analyzed-by: Dr. David Alan Gilbert Cc: David Woodhouse Cc: stable@vger.kernel.org Fixes: 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time / preempted status") Signed-off-by: Paolo Bonzini Signed-off-by: Rishabh Bhatnagar Tested-by: Allen Pais Acked-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 116a225fb26e..0503bb8b64e5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3025,6 +3025,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu) struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache; struct kvm_steal_time __user *st; struct kvm_memslots *slots; + gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS; u64 steal; u32 version; @@ -3037,13 +3038,12 @@ static void record_steal_time(struct kvm_vcpu *vcpu) slots = kvm_memslots(vcpu->kvm); if (unlikely(slots->generation != ghc->generation || + gpa != ghc->gpa || kvm_is_error_hva(ghc->hva) || !ghc->memslot)) { - gfn_t gfn = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS; - /* We rely on the fact that it fits in a single page. 
*/ BUILD_BUG_ON((sizeof(*st) - 1) & KVM_STEAL_VALID_BITS); - if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gfn, sizeof(*st)) || + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gpa, sizeof(*st)) || kvm_is_error_hva(ghc->hva) || !ghc->memslot) return; } From 85cfbaa575eaa843a15ab2d54b953196c2189b2f Mon Sep 17 00:00:00 2001 From: Rishabh Bhatnagar Date: Wed, 10 May 2023 18:15:46 +0000 Subject: [PATCH 86/92] KVM: x86: do not report preemption if the steal time cache is stale From: Paolo Bonzini commit c3c28d24d910a746b02f496d190e0e8c6560224b upstream. Commit 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time / preempted status", 2021-11-11) open coded the previous call to kvm_map_gfn, but in doing so it dropped the comparison between the cached guest physical address and the one in the MSR. This causes an incorrect cache hit if the guest modifies the steal time address while the memslots remain the same. This can happen with kexec, in which case the preempted bit is written at the address used by the old kernel instead of the new one. 
Cc: David Woodhouse Cc: stable@vger.kernel.org Fixes: 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time / preempted status") Signed-off-by: Paolo Bonzini Signed-off-by: Rishabh Bhatnagar Tested-by: Allen Pais Acked-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0503bb8b64e5..8461aa63c251 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4096,6 +4096,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) struct kvm_steal_time __user *st; struct kvm_memslots *slots; static const u8 preempted = KVM_VCPU_PREEMPTED; + gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS; /* * The vCPU can be marked preempted if and only if the VM-Exit was on @@ -4123,6 +4124,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) slots = kvm_memslots(vcpu->kvm); if (unlikely(slots->generation != ghc->generation || + gpa != ghc->gpa || kvm_is_error_hva(ghc->hva) || !ghc->memslot)) return; From fcfe05990a5ce95361dbcae308632861d3c40dfd Mon Sep 17 00:00:00 2001 From: Rishabh Bhatnagar Date: Wed, 10 May 2023 18:15:47 +0000 Subject: [PATCH 87/92] KVM: x86: move guest_pv_has out of user_access section From: Paolo Bonzini commit 3e067fd8503d6205aa0c1c8f48f6b209c592d19c upstream. When UBSAN is enabled, the code emitted for the call to guest_pv_has includes a call to __ubsan_handle_load_invalid_value. objtool complains that this call happens with UACCESS enabled; to avoid the warning, pull the calls to user_access_begin into both arms of the "if" statement, after the check for guest_pv_has. 
Reported-by: Stephen Rothwell Cc: David Woodhouse Signed-off-by: Paolo Bonzini Signed-off-by: Rishabh Bhatnagar Tested-by: Allen Pais Acked-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8461aa63c251..5fbae8cc0697 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3049,9 +3049,6 @@ static void record_steal_time(struct kvm_vcpu *vcpu) } st = (struct kvm_steal_time __user *)ghc->hva; - if (!user_access_begin(st, sizeof(*st))) - return; - /* * Doing a TLB flush here, on the guest's behalf, can avoid * expensive IPIs. @@ -3060,6 +3057,9 @@ static void record_steal_time(struct kvm_vcpu *vcpu) u8 st_preempted = 0; int err = -EFAULT; + if (!user_access_begin(st, sizeof(*st))) + return; + asm volatile("1: xchgb %0, %2\n" "xor %1, %1\n" "2:\n" @@ -3082,6 +3082,9 @@ static void record_steal_time(struct kvm_vcpu *vcpu) if (!user_access_begin(st, sizeof(*st))) goto dirty; } else { + if (!user_access_begin(st, sizeof(*st))) + return; + unsafe_put_user(0, &st->preempted, out); vcpu->arch.st.preempted = 0; } From 32232bcd4e5300e678718d5c29da4dfa07ade01e Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 14 May 2023 13:41:27 +0900 Subject: [PATCH 88/92] printk: declare printk_deferred_{enter,safe}() in include/linux/printk.h commit 85e3e7fbbb720b9897fba9a99659e31cbd1c082e upstream. [This patch implements subset of original commit 85e3e7fbbb72 ("printk: remove NMI tracking") where commit 1007843a9190 ("mm/page_alloc: fix potential deadlock on zonelist_update_seq seqlock") depends on, for commit 3d36424b3b58 ("mm/page_alloc: fix race condition between build_all_zonelists and page allocation") was backported to stable.] All NMI contexts are handled the same as the safe context: store the message and defer printing. There is no need to have special NMI context tracking for this. Using in_nmi() is enough. 
There are several parts of the kernel that are manually calling into the printk NMI context tracking in order to cause general printk deferred printing: arch/arm/kernel/smp.c arch/powerpc/kexec/crash.c kernel/trace/trace.c For arm/kernel/smp.c and powerpc/kexec/crash.c, provide a new function pair printk_deferred_enter/exit that explicitly achieves the same objective. For ftrace, remove the printk context manipulation completely. It was added in commit 03fc7f9c99c1 ("printk/nmi: Prevent deadlock when accessing the main log buffer in NMI"). The purpose was to enforce storing messages directly into the ring buffer even in NMI context. It really should have only modified the behavior in NMI context. There is no need for a special behavior any longer. All messages are always stored directly now. The console deferring is handled transparently in vprintk(). Signed-off-by: John Ogness [pmladek@suse.com: Remove special handling in ftrace.c completely.] Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210715193359.25946-5-john.ogness@linutronix.de [penguin-kernel: Copy only printk_deferred_{enter,exit}() definitions] Signed-off-by: Tetsuo Handa Signed-off-by: Greg Kroah-Hartman --- include/linux/printk.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/include/linux/printk.h b/include/linux/printk.h index fe7eb2351610..344f6da3d4c3 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -623,4 +623,23 @@ static inline void print_hex_dump_debug(const char *prefix_str, int prefix_type, #define print_hex_dump_bytes(prefix_str, prefix_type, buf, len) \ print_hex_dump_debug(prefix_str, prefix_type, 16, 1, buf, len, true) +#ifdef CONFIG_PRINTK +extern void __printk_safe_enter(void); +extern void __printk_safe_exit(void); +/* + * The printk_deferred_enter/exit macros are available only as a hack for + * some code paths that need to defer all printk console printing. Interrupts + * must be disabled for the deferred duration. 
+ */ +#define printk_deferred_enter __printk_safe_enter +#define printk_deferred_exit __printk_safe_exit +#else +static inline void printk_deferred_enter(void) +{ +} +static inline void printk_deferred_exit(void) +{ +} +#endif + #endif From f2656f437fb1981646d806a314df6eb937feba4f Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Mon, 15 Mar 2021 19:49:37 +0800 Subject: [PATCH 89/92] drm/exynos: move to use request_irq by IRQF_NO_AUTOEN flag commit a4e5eed2c6a689ef2b6ad8d7ae86665c69039379 upstream. After this patch cbe16f35bee68 genirq: Add IRQF_NO_AUTOEN for request_irq/nmi() is merged. request_irq() after setting IRQ_NOAUTOEN as below irq_set_status_flags(irq, IRQ_NOAUTOEN); request_irq(dev, irq...); can be replaced by request_irq() with IRQF_NO_AUTOEN flag. v2: Fix the problem of using wrong flags Signed-off-by: Tian Tao Signed-off-by: Inki Dae Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 4 ++-- drivers/gpu/drm/exynos/exynos_drm_dsi.c | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 1f79bc2a881e..c277d2fc50c6 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -775,8 +775,8 @@ static int decon_conf_irq(struct decon_context *ctx, const char *name, return irq; } } - irq_set_status_flags(irq, IRQ_NOAUTOEN); - ret = devm_request_irq(ctx->dev, irq, handler, flags, "drm_decon", ctx); + ret = devm_request_irq(ctx->dev, irq, handler, + flags | IRQF_NO_AUTOEN, "drm_decon", ctx); if (ret < 0) { dev_err(ctx->dev, "IRQ %s request failed\n", name); return ret; diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index 5b9666fc7af1..afb03de2880f 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -1353,10 +1353,9 @@ static int exynos_dsi_register_te_irq(struct 
exynos_dsi *dsi, } te_gpio_irq = gpio_to_irq(dsi->te_gpio); - irq_set_status_flags(te_gpio_irq, IRQ_NOAUTOEN); ret = request_threaded_irq(te_gpio_irq, exynos_dsi_te_irq_handler, NULL, - IRQF_TRIGGER_RISING, "TE", dsi); + IRQF_TRIGGER_RISING | IRQF_NO_AUTOEN, "TE", dsi); if (ret) { dev_err(dsi->dev, "request interrupt failed with %d\n", ret); gpio_free(dsi->te_gpio); @@ -1802,9 +1801,9 @@ static int exynos_dsi_probe(struct platform_device *pdev) if (dsi->irq < 0) return dsi->irq; - irq_set_status_flags(dsi->irq, IRQ_NOAUTOEN); ret = devm_request_threaded_irq(dev, dsi->irq, NULL, - exynos_dsi_irq, IRQF_ONESHOT, + exynos_dsi_irq, + IRQF_ONESHOT | IRQF_NO_AUTOEN, dev_name(dev), dsi); if (ret) { dev_err(dev, "failed to request dsi irq\n"); From a992c387b41186ab968fd176ca26b432b05c53ec Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 4 Apr 2023 23:31:58 +0900 Subject: [PATCH 90/92] mm/page_alloc: fix potential deadlock on zonelist_update_seq seqlock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1007843a91909a4995ee78a538f62d8665705b66 upstream. syzbot is reporting circular locking dependency which involves zonelist_update_seq seqlock [1], for this lock is checked by memory allocation requests which do not need to be retried. One deadlock scenario is kmalloc(GFP_ATOMIC) from an interrupt handler. CPU0 ---- __build_all_zonelists() { write_seqlock(&zonelist_update_seq); // makes zonelist_update_seq.seqcount odd // e.g. timer interrupt handler runs at this moment some_timer_func() { kmalloc(GFP_ATOMIC) { __alloc_pages_slowpath() { read_seqbegin(&zonelist_update_seq) { // spins forever because zonelist_update_seq.seqcount is odd } } } } // e.g. 
timer interrupt handler finishes write_sequnlock(&zonelist_update_seq); // makes zonelist_update_seq.seqcount even } This deadlock scenario can be easily eliminated by not calling read_seqbegin(&zonelist_update_seq) from !__GFP_DIRECT_RECLAIM allocation requests, for retry is applicable to only __GFP_DIRECT_RECLAIM allocation requests. But Michal Hocko does not know whether we should go with this approach. Another deadlock scenario which syzbot is reporting is a race between kmalloc(GFP_ATOMIC) from tty_insert_flip_string_and_push_buffer() with port->lock held and printk() from __build_all_zonelists() with zonelist_update_seq held. CPU0 CPU1 ---- ---- pty_write() { tty_insert_flip_string_and_push_buffer() { __build_all_zonelists() { write_seqlock(&zonelist_update_seq); build_zonelists() { printk() { vprintk() { vprintk_default() { vprintk_emit() { console_unlock() { console_flush_all() { console_emit_next_record() { con->write() = serial8250_console_write() { spin_lock_irqsave(&port->lock, flags); tty_insert_flip_string() { tty_insert_flip_string_fixed_flag() { __tty_buffer_request_room() { tty_buffer_alloc() { kmalloc(GFP_ATOMIC | __GFP_NOWARN) { __alloc_pages_slowpath() { zonelist_iter_begin() { read_seqbegin(&zonelist_update_seq); // spins forever because zonelist_update_seq.seqcount is odd spin_lock_irqsave(&port->lock, flags); // spins forever because port->lock is held } } } } } } } } spin_unlock_irqrestore(&port->lock, flags); // message is printed to console spin_unlock_irqrestore(&port->lock, flags); } } } } } } } } } write_sequnlock(&zonelist_update_seq); } } } This deadlock scenario can be eliminated by preventing interrupt context from calling kmalloc(GFP_ATOMIC) and preventing printk() from calling console_flush_all() while zonelist_update_seq.seqcount is odd. 
Since Petr Mladek thinks that __build_all_zonelists() can become a candidate for deferring printk() [2], let's address this problem by disabling local interrupts in order to avoid kmalloc(GFP_ATOMIC) and disabling synchronous printk() in order to avoid console_flush_all() . As a side effect of minimizing duration of zonelist_update_seq.seqcount being odd by disabling synchronous printk(), latency at read_seqbegin(&zonelist_update_seq) for both !__GFP_DIRECT_RECLAIM and __GFP_DIRECT_RECLAIM allocation requests will be reduced. Although, from lockdep perspective, not calling read_seqbegin(&zonelist_update_seq) (i.e. do not record unnecessary locking dependency) from interrupt context is still preferable, even if we don't allow calling kmalloc(GFP_ATOMIC) inside write_seqlock(&zonelist_update_seq)/write_sequnlock(&zonelist_update_seq) section... Link: https://lkml.kernel.org/r/8796b95c-3da3-5885-fddd-6ef55f30e4d3@I-love.SAKURA.ne.jp Fixes: 3d36424b3b58 ("mm/page_alloc: fix race condition between build_all_zonelists and page allocation") Link: https://lkml.kernel.org/r/ZCrs+1cDqPWTDFNM@alley [2] Reported-by: syzbot Link: https://syzkaller.appspot.com/bug?extid=223c7461c58c58a4cb10 [1] Signed-off-by: Tetsuo Handa Acked-by: Michal Hocko Acked-by: Mel Gorman Cc: Petr Mladek Cc: David Hildenbrand Cc: Ilpo Järvinen Cc: John Ogness Cc: Patrick Daly Cc: Sergey Senozhatsky Cc: Steven Rostedt Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1fd41b91a1a8..d85435db35f3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5973,7 +5973,21 @@ static void __build_all_zonelists(void *data) int nid; int __maybe_unused cpu; pg_data_t *self = data; + unsigned long flags; + /* + * Explicitly disable this CPU's interrupts before taking seqlock + * to prevent any IRQ handler from calling into the page allocator + * (e.g. 
GFP_ATOMIC) that could hit zonelist_iter_begin and livelock. + */ + local_irq_save(flags); + /* + * Explicitly disable this CPU's synchronous printk() before taking + * seqlock to prevent any printk() from trying to hold port->lock, for + * tty_insert_flip_string_and_push_buffer() on other CPU might be + * calling kmalloc(GFP_ATOMIC | __GFP_NOWARN) with port->lock held. + */ + printk_deferred_enter(); write_seqlock(&zonelist_update_seq); #ifdef CONFIG_NUMA @@ -6008,6 +6022,8 @@ static void __build_all_zonelists(void *data) } write_sequnlock(&zonelist_update_seq); + printk_deferred_exit(); + local_irq_restore(flags); } static noinline void __init From 3ebe5d6d69ceba8e20b0b74e43192a6c8b49d4f0 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Fri, 24 Mar 2023 10:42:37 -0400 Subject: [PATCH 91/92] drm/amd/display: Fix hang when skipping modeset commit da5e14909776edea4462672fb4a3007802d262e7 upstream. [Why&How] When skipping full modeset since the only state change was a front porch change, the DC commit sequence requires extra checks to handle non-existent plane states being asked to be removed from context. 
Reviewed-by: Alvin Lee Acked-by: Qingqing Zhuo Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 ++++- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index dbdf0e210522..3ca1ee396e4c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -7248,6 +7248,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, continue; dc_plane = dm_new_plane_state->dc_state; + if (!dc_plane) + continue; bundle->surface_updates[planes_count].surface = dc_plane; if (new_pcrtc_state->color_mgmt_changed) { @@ -8562,8 +8564,9 @@ static int dm_update_plane_state(struct dc *dc, return -EINVAL; } + if (dm_old_plane_state->dc_state) + dc_plane_state_release(dm_old_plane_state->dc_state); - dc_plane_state_release(dm_old_plane_state->dc_state); dm_new_plane_state->dc_state = NULL; *lock_and_validation_needed = true; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 1e47afc4ccc1..f1eda1a6496d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1502,6 +1502,9 @@ bool dc_remove_plane_from_context( struct dc_stream_status *stream_status = NULL; struct resource_pool *pool = dc->res_pool; + if (!plane_state) + return true; + for (i = 0; i < context->stream_count; i++) if (context->streams[i] == stream) { stream_status = &context->stream_status[i]; From 4c893ff55907c61456bcb917781c0dd687a1e123 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 17 May 2023 11:48:20 +0200 Subject: [PATCH 92/92] Linux 5.10.180 Link: 
https://lore.kernel.org/r/20230515161736.775969473@linuxfoundation.org Tested-by: Chris Paterson (CIP) Tested-by: Shuah Khan Tested-by: Sudip Mukherjee Tested-by: Guenter Roeck Tested-by: Jon Hunter Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3ddcade4be8f..c2f8e1644abd 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 179 +SUBLEVEL = 180 EXTRAVERSION = NAME = Dare mighty things