From 54d0d83a53508d687fd4a225f8aa1f18559562d0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Nov 2023 15:57:13 +0100 Subject: [PATCH 01/68] hrtimers: Push pending hrtimers away from outgoing CPU earlier [ Upstream commit 5c0930ccaad5a74d74e8b18b648c5eb21ed2fe94 ] 2b8272ff4a70 ("cpu/hotplug: Prevent self deadlock on CPU hot-unplug") solved the straight forward CPU hotplug deadlock vs. the scheduler bandwidth timer. Yu discovered a more involved variant where a task which has a bandwidth timer started on the outgoing CPU holds a lock and then gets throttled. If the lock required by one of the CPU hotplug callbacks the hotplug operation deadlocks because the unthrottling timer event is not handled on the dying CPU and can only be recovered once the control CPU reaches the hotplug state which pulls the pending hrtimers from the dead CPU. Solve this by pushing the hrtimers away from the dying CPU in the dying callbacks. Nothing can queue a hrtimer on the dying CPU at that point because all other CPUs spin in stop_machine() with interrupts disabled and once the operation is finished the CPU is marked offline. Reported-by: Yu Liao Signed-off-by: Thomas Gleixner Tested-by: Liu Tie Link: https://lore.kernel.org/r/87a5rphara.ffs@tglx Signed-off-by: Sasha Levin --- include/linux/cpuhotplug.h | 1 + include/linux/hrtimer.h | 4 ++-- kernel/cpu.c | 8 +++++++- kernel/time/hrtimer.c | 33 ++++++++++++--------------------- 4 files changed, 22 insertions(+), 24 deletions(-) diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 8134cc3b99cd..206d7ac411b8 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -141,6 +141,7 @@ enum cpuhp_state { CPUHP_AP_ARM_CORESIGHT_STARTING, CPUHP_AP_ARM64_ISNDEP_STARTING, CPUHP_AP_SMPCFD_DYING, + CPUHP_AP_HRTIMERS_DYING, CPUHP_AP_X86_TBOOT_DYING, CPUHP_AP_ARM_CACHE_B15_RAC_DYING, CPUHP_AP_ONLINE, diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 48be92aded5e..16c68a7287bc 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -526,9 +526,9 @@ extern void sysrq_timer_list_show(void); int hrtimers_prepare_cpu(unsigned int cpu); #ifdef CONFIG_HOTPLUG_CPU -int hrtimers_dead_cpu(unsigned int cpu); +int hrtimers_cpu_dying(unsigned int cpu); #else -#define hrtimers_dead_cpu NULL +#define hrtimers_cpu_dying NULL #endif #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index c08456af0c7f..ba579bb6b897 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1473,7 +1473,7 @@ static struct cpuhp_step cpuhp_hp_states[] = { [CPUHP_HRTIMERS_PREPARE] = { .name = "hrtimers:prepare", .startup.single = hrtimers_prepare_cpu, - .teardown.single = hrtimers_dead_cpu, + .teardown.single = NULL, }, [CPUHP_SMPCFD_PREPARE] = { .name = "smpcfd:prepare", @@ -1540,6 +1540,12 @@ static struct cpuhp_step cpuhp_hp_states[] = { .startup.single = NULL, .teardown.single = smpcfd_dying_cpu, }, + [CPUHP_AP_HRTIMERS_DYING] = { + .name = "hrtimers:dying", + .startup.single = NULL, + .teardown.single = hrtimers_cpu_dying, + }, + /* Entry state on starting. Interrupts enabled from here on. Transient * state for synchronsization */ [CPUHP_AP_ONLINE] = { diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 8e3c9228aec9..e2a055e46255 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -2105,29 +2105,22 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, } } -int hrtimers_dead_cpu(unsigned int scpu) +int hrtimers_cpu_dying(unsigned int dying_cpu) { struct hrtimer_cpu_base *old_base, *new_base; - int i; + int i, ncpu = cpumask_first(cpu_active_mask); - BUG_ON(cpu_online(scpu)); - tick_cancel_sched_timer(scpu); + tick_cancel_sched_timer(dying_cpu); + + old_base = this_cpu_ptr(&hrtimer_bases); + new_base = &per_cpu(hrtimer_bases, ncpu); - /* - * this BH disable ensures that raise_softirq_irqoff() does - * not wakeup ksoftirqd (and acquire the pi-lock) while - * holding the cpu_base lock - */ - local_bh_disable(); - local_irq_disable(); - old_base = &per_cpu(hrtimer_bases, scpu); - new_base = this_cpu_ptr(&hrtimer_bases); /* * The caller is globally serialized and nobody else * takes two locks at once, deadlock is not possible. */ - raw_spin_lock(&new_base->lock); - raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); + raw_spin_lock(&old_base->lock); + raw_spin_lock_nested(&new_base->lock, SINGLE_DEPTH_NESTING); for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { migrate_hrtimer_list(&old_base->clock_base[i], @@ -2138,15 +2131,13 @@ int hrtimers_dead_cpu(unsigned int scpu) * The migration might have changed the first expiring softirq * timer on this CPU. Update it. */ - hrtimer_update_softirq_timer(new_base, false); + __hrtimer_get_next_event(new_base, HRTIMER_ACTIVE_SOFT); + /* Tell the other CPU to retrigger the next event */ + smp_call_function_single(ncpu, retrigger_next_event, NULL, 0); - raw_spin_unlock(&old_base->lock); raw_spin_unlock(&new_base->lock); + raw_spin_unlock(&old_base->lock); - /* Check, if we got expired work to do */ - __hrtimer_peek_ahead_timers(); - local_irq_enable(); - local_bh_enable(); return 0; } From 427deb5ba5661c4ae1cfb35955d2e01bd5f3090a Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 13 Nov 2023 21:13:23 +0100 Subject: [PATCH 02/68] netfilter: ipset: fix race condition between swap/destroy and kernel side add/del/test [ Upstream commit 28628fa952fefc7f2072ce6e8016968cc452b1ba ] Linkui Xiao reported that there's a race condition when ipset swap and destroy is called, which can lead to crash in add/del/test element operations. Swap then destroy are usual operations to replace a set with another one in a production system. The issue can in some cases be reproduced with the script: ipset create hash_ip1 hash:net family inet hashsize 1024 maxelem 1048576 ipset add hash_ip1 172.20.0.0/16 ipset add hash_ip1 192.168.0.0/16 iptables -A INPUT -m set --match-set hash_ip1 src -j ACCEPT while [ 1 ] do # ... Ongoing traffic... ipset create hash_ip2 hash:net family inet hashsize 1024 maxelem 1048576 ipset add hash_ip2 172.20.0.0/16 ipset swap hash_ip1 hash_ip2 ipset destroy hash_ip2 sleep 0.05 done In the race case the possible order of the operations are CPU0 CPU1 ip_set_test ipset swap hash_ip1 hash_ip2 ipset destroy hash_ip2 hash_net_kadt Swap replaces hash_ip1 with hash_ip2 and then destroy removes hash_ip2 which is the original hash_ip1. ip_set_test was called on hash_ip1 and because destroy removed it, hash_net_kadt crashes. The fix is to force ip_set_swap() to wait for all readers to finish accessing the old set pointers by calling synchronize_rcu(). The first version of the patch was written by Linkui Xiao . v2: synchronize_rcu() is moved into ip_set_swap() in order not to burden ip_set_destroy() unnecessarily when all sets are destroyed. v3: Florian Westphal pointed out that all netfilter hooks run with rcu_read_lock() held and em_ipset.c wraps the entire ip_set_test() in rcu read lock/unlock pair. So there's no need to extend the rcu read locked area in ipset itself. Closes: https://lore.kernel.org/all/69e7963b-e7f8-3ad0-210-7b86eebf7f78@netfilter.org/ Reported by: Linkui Xiao Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/ipset/ip_set_core.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index d3be0d0b0bda..d4feda487e5e 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -61,6 +61,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); ip_set_dereference((inst)->ip_set_list)[id] #define ip_set_ref_netlink(inst,id) \ rcu_dereference_raw((inst)->ip_set_list)[id] +#define ip_set_dereference_nfnl(p) \ + rcu_dereference_check(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET)) /* The set types are implemented in modules and registered set types * can be found in ip_set_type_list. Adding/deleting types is @@ -556,15 +558,10 @@ __ip_set_put_netlink(struct ip_set *set) static inline struct ip_set * ip_set_rcu_get(struct net *net, ip_set_id_t index) { - struct ip_set *set; struct ip_set_net *inst = ip_set_pernet(net); - rcu_read_lock(); - /* ip_set_list itself needs to be protected */ - set = rcu_dereference(inst->ip_set_list)[index]; - rcu_read_unlock(); - - return set; + /* ip_set_list and the set pointer need to be protected */ + return ip_set_dereference_nfnl(inst->ip_set_list)[index]; } static inline void @@ -1255,6 +1252,9 @@ static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb, ip_set(inst, to_id) = from; write_unlock_bh(&ip_set_ref_lock); + /* Make sure all readers of the old set pointers are completed. */ + synchronize_rcu(); + return 0; } From cd49b8e07d016c1163555e98ba38b87f9fb9097c Mon Sep 17 00:00:00 2001 From: Alex Pakhunov Date: Mon, 13 Nov 2023 10:23:49 -0800 Subject: [PATCH 03/68] tg3: Move the [rt]x_dropped counters to tg3_napi [ Upstream commit 907d1bdb8b2cc0357d03a1c34d2a08d9943760b1 ] This change moves [rt]x_dropped counters to tg3_napi so that they can be updated by a single writer, race-free. Signed-off-by: Alex Pakhunov Signed-off-by: Vincent Wong Reviewed-by: Michael Chan Link: https://lore.kernel.org/r/20231113182350.37472-1-alexey.pakhunov@spacex.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/tg3.c | 38 +++++++++++++++++++++++++---- drivers/net/ethernet/broadcom/tg3.h | 4 +-- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 90bfaea6d629..fb2543b6cec1 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -6870,7 +6870,7 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget) desc_idx, *post_ptr); drop_it_no_recycle: /* Other statistics kept track of by card. */ - tp->rx_dropped++; + tnapi->rx_dropped++; goto next_pkt; } @@ -8169,7 +8169,7 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) drop: dev_kfree_skb_any(skb); drop_nofree: - tp->tx_dropped++; + tnapi->tx_dropped++; return NETDEV_TX_OK; } @@ -9348,7 +9348,7 @@ static void __tg3_set_rx_mode(struct net_device *); /* tp->lock is held. */ static int tg3_halt(struct tg3 *tp, int kind, bool silent) { - int err; + int err, i; tg3_stop_fw(tp); @@ -9369,6 +9369,13 @@ static int tg3_halt(struct tg3 *tp, int kind, bool silent) /* And make sure the next sample is new data */ memset(tp->hw_stats, 0, sizeof(struct tg3_hw_stats)); + + for (i = 0; i < TG3_IRQ_MAX_VECS; ++i) { + struct tg3_napi *tnapi = &tp->napi[i]; + + tnapi->rx_dropped = 0; + tnapi->tx_dropped = 0; + } } return err; @@ -11925,6 +11932,9 @@ static void tg3_get_nstats(struct tg3 *tp, struct rtnl_link_stats64 *stats) { struct rtnl_link_stats64 *old_stats = &tp->net_stats_prev; struct tg3_hw_stats *hw_stats = tp->hw_stats; + unsigned long rx_dropped; + unsigned long tx_dropped; + int i; stats->rx_packets = old_stats->rx_packets + get_stat64(&hw_stats->rx_ucast_packets) + @@ -11971,8 +11981,26 @@ static void tg3_get_nstats(struct tg3 *tp, struct rtnl_link_stats64 *stats) stats->rx_missed_errors = old_stats->rx_missed_errors + get_stat64(&hw_stats->rx_discards); - stats->rx_dropped = tp->rx_dropped; - stats->tx_dropped = tp->tx_dropped; + /* Aggregate per-queue counters. The per-queue counters are updated + * by a single writer, race-free. The result computed by this loop + * might not be 100% accurate (counters can be updated in the middle of + * the loop) but the next tg3_get_nstats() will recompute the current + * value so it is acceptable. + * + * Note that these counters wrap around at 4G on 32bit machines. + */ + rx_dropped = (unsigned long)(old_stats->rx_dropped); + tx_dropped = (unsigned long)(old_stats->tx_dropped); + + for (i = 0; i < tp->irq_cnt; i++) { + struct tg3_napi *tnapi = &tp->napi[i]; + + rx_dropped += tnapi->rx_dropped; + tx_dropped += tnapi->tx_dropped; + } + + stats->rx_dropped = rx_dropped; + stats->tx_dropped = tx_dropped; } static int tg3_get_regs_len(struct net_device *dev) diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h index 6953d0546acb..811627abde5c 100644 --- a/drivers/net/ethernet/broadcom/tg3.h +++ b/drivers/net/ethernet/broadcom/tg3.h @@ -3018,6 +3018,7 @@ struct tg3_napi { u16 *rx_rcb_prod_idx; struct tg3_rx_prodring_set prodring; struct tg3_rx_buffer_desc *rx_rcb; + unsigned long rx_dropped; u32 tx_prod ____cacheline_aligned; u32 tx_cons; @@ -3026,6 +3027,7 @@ struct tg3_napi { u32 prodmbox; struct tg3_tx_buffer_desc *tx_ring; struct tg3_tx_ring_info *tx_buffers; + unsigned long tx_dropped; dma_addr_t status_mapping; dma_addr_t rx_rcb_mapping; @@ -3219,8 +3221,6 @@ struct tg3 { /* begin "everything else" cacheline(s) section */ - unsigned long rx_dropped; - unsigned long tx_dropped; struct rtnl_link_stats64 net_stats_prev; struct tg3_ethtool_stats estats_prev; From d3464415305097a025b93db045cab3e2811ff2f6 Mon Sep 17 00:00:00 2001 From: Alex Pakhunov Date: Mon, 13 Nov 2023 10:23:50 -0800 Subject: [PATCH 04/68] tg3: Increment tx_dropped in tg3_tso_bug() [ Upstream commit 17dd5efe5f36a96bd78012594fabe21efb01186b ] tg3_tso_bug() drops a packet if it cannot be segmented for any reason. The number of discarded frames should be incremented accordingly. Signed-off-by: Alex Pakhunov Signed-off-by: Vincent Wong Reviewed-by: Pavan Chebbi Link: https://lore.kernel.org/r/20231113182350.37472-2-alexey.pakhunov@spacex.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/tg3.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index fb2543b6cec1..4cfb0d0ee80c 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -7896,8 +7896,10 @@ static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi, segs = skb_gso_segment(skb, tp->dev->features & ~(NETIF_F_TSO | NETIF_F_TSO6)); - if (IS_ERR(segs) || !segs) + if (IS_ERR(segs) || !segs) { + tnapi->tx_dropped++; goto tg3_tso_bug_end; + } do { nskb = segs; From d162a5e6a51da19544d5ad289d8a94bbebb1569f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 15 Nov 2023 13:16:53 +0900 Subject: [PATCH 05/68] kconfig: fix memory leak from range properties [ Upstream commit ae1eff0349f2e908fc083630e8441ea6dc434dc0 ] Currently, sym_validate_range() duplicates the range string using xstrdup(), which is overwritten by a subsequent sym_calc_value() call. It results in a memory leak. Instead, only the pointer should be copied. Below is a test case, with a summary from Valgrind. [Test Kconfig] config FOO int "foo" range 10 20 [Test .config] CONFIG_FOO=0 [Before] LEAK SUMMARY: definitely lost: 3 bytes in 1 blocks indirectly lost: 0 bytes in 0 blocks possibly lost: 0 bytes in 0 blocks still reachable: 17,465 bytes in 21 blocks suppressed: 0 bytes in 0 blocks [After] LEAK SUMMARY: definitely lost: 0 bytes in 0 blocks indirectly lost: 0 bytes in 0 blocks possibly lost: 0 bytes in 0 blocks still reachable: 17,462 bytes in 20 blocks suppressed: 0 bytes in 0 blocks Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/kconfig/symbol.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c index f56eec5ea4c7..342fefe5dba4 100644 --- a/scripts/kconfig/symbol.c +++ b/scripts/kconfig/symbol.c @@ -117,9 +117,9 @@ static long long sym_get_range_val(struct symbol *sym, int base) static void sym_validate_range(struct symbol *sym) { struct property *prop; + struct symbol *range_sym; int base; long long val, val2; - char str[64]; switch (sym->type) { case S_INT: @@ -135,17 +135,15 @@ static void sym_validate_range(struct symbol *sym) if (!prop) return; val = strtoll(sym->curr.val, NULL, base); - val2 = sym_get_range_val(prop->expr->left.sym, base); + range_sym = prop->expr->left.sym; + val2 = sym_get_range_val(range_sym, base); if (val >= val2) { - val2 = sym_get_range_val(prop->expr->right.sym, base); + range_sym = prop->expr->right.sym; + val2 = sym_get_range_val(range_sym, base); if (val <= val2) return; } - if (sym->type == S_INT) - sprintf(str, "%lld", val2); - else - sprintf(str, "0x%llx", val2); - sym->curr.val = xstrdup(str); + sym->curr.val = range_sym->curr.val; } static void sym_set_changed(struct symbol *sym) From 13f27a05377dad9a146d1e05295a768fb9830eb5 Mon Sep 17 00:00:00 2001 From: YuanShang Date: Tue, 31 Oct 2023 10:32:37 +0800 Subject: [PATCH 06/68] drm/amdgpu: correct chunk_ptr to a pointer to chunk. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 50d51374b498457c4dea26779d32ccfed12ddaff ] The variable "chunk_ptr" should be a pointer pointing to a struct drm_amdgpu_cs_chunk instead of to a pointer of that. Signed-off-by: YuanShang Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 1a8305521176..f9c725a8991b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -141,7 +141,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs } for (i = 0; i < p->nchunks; i++) { - struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL; + struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL; struct drm_amdgpu_cs_chunk user_chunk; uint32_t __user *cdata; From d786067be2ebb81e5e3d4fad2578435b05e5c4c7 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 10 Oct 2019 12:01:48 +0200 Subject: [PATCH 07/68] of: base: Add of_get_cpu_state_node() to get idle states for a CPU node [ Upstream commit b9f8c26afc405a4a616e765e949bdd551151e41d ] The CPU's idle state nodes are currently parsed at the common cpuidle DT library, but also when initializing data for specific CPU idle operations, as in the PSCI cpuidle driver case and qcom-spm cpuidle case. To avoid open-coding, let's introduce of_get_cpu_state_node(), which takes the device node for the CPU and the index to the requested idle state node, as in-parameters. In case a corresponding idle state node is found, it returns the node with the refcount incremented for it, else it returns NULL. Moreover, for PSCI there are two options to describe the CPU's idle states [1], either via a flattened description or a hierarchical layout. Hence, let's take both options into account. [1] Documentation/devicetree/bindings/arm/psci.yaml Suggested-by: Sudeep Holla Co-developed-by: Lina Iyer Signed-off-by: Lina Iyer Reviewed-by: Rob Herring Reviewed-by: Daniel Lezcano Signed-off-by: Ulf Hansson Reviewed-by: Sudeep Holla Stable-dep-of: d79972789d17 ("of: dynamic: Fix of_reconfig_get_state_change() return value documentation") Signed-off-by: Sasha Levin --- drivers/of/base.c | 36 ++++++++++++++++++++++++++++++++++++ include/linux/of.h | 8 ++++++++ 2 files changed, 44 insertions(+) diff --git a/drivers/of/base.c b/drivers/of/base.c index b5c84607a74b..3f13b982e8b5 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -477,6 +477,42 @@ int of_cpu_node_to_id(struct device_node *cpu_node) } EXPORT_SYMBOL(of_cpu_node_to_id); +/** + * of_get_cpu_state_node - Get CPU's idle state node at the given index + * + * @cpu_node: The device node for the CPU + * @index: The index in the list of the idle states + * + * Two generic methods can be used to describe a CPU's idle states, either via + * a flattened description through the "cpu-idle-states" binding or via the + * hierarchical layout, using the "power-domains" and the "domain-idle-states" + * bindings. This function check for both and returns the idle state node for + * the requested index. + * + * In case an idle state node is found at @index, the refcount is incremented + * for it, so call of_node_put() on it when done. Returns NULL if not found. + */ +struct device_node *of_get_cpu_state_node(struct device_node *cpu_node, + int index) +{ + struct of_phandle_args args; + int err; + + err = of_parse_phandle_with_args(cpu_node, "power-domains", + "#power-domain-cells", 0, &args); + if (!err) { + struct device_node *state_node = + of_parse_phandle(args.np, "domain-idle-states", index); + + of_node_put(args.np); + if (state_node) + return state_node; + } + + return of_parse_phandle(cpu_node, "cpu-idle-states", index); +} +EXPORT_SYMBOL(of_get_cpu_state_node); + /** * __of_device_is_compatible() - Check if the node matches given constraints * @device: pointer to node diff --git a/include/linux/of.h b/include/linux/of.h index a7621e2b440a..e29b341598e9 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -351,6 +351,8 @@ extern const void *of_get_property(const struct device_node *node, int *lenp); extern struct device_node *of_get_cpu_node(int cpu, unsigned int *thread); extern struct device_node *of_get_next_cpu_node(struct device_node *prev); +extern struct device_node *of_get_cpu_state_node(struct device_node *cpu_node, + int index); #define for_each_property_of_node(dn, pp) \ for (pp = dn->properties; pp != NULL; pp = pp->next) @@ -765,6 +767,12 @@ static inline struct device_node *of_get_next_cpu_node(struct device_node *prev) return NULL; } +static inline struct device_node *of_get_cpu_state_node(struct device_node *cpu_node, + int index) +{ + return NULL; +} + static inline int of_n_addr_cells(struct device_node *np) { return 0; From da36a3ef32b476ca6464226010ef73135fc3c3a4 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 19 Jun 2020 09:20:03 +0100 Subject: [PATCH 08/68] ACPI/IORT: Make iort_get_device_domain IRQ domain agnostic [ Upstream commit d1718a1b7a86743b9c517bf9521695ba909c734f ] iort_get_device_domain() is PCI specific but it need not be, since it can be used to retrieve IRQ domain nexus of any kind by adding an irq_domain_bus_token input to it. Make it PCI agnostic by also renaming the requestor ID input to a more generic ID name. Signed-off-by: Lorenzo Pieralisi Acked-by: Bjorn Helgaas # pci/msi.c Cc: Will Deacon Cc: Hanjun Guo Cc: Bjorn Helgaas Cc: Sudeep Holla Cc: Robin Murphy Cc: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/20200619082013.13661-3-lorenzo.pieralisi@arm.com Signed-off-by: Catalin Marinas Stable-dep-of: d79972789d17 ("of: dynamic: Fix of_reconfig_get_state_change() return value documentation") Signed-off-by: Sasha Levin --- drivers/acpi/arm64/iort.c | 14 +++++++------- drivers/pci/msi.c | 3 ++- include/linux/acpi_iort.h | 7 ++++--- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 09eb170f26d2..0428e74b6002 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -503,7 +503,6 @@ static struct acpi_iort_node *iort_find_dev_node(struct device *dev) node = iort_get_iort_node(dev->fwnode); if (node) return node; - /* * if not, then it should be a platform device defined in * DSDT/SSDT (with Named Component node in IORT) @@ -594,13 +593,13 @@ static int __maybe_unused iort_find_its_base(u32 its_id, phys_addr_t *base) /** * iort_dev_find_its_id() - Find the ITS identifier for a device * @dev: The device. - * @req_id: Device's requester ID + * @id: Device's ID * @idx: Index of the ITS identifier list. * @its_id: ITS identifier. * * Returns: 0 on success, appropriate error value otherwise */ -static int iort_dev_find_its_id(struct device *dev, u32 req_id, +static int iort_dev_find_its_id(struct device *dev, u32 id, unsigned int idx, int *its_id) { struct acpi_iort_its_group *its; @@ -610,7 +609,7 @@ static int iort_dev_find_its_id(struct device *dev, u32 req_id, if (!node) return -ENXIO; - node = iort_node_map_id(node, req_id, NULL, IORT_MSI_TYPE); + node = iort_node_map_id(node, id, NULL, IORT_MSI_TYPE); if (!node) return -ENXIO; @@ -633,19 +632,20 @@ static int iort_dev_find_its_id(struct device *dev, u32 req_id, * * Returns: the MSI domain for this device, NULL otherwise */ -struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id) +struct irq_domain *iort_get_device_domain(struct device *dev, u32 id, + enum irq_domain_bus_token bus_token) { struct fwnode_handle *handle; int its_id; - if (iort_dev_find_its_id(dev, req_id, 0, &its_id)) + if (iort_dev_find_its_id(dev, id, 0, &its_id)) return NULL; handle = iort_find_domain_token(its_id); if (!handle) return NULL; - return irq_find_matching_fwnode(handle, DOMAIN_BUS_PCI_MSI); + return irq_find_matching_fwnode(handle, bus_token); } static void iort_set_device_domain(struct device *dev, diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 715c85d4e688..6336316c3793 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1626,7 +1626,8 @@ struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev) pci_for_each_dma_alias(pdev, get_msi_id_cb, &rid); dom = of_msi_map_get_device_domain(&pdev->dev, rid); if (!dom) - dom = iort_get_device_domain(&pdev->dev, rid); + dom = iort_get_device_domain(&pdev->dev, rid, + DOMAIN_BUS_PCI_MSI); return dom; } #endif /* CONFIG_PCI_MSI_IRQ_DOMAIN */ diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index 64f700254ca0..6bf603a4e6d2 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -30,7 +30,8 @@ struct fwnode_handle *iort_find_domain_token(int trans_id); #ifdef CONFIG_ACPI_IORT void acpi_iort_init(void); u32 iort_msi_map_rid(struct device *dev, u32 req_id); -struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id); +struct irq_domain *iort_get_device_domain(struct device *dev, u32 id, + enum irq_domain_bus_token bus_token); void acpi_configure_pmsi_domain(struct device *dev); int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id); /* IOMMU interface */ @@ -41,8 +42,8 @@ int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head); static inline void acpi_iort_init(void) { } static inline u32 iort_msi_map_rid(struct device *dev, u32 req_id) { return req_id; } -static inline struct irq_domain *iort_get_device_domain(struct device *dev, - u32 req_id) +static inline struct irq_domain *iort_get_device_domain( + struct device *dev, u32 id, enum irq_domain_bus_token bus_token) { return NULL; } static inline void acpi_configure_pmsi_domain(struct device *dev) { } /* IOMMU interface */ From f7a85520087a6286148926d04e7cc3857f8ad15f Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 19 Jun 2020 09:20:04 +0100 Subject: [PATCH 09/68] ACPI/IORT: Make iort_msi_map_rid() PCI agnostic [ Upstream commit 39c3cf566ceafa7c1ae331a5f26fbb685d670001 ] There is nothing PCI specific in iort_msi_map_rid(). Rename the function using a bus protocol agnostic name, iort_msi_map_id(), and convert current callers to it. Signed-off-by: Lorenzo Pieralisi Acked-by: Bjorn Helgaas Cc: Will Deacon Cc: Hanjun Guo Cc: Bjorn Helgaas Cc: Sudeep Holla Cc: Robin Murphy Cc: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/20200619082013.13661-4-lorenzo.pieralisi@arm.com Signed-off-by: Catalin Marinas Stable-dep-of: d79972789d17 ("of: dynamic: Fix of_reconfig_get_state_change() return value documentation") Signed-off-by: Sasha Levin --- drivers/acpi/arm64/iort.c | 12 ++++++------ drivers/pci/msi.c | 2 +- include/linux/acpi_iort.h | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 0428e74b6002..13ae2ce9f50d 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -521,22 +521,22 @@ static struct acpi_iort_node *iort_find_dev_node(struct device *dev) } /** - * iort_msi_map_rid() - Map a MSI requester ID for a device + * iort_msi_map_id() - Map a MSI input ID for a device * @dev: The device for which the mapping is to be done. - * @req_id: The device requester ID. + * @input_id: The device input ID. * - * Returns: mapped MSI RID on success, input requester ID otherwise + * Returns: mapped MSI ID on success, input ID otherwise */ -u32 iort_msi_map_rid(struct device *dev, u32 req_id) +u32 iort_msi_map_id(struct device *dev, u32 input_id) { struct acpi_iort_node *node; u32 dev_id; node = iort_find_dev_node(dev); if (!node) - return req_id; + return input_id; - iort_node_map_id(node, req_id, &dev_id, IORT_MSI_TYPE); + iort_node_map_id(node, input_id, &dev_id, IORT_MSI_TYPE); return dev_id; } diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 6336316c3793..cc863cdd5cc7 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1604,7 +1604,7 @@ u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev) of_node = irq_domain_get_of_node(domain); rid = of_node ? of_msi_map_rid(&pdev->dev, of_node, rid) : - iort_msi_map_rid(&pdev->dev, rid); + iort_msi_map_id(&pdev->dev, rid); return rid; } diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index 6bf603a4e6d2..cce2cbd4f301 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -29,7 +29,7 @@ void iort_deregister_domain_token(int trans_id); struct fwnode_handle *iort_find_domain_token(int trans_id); #ifdef CONFIG_ACPI_IORT void acpi_iort_init(void); -u32 iort_msi_map_rid(struct device *dev, u32 req_id); +u32 iort_msi_map_id(struct device *dev, u32 id); struct irq_domain *iort_get_device_domain(struct device *dev, u32 id, enum irq_domain_bus_token bus_token); void acpi_configure_pmsi_domain(struct device *dev); @@ -40,8 +40,8 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev); int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head); #else static inline void acpi_iort_init(void) { } -static inline u32 iort_msi_map_rid(struct device *dev, u32 req_id) -{ return req_id; } +static inline u32 iort_msi_map_id(struct device *dev, u32 id) +{ return id; } static inline struct irq_domain *iort_get_device_domain( struct device *dev, u32 id, enum irq_domain_bus_token bus_token) { return NULL; } From e5cfaab662959688087460d9fcdade07d4706f20 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 19 Jun 2020 09:20:07 +0100 Subject: [PATCH 10/68] of/iommu: Make of_map_rid() PCI agnostic [ Upstream commit 746a71d02b5d15817fcb13c956ba999a87773952 ] There is nothing PCI specific (other than the RID - requester ID) in the of_map_rid() implementation, so the same function can be reused for input/output IDs mapping for other busses just as well. Rename the RID instances/names to a generic "id" tag. No functionality change intended. Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring Acked-by: Joerg Roedel Cc: Rob Herring Cc: Joerg Roedel Cc: Robin Murphy Cc: Marc Zyngier Link: https://lore.kernel.org/r/20200619082013.13661-7-lorenzo.pieralisi@arm.com Signed-off-by: Catalin Marinas Stable-dep-of: d79972789d17 ("of: dynamic: Fix of_reconfig_get_state_change() return value documentation") Signed-off-by: Sasha Levin --- drivers/iommu/of_iommu.c | 4 ++-- drivers/of/base.c | 42 ++++++++++++++++++++-------------------- drivers/of/irq.c | 2 +- include/linux/of.h | 4 ++-- 4 files changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 614a93aa5305..656939ed842e 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -121,7 +121,7 @@ static int of_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data) struct of_phandle_args iommu_spec = { .args_count = 1 }; int err; - err = of_map_rid(info->np, alias, "iommu-map", "iommu-map-mask", + err = of_map_id(info->np, alias, "iommu-map", "iommu-map-mask", &iommu_spec.np, iommu_spec.args); if (err) return err == -ENODEV ? NO_IOMMU : err; @@ -137,7 +137,7 @@ static int of_fsl_mc_iommu_init(struct fsl_mc_device *mc_dev, struct of_phandle_args iommu_spec = { .args_count = 1 }; int err; - err = of_map_rid(master_np, mc_dev->icid, "iommu-map", + err = of_map_id(master_np, mc_dev->icid, "iommu-map", "iommu-map-mask", &iommu_spec.np, iommu_spec.args); if (err) diff --git a/drivers/of/base.c b/drivers/of/base.c index 3f13b982e8b5..5c97366628b1 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -2280,15 +2280,15 @@ int of_find_last_cache_level(unsigned int cpu) } /** - * of_map_rid - Translate a requester ID through a downstream mapping. + * of_map_id - Translate an ID through a downstream mapping. * @np: root complex device node. - * @rid: device requester ID to map. + * @id: device ID to map. * @map_name: property name of the map to use. * @map_mask_name: optional property name of the mask to use. * @target: optional pointer to a target device node. * @id_out: optional pointer to receive the translated ID. * - * Given a device requester ID, look up the appropriate implementation-defined + * Given a device ID, look up the appropriate implementation-defined * platform ID and/or the target device which receives transactions on that * ID, as per the "iommu-map" and "msi-map" bindings. Either of @target or * @id_out may be NULL if only the other is required. If @target points to @@ -2298,11 +2298,11 @@ int of_find_last_cache_level(unsigned int cpu) * * Return: 0 on success or a standard error code on failure. */ -int of_map_rid(struct device_node *np, u32 rid, +int of_map_id(struct device_node *np, u32 id, const char *map_name, const char *map_mask_name, struct device_node **target, u32 *id_out) { - u32 map_mask, masked_rid; + u32 map_mask, masked_id; int map_len; const __be32 *map = NULL; @@ -2314,7 +2314,7 @@ int of_map_rid(struct device_node *np, u32 rid, if (target) return -ENODEV; /* Otherwise, no map implies no translation */ - *id_out = rid; + *id_out = id; return 0; } @@ -2334,22 +2334,22 @@ int of_map_rid(struct device_node *np, u32 rid, if (map_mask_name) of_property_read_u32(np, map_mask_name, &map_mask); - masked_rid = map_mask & rid; + masked_id = map_mask & id; for ( ; map_len > 0; map_len -= 4 * sizeof(*map), map += 4) { struct device_node *phandle_node; - u32 rid_base = be32_to_cpup(map + 0); + u32 id_base = be32_to_cpup(map + 0); u32 phandle = be32_to_cpup(map + 1); u32 out_base = be32_to_cpup(map + 2); - u32 rid_len = be32_to_cpup(map + 3); + u32 id_len = be32_to_cpup(map + 3); - if (rid_base & ~map_mask) { - pr_err("%pOF: Invalid %s translation - %s-mask (0x%x) ignores rid-base (0x%x)\n", + if (id_base & ~map_mask) { + pr_err("%pOF: Invalid %s translation - %s-mask (0x%x) ignores id-base (0x%x)\n", np, map_name, map_name, - map_mask, rid_base); + map_mask, id_base); return -EFAULT; } - if (masked_rid < rid_base || masked_rid >= rid_base + rid_len) + if (masked_id < id_base || masked_id >= id_base + id_len) continue; phandle_node = of_find_node_by_phandle(phandle); @@ -2367,20 +2367,20 @@ int of_map_rid(struct device_node *np, u32 rid, } if (id_out) - *id_out = masked_rid - rid_base + out_base; + *id_out = masked_id - id_base + out_base; - pr_debug("%pOF: %s, using mask %08x, rid-base: %08x, out-base: %08x, length: %08x, rid: %08x -> %08x\n", - np, map_name, map_mask, rid_base, out_base, - rid_len, rid, masked_rid - rid_base + out_base); + pr_debug("%pOF: %s, using mask %08x, id-base: %08x, out-base: %08x, length: %08x, id: %08x -> %08x\n", + np, map_name, map_mask, id_base, out_base, + id_len, id, masked_id - id_base + out_base); return 0; } - pr_info("%pOF: no %s translation for rid 0x%x on %pOF\n", np, map_name, - rid, target && *target ? *target : NULL); + pr_info("%pOF: no %s translation for id 0x%x on %pOF\n", np, map_name, + id, target && *target ? *target : NULL); /* Bypasses translation */ if (id_out) - *id_out = rid; + *id_out = id; return 0; } -EXPORT_SYMBOL_GPL(of_map_rid); +EXPORT_SYMBOL_GPL(of_map_id); diff --git a/drivers/of/irq.c b/drivers/of/irq.c index a296eaf52a5b..d632bc5b3a2d 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -587,7 +587,7 @@ static u32 __of_msi_map_rid(struct device *dev, struct device_node **np, * "msi-map" property. */ for (parent_dev = dev; parent_dev; parent_dev = parent_dev->parent) - if (!of_map_rid(parent_dev->of_node, rid_in, "msi-map", + if (!of_map_id(parent_dev->of_node, rid_in, "msi-map", "msi-map-mask", np, &rid_out)) break; return rid_out; diff --git a/include/linux/of.h b/include/linux/of.h index e29b341598e9..e070c5ed62a0 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -554,7 +554,7 @@ bool of_console_check(struct device_node *dn, char *name, int index); extern int of_cpu_node_to_id(struct device_node *np); -int of_map_rid(struct device_node *np, u32 rid, +int of_map_id(struct device_node *np, u32 id, const char *map_name, const char *map_mask_name, struct device_node **target, u32 *id_out); @@ -978,7 +978,7 @@ static inline int of_cpu_node_to_id(struct device_node *np) return -ENODEV; } -static inline int of_map_rid(struct device_node *np, u32 rid, +static inline int of_map_id(struct device_node *np, u32 id, const char *map_name, const char *map_mask_name, struct device_node **target, u32 *id_out) { From ae374c57afeb7c07a5a491c273b94aa05427cd70 Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Fri, 19 Jun 2020 09:20:10 +0100 Subject: [PATCH 11/68] of/irq: make of_msi_map_get_device_domain() bus agnostic [ Upstream commit 6f881aba01109a01a43e4f135673c19190f61133 ] of_msi_map_get_device_domain() is PCI specific but it need not be and can be easily changed to be bus agnostic in order to be used by other busses by adding an IRQ domain bus token as an input parameter. Signed-off-by: Diana Craciun Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring Acked-by: Bjorn Helgaas # pci/msi.c Cc: Bjorn Helgaas Cc: Rob Herring Cc: Marc Zyngier Link: https://lore.kernel.org/r/20200619082013.13661-10-lorenzo.pieralisi@arm.com Signed-off-by: Catalin Marinas Stable-dep-of: d79972789d17 ("of: dynamic: Fix of_reconfig_get_state_change() return value documentation") Signed-off-by: Sasha Levin --- drivers/of/irq.c | 8 +++++--- drivers/pci/msi.c | 2 +- include/linux/of_irq.h | 5 +++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/of/irq.c b/drivers/of/irq.c index d632bc5b3a2d..1005e4f349ef 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -613,18 +613,20 @@ u32 of_msi_map_rid(struct device *dev, struct device_node *msi_np, u32 rid_in) * of_msi_map_get_device_domain - Use msi-map to find the relevant MSI domain * @dev: device for which the mapping is to be done. * @rid: Requester ID for the device. + * @bus_token: Bus token * * Walk up the device hierarchy looking for devices with a "msi-map" * property. * * Returns: the MSI domain for this device (or NULL on failure) */ -struct irq_domain *of_msi_map_get_device_domain(struct device *dev, u32 rid) +struct irq_domain *of_msi_map_get_device_domain(struct device *dev, u32 id, + u32 bus_token) { struct device_node *np = NULL; - __of_msi_map_rid(dev, &np, rid); - return irq_find_matching_host(np, DOMAIN_BUS_PCI_MSI); + __of_msi_map_rid(dev, &np, id); + return irq_find_matching_host(np, bus_token); } /** diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index cc863cdd5cc7..392d2ecf7dc8 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1624,7 +1624,7 @@ struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev) u32 rid = pci_dev_id(pdev); pci_for_each_dma_alias(pdev, get_msi_id_cb, &rid); - dom = of_msi_map_get_device_domain(&pdev->dev, rid); + dom = of_msi_map_get_device_domain(&pdev->dev, rid, DOMAIN_BUS_PCI_MSI); if (!dom) dom = iort_get_device_domain(&pdev->dev, rid, DOMAIN_BUS_PCI_MSI); diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index 1214cabb2247..7142a3722758 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -52,7 +52,8 @@ extern struct irq_domain *of_msi_get_domain(struct device *dev, struct device_node *np, enum irq_domain_bus_token token); extern struct irq_domain *of_msi_map_get_device_domain(struct device *dev, - u32 rid); + u32 id, + u32 bus_token); extern void of_msi_configure(struct device *dev, struct device_node *np); u32 of_msi_map_rid(struct device *dev, struct device_node *msi_np, u32 rid_in); #else @@ -85,7 +86,7 @@ static inline struct irq_domain *of_msi_get_domain(struct device *dev, return NULL; } static inline struct irq_domain *of_msi_map_get_device_domain(struct device *dev, - u32 rid) + u32 id, u32 bus_token) { return NULL; } From 8c4fcbe27a7a9398903b4d67d6a47a9cf5cc8859 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 19 Jun 2020 09:20:11 +0100 Subject: [PATCH 12/68] of/irq: Make of_msi_map_rid() PCI bus agnostic [ Upstream commit 2bcdd8f2c07f1aa1bfd34fa0dab8e06949e34846 ] There is nothing PCI bus specific in the of_msi_map_rid() implementation other than the requester ID tag for the input ID space. Rename requester ID to a more generic ID so that the translation code can be used by all busses that require input/output ID translations. No functional change intended. Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring Cc: Bjorn Helgaas Cc: Rob Herring Cc: Marc Zyngier Link: https://lore.kernel.org/r/20200619082013.13661-11-lorenzo.pieralisi@arm.com Signed-off-by: Catalin Marinas Stable-dep-of: d79972789d17 ("of: dynamic: Fix of_reconfig_get_state_change() return value documentation") Signed-off-by: Sasha Levin --- drivers/of/irq.c | 28 ++++++++++++++-------------- drivers/pci/msi.c | 2 +- include/linux/of_irq.h | 8 ++++---- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 1005e4f349ef..25d17b8a1a1a 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -576,43 +576,43 @@ void __init of_irq_init(const struct of_device_id *matches) } } -static u32 __of_msi_map_rid(struct device *dev, struct device_node **np, - u32 rid_in) +static u32 __of_msi_map_id(struct device *dev, struct device_node **np, + u32 id_in) { struct device *parent_dev; - u32 rid_out = rid_in; + u32 id_out = id_in; /* * Walk up the device parent links looking for one with a * "msi-map" property. */ for (parent_dev = dev; parent_dev; parent_dev = parent_dev->parent) - if (!of_map_id(parent_dev->of_node, rid_in, "msi-map", - "msi-map-mask", np, &rid_out)) + if (!of_map_id(parent_dev->of_node, id_in, "msi-map", + "msi-map-mask", np, &id_out)) break; - return rid_out; + return id_out; } /** - * of_msi_map_rid - Map a MSI requester ID for a device. + * of_msi_map_id - Map a MSI ID for a device. * @dev: device for which the mapping is to be done. * @msi_np: device node of the expected msi controller. - * @rid_in: unmapped MSI requester ID for the device. + * @id_in: unmapped MSI ID for the device. * * Walk up the device hierarchy looking for devices with a "msi-map" - * property. If found, apply the mapping to @rid_in. + * property. If found, apply the mapping to @id_in. * - * Returns the mapped MSI requester ID. + * Returns the mapped MSI ID. */ -u32 of_msi_map_rid(struct device *dev, struct device_node *msi_np, u32 rid_in) +u32 of_msi_map_id(struct device *dev, struct device_node *msi_np, u32 id_in) { - return __of_msi_map_rid(dev, &msi_np, rid_in); + return __of_msi_map_id(dev, &msi_np, id_in); } /** * of_msi_map_get_device_domain - Use msi-map to find the relevant MSI domain * @dev: device for which the mapping is to be done. - * @rid: Requester ID for the device. + * @id: Device ID. * @bus_token: Bus token * * Walk up the device hierarchy looking for devices with a "msi-map" @@ -625,7 +625,7 @@ struct irq_domain *of_msi_map_get_device_domain(struct device *dev, u32 id, { struct device_node *np = NULL; - __of_msi_map_rid(dev, &np, id); + __of_msi_map_id(dev, &np, id); return irq_find_matching_host(np, bus_token); } diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 392d2ecf7dc8..c4f4a8a3bf8f 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1603,7 +1603,7 @@ u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev) pci_for_each_dma_alias(pdev, get_msi_id_cb, &rid); of_node = irq_domain_get_of_node(domain); - rid = of_node ? of_msi_map_rid(&pdev->dev, of_node, rid) : + rid = of_node ? of_msi_map_id(&pdev->dev, of_node, rid) : iort_msi_map_id(&pdev->dev, rid); return rid; diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index 7142a3722758..e8b78139f78c 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -55,7 +55,7 @@ extern struct irq_domain *of_msi_map_get_device_domain(struct device *dev, u32 id, u32 bus_token); extern void of_msi_configure(struct device *dev, struct device_node *np); -u32 of_msi_map_rid(struct device *dev, struct device_node *msi_np, u32 rid_in); +u32 of_msi_map_id(struct device *dev, struct device_node *msi_np, u32 id_in); #else static inline int of_irq_count(struct device_node *dev) { @@ -93,10 +93,10 @@ static inline struct irq_domain *of_msi_map_get_device_domain(struct device *dev static inline void of_msi_configure(struct device *dev, struct device_node *np) { } -static inline u32 of_msi_map_rid(struct device *dev, - struct device_node *msi_np, u32 rid_in) +static inline u32 of_msi_map_id(struct device *dev, + struct device_node *msi_np, u32 id_in) { - return rid_in; + return id_in; } #endif From 36ce931a803b7f1066668e1c0ab846c1a4b0c35d Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 18 Mar 2021 10:40:30 +0000 Subject: [PATCH 13/68] of: base: Fix some formatting issues and provide missing descriptions [ Upstream commit 3637d49e11219512920aca8b8ccd0994be33fa8b ] Fixes the following W=1 kernel build warning(s): drivers/of/base.c:315: warning: Function parameter or member 'cpun' not described in '__of_find_n_match_cpu_property' drivers/of/base.c:315: warning: Function parameter or member 'prop_name' not described in '__of_find_n_match_cpu_property' drivers/of/base.c:315: warning: Function parameter or member 'cpu' not described in '__of_find_n_match_cpu_property' drivers/of/base.c:315: warning: Function parameter or member 'thread' not described in '__of_find_n_match_cpu_property' drivers/of/base.c:315: warning: expecting prototype for property holds the physical id of the(). Prototype was for __of_find_n_match_cpu_property() instead drivers/of/base.c:1139: warning: Function parameter or member 'match' not described in 'of_find_matching_node_and_match' drivers/of/base.c:1779: warning: Function parameter or member 'np' not described in '__of_add_property' drivers/of/base.c:1779: warning: Function parameter or member 'prop' not described in '__of_add_property' drivers/of/base.c:1800: warning: Function parameter or member 'np' not described in 'of_add_property' drivers/of/base.c:1800: warning: Function parameter or member 'prop' not described in 'of_add_property' drivers/of/base.c:1849: warning: Function parameter or member 'np' not described in 'of_remove_property' drivers/of/base.c:1849: warning: Function parameter or member 'prop' not described in 'of_remove_property' drivers/of/base.c:2137: warning: Function parameter or member 'dn' not described in 'of_console_check' drivers/of/base.c:2137: warning: Function parameter or member 'name' not described in 'of_console_check' drivers/of/base.c:2137: warning: Function parameter or member 'index' not described in 'of_console_check' Cc: Rob Herring Cc: Frank Rowand Cc: "David S. Miller" Cc: devicetree@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20210318104036.3175910-5-lee.jones@linaro.org Stable-dep-of: d79972789d17 ("of: dynamic: Fix of_reconfig_get_state_change() return value documentation") Signed-off-by: Sasha Levin --- drivers/of/base.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/of/base.c b/drivers/of/base.c index 5c97366628b1..4032814133fe 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -367,7 +367,7 @@ bool __weak arch_match_cpu_phys_id(int cpu, u64 phys_id) return (u32)phys_id == cpu; } -/** +/* * Checks if the given "prop_name" property holds the physical id of the * core/thread corresponding to the logical cpu 'cpu'. If 'thread' is not * NULL, local thread number within the core is returned in it. @@ -1190,7 +1190,7 @@ EXPORT_SYMBOL(of_match_node); * will; typically, you pass what the previous call * returned. of_node_put() will be called on it * @matches: array of of device match structures to search in - * @match Updated to point at the matches entry which matched + * @match: Updated to point at the matches entry which matched * * Returns a node pointer with refcount incremented, use * of_node_put() on it when done. @@ -1853,6 +1853,8 @@ EXPORT_SYMBOL(of_count_phandle_with_args); /** * __of_add_property - Add a property to a node without lock operations + * @np: Caller's Device Node + * @prob: Property to add */ int __of_add_property(struct device_node *np, struct property *prop) { @@ -1874,6 +1876,8 @@ int __of_add_property(struct device_node *np, struct property *prop) /** * of_add_property - Add a property to a node + * @np: Caller's Device Node + * @prob: Property to add */ int of_add_property(struct device_node *np, struct property *prop) { @@ -1918,6 +1922,8 @@ int __of_remove_property(struct device_node *np, struct property *prop) /** * of_remove_property - Remove a property from a node. + * @np: Caller's Device Node + * @prob: Property to remove * * Note that we don't actually remove it, since we have given out * who-knows-how-many pointers to the data using get-property. @@ -2203,9 +2209,9 @@ EXPORT_SYMBOL_GPL(of_alias_get_highest_id); /** * of_console_check() - Test and setup console for DT setup - * @dn - Pointer to device node - * @name - Name to use for preferred console without index. ex. "ttyS" - * @index - Index to use for preferred console. + * @dn: Pointer to device node + * @name: Name to use for preferred console without index. ex. "ttyS" + * @index: Index to use for preferred console. * * Check if the given device node matches the stdout-path property in the * /chosen node. If it does then register it as the preferred console and return From b31cb14cac85a49b118c10e036612a166fc9e631 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 26 Mar 2021 13:26:06 -0600 Subject: [PATCH 14/68] of: Fix kerneldoc output formatting [ Upstream commit 62f026f082e4d762a47b43ea693b38f025122332 ] The indentation of the kerneldoc comments affects the output formatting. Leading tabs in particular don't work, sections need to be indented under the section header, and several code blocks are reformatted. Cc: Frank Rowand Cc: Mauro Carvalho Chehab Signed-off-by: Rob Herring Reviewed-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/20210326192606.3702739-1-robh@kernel.org Stable-dep-of: d79972789d17 ("of: dynamic: Fix of_reconfig_get_state_change() return value documentation") Signed-off-by: Sasha Levin --- drivers/of/base.c | 265 +++++++++++++++++++++++----------------------- drivers/of/fdt.c | 9 +- 2 files changed, 136 insertions(+), 138 deletions(-) diff --git a/drivers/of/base.c b/drivers/of/base.c index 4032814133fe..c4c64ef5fb7a 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -713,11 +713,11 @@ bool of_device_is_big_endian(const struct device_node *device) EXPORT_SYMBOL(of_device_is_big_endian); /** - * of_get_parent - Get a node's parent if any - * @node: Node to get parent + * of_get_parent - Get a node's parent if any + * @node: Node to get parent * - * Returns a node pointer with refcount incremented, use - * of_node_put() on it when done. + * Return: A node pointer with refcount incremented, use + * of_node_put() on it when done. */ struct device_node *of_get_parent(const struct device_node *node) { @@ -735,15 +735,15 @@ struct device_node *of_get_parent(const struct device_node *node) EXPORT_SYMBOL(of_get_parent); /** - * of_get_next_parent - Iterate to a node's parent - * @node: Node to get parent of + * of_get_next_parent - Iterate to a node's parent + * @node: Node to get parent of * - * This is like of_get_parent() except that it drops the - * refcount on the passed node, making it suitable for iterating - * through a node's parents. + * This is like of_get_parent() except that it drops the + * refcount on the passed node, making it suitable for iterating + * through a node's parents. * - * Returns a node pointer with refcount incremented, use - * of_node_put() on it when done. + * Return: A node pointer with refcount incremented, use + * of_node_put() on it when done. */ struct device_node *of_get_next_parent(struct device_node *node) { @@ -781,13 +781,13 @@ static struct device_node *__of_get_next_child(const struct device_node *node, child = __of_get_next_child(parent, child)) /** - * of_get_next_child - Iterate a node childs - * @node: parent node - * @prev: previous child of the parent node, or NULL to get first + * of_get_next_child - Iterate a node childs + * @node: parent node + * @prev: previous child of the parent node, or NULL to get first * - * Returns a node pointer with refcount incremented, use of_node_put() on - * it when done. Returns NULL when prev is the last child. Decrements the - * refcount of prev. + * Return: A node pointer with refcount incremented, use of_node_put() on + * it when done. Returns NULL when prev is the last child. Decrements the + * refcount of prev. */ struct device_node *of_get_next_child(const struct device_node *node, struct device_node *prev) @@ -803,12 +803,12 @@ struct device_node *of_get_next_child(const struct device_node *node, EXPORT_SYMBOL(of_get_next_child); /** - * of_get_next_available_child - Find the next available child node - * @node: parent node - * @prev: previous child of the parent node, or NULL to get first + * of_get_next_available_child - Find the next available child node + * @node: parent node + * @prev: previous child of the parent node, or NULL to get first * - * This function is like of_get_next_child(), except that it - * automatically skips any disabled nodes (i.e. status = "disabled"). + * This function is like of_get_next_child(), except that it + * automatically skips any disabled nodes (i.e. status = "disabled"). */ struct device_node *of_get_next_available_child(const struct device_node *node, struct device_node *prev) @@ -834,12 +834,12 @@ struct device_node *of_get_next_available_child(const struct device_node *node, EXPORT_SYMBOL(of_get_next_available_child); /** - * of_get_next_cpu_node - Iterate on cpu nodes - * @prev: previous child of the /cpus node, or NULL to get first + * of_get_next_cpu_node - Iterate on cpu nodes + * @prev: previous child of the /cpus node, or NULL to get first * - * Returns a cpu node pointer with refcount incremented, use of_node_put() - * on it when done. Returns NULL when prev is the last child. Decrements - * the refcount of prev. + * Return: A cpu node pointer with refcount incremented, use of_node_put() + * on it when done. Returns NULL when prev is the last child. Decrements + * the refcount of prev. */ struct device_node *of_get_next_cpu_node(struct device_node *prev) { @@ -896,15 +896,15 @@ struct device_node *of_get_compatible_child(const struct device_node *parent, EXPORT_SYMBOL(of_get_compatible_child); /** - * of_get_child_by_name - Find the child node by name for a given parent - * @node: parent node - * @name: child name to look for. + * of_get_child_by_name - Find the child node by name for a given parent + * @node: parent node + * @name: child name to look for. * - * This function looks for child node for given matching name + * This function looks for child node for given matching name * - * Returns a node pointer if found, with refcount incremented, use - * of_node_put() on it when done. - * Returns NULL if node is not found. + * Return: A node pointer if found, with refcount incremented, use + * of_node_put() on it when done. + * Returns NULL if node is not found. */ struct device_node *of_get_child_by_name(const struct device_node *node, const char *name) @@ -955,22 +955,22 @@ struct device_node *__of_find_node_by_full_path(struct device_node *node, } /** - * of_find_node_opts_by_path - Find a node matching a full OF path - * @path: Either the full path to match, or if the path does not - * start with '/', the name of a property of the /aliases - * node (an alias). In the case of an alias, the node - * matching the alias' value will be returned. - * @opts: Address of a pointer into which to store the start of - * an options string appended to the end of the path with - * a ':' separator. + * of_find_node_opts_by_path - Find a node matching a full OF path + * @path: Either the full path to match, or if the path does not + * start with '/', the name of a property of the /aliases + * node (an alias). In the case of an alias, the node + * matching the alias' value will be returned. + * @opts: Address of a pointer into which to store the start of + * an options string appended to the end of the path with + * a ':' separator. * - * Valid paths: - * /foo/bar Full path - * foo Valid alias - * foo/bar Valid alias + relative path + * Valid paths: + * * /foo/bar Full path + * * foo Valid alias + * * foo/bar Valid alias + relative path * - * Returns a node pointer with refcount incremented, use - * of_node_put() on it when done. + * Return: A node pointer with refcount incremented, use + * of_node_put() on it when done. */ struct device_node *of_find_node_opts_by_path(const char *path, const char **opts) { @@ -1020,15 +1020,15 @@ struct device_node *of_find_node_opts_by_path(const char *path, const char **opt EXPORT_SYMBOL(of_find_node_opts_by_path); /** - * of_find_node_by_name - Find a node by its "name" property - * @from: The node to start searching from or NULL; the node + * of_find_node_by_name - Find a node by its "name" property + * @from: The node to start searching from or NULL; the node * you pass will not be searched, only the next one * will. Typically, you pass what the previous call * returned. of_node_put() will be called on @from. - * @name: The name string to match against + * @name: The name string to match against * - * Returns a node pointer with refcount incremented, use - * of_node_put() on it when done. + * Return: A node pointer with refcount incremented, use + * of_node_put() on it when done. */ struct device_node *of_find_node_by_name(struct device_node *from, const char *name) @@ -1047,16 +1047,16 @@ struct device_node *of_find_node_by_name(struct device_node *from, EXPORT_SYMBOL(of_find_node_by_name); /** - * of_find_node_by_type - Find a node by its "device_type" property - * @from: The node to start searching from, or NULL to start searching + * of_find_node_by_type - Find a node by its "device_type" property + * @from: The node to start searching from, or NULL to start searching * the entire device tree. The node you pass will not be * searched, only the next one will; typically, you pass * what the previous call returned. of_node_put() will be * called on from for you. - * @type: The type string to match against + * @type: The type string to match against * - * Returns a node pointer with refcount incremented, use - * of_node_put() on it when done. + * Return: A node pointer with refcount incremented, use + * of_node_put() on it when done. */ struct device_node *of_find_node_by_type(struct device_node *from, const char *type) @@ -1075,18 +1075,18 @@ struct device_node *of_find_node_by_type(struct device_node *from, EXPORT_SYMBOL(of_find_node_by_type); /** - * of_find_compatible_node - Find a node based on type and one of the + * of_find_compatible_node - Find a node based on type and one of the * tokens in its "compatible" property - * @from: The node to start searching from or NULL, the node - * you pass will not be searched, only the next one - * will; typically, you pass what the previous call - * returned. of_node_put() will be called on it - * @type: The type string to match "device_type" or NULL to ignore - * @compatible: The string to match to one of the tokens in the device - * "compatible" list. + * @from: The node to start searching from or NULL, the node + * you pass will not be searched, only the next one + * will; typically, you pass what the previous call + * returned. of_node_put() will be called on it + * @type: The type string to match "device_type" or NULL to ignore + * @compatible: The string to match to one of the tokens in the device + * "compatible" list. * - * Returns a node pointer with refcount incremented, use - * of_node_put() on it when done. + * Return: A node pointer with refcount incremented, use + * of_node_put() on it when done. */ struct device_node *of_find_compatible_node(struct device_node *from, const char *type, const char *compatible) @@ -1106,16 +1106,16 @@ struct device_node *of_find_compatible_node(struct device_node *from, EXPORT_SYMBOL(of_find_compatible_node); /** - * of_find_node_with_property - Find a node which has a property with - * the given name. - * @from: The node to start searching from or NULL, the node - * you pass will not be searched, only the next one - * will; typically, you pass what the previous call - * returned. of_node_put() will be called on it - * @prop_name: The name of the property to look for. + * of_find_node_with_property - Find a node which has a property with + * the given name. + * @from: The node to start searching from or NULL, the node + * you pass will not be searched, only the next one + * will; typically, you pass what the previous call + * returned. of_node_put() will be called on it + * @prop_name: The name of the property to look for. * - * Returns a node pointer with refcount incremented, use - * of_node_put() on it when done. + * Return: A node pointer with refcount incremented, use + * of_node_put() on it when done. */ struct device_node *of_find_node_with_property(struct device_node *from, const char *prop_name) @@ -1164,10 +1164,10 @@ const struct of_device_id *__of_match_node(const struct of_device_id *matches, /** * of_match_node - Tell if a device_node has a matching of_match structure - * @matches: array of of device match structures to search in - * @node: the of device structure to match against + * @matches: array of of device match structures to search in + * @node: the of device structure to match against * - * Low level utility function used by device matching. + * Low level utility function used by device matching. */ const struct of_device_id *of_match_node(const struct of_device_id *matches, const struct device_node *node) @@ -1183,17 +1183,17 @@ const struct of_device_id *of_match_node(const struct of_device_id *matches, EXPORT_SYMBOL(of_match_node); /** - * of_find_matching_node_and_match - Find a node based on an of_device_id - * match table. - * @from: The node to start searching from or NULL, the node - * you pass will not be searched, only the next one - * will; typically, you pass what the previous call - * returned. of_node_put() will be called on it - * @matches: array of of device match structures to search in - * @match: Updated to point at the matches entry which matched + * of_find_matching_node_and_match - Find a node based on an of_device_id + * match table. + * @from: The node to start searching from or NULL, the node + * you pass will not be searched, only the next one + * will; typically, you pass what the previous call + * returned. of_node_put() will be called on it + * @matches: array of of device match structures to search in + * @match: Updated to point at the matches entry which matched * - * Returns a node pointer with refcount incremented, use - * of_node_put() on it when done. + * Return: A node pointer with refcount incremented, use + * of_node_put() on it when done. */ struct device_node *of_find_matching_node_and_match(struct device_node *from, const struct of_device_id *matches, @@ -1539,21 +1539,21 @@ EXPORT_SYMBOL(of_parse_phandle); * Caller is responsible to call of_node_put() on the returned out_args->np * pointer. * - * Example: + * Example:: * - * phandle1: node1 { + * phandle1: node1 { * #list-cells = <2>; - * } + * }; * - * phandle2: node2 { + * phandle2: node2 { * #list-cells = <1>; - * } + * }; * - * node3 { + * node3 { * list = <&phandle1 1 2 &phandle2 3>; - * } + * }; * - * To get a device_node of the `node2' node you may call this: + * To get a device_node of the ``node2`` node you may call this: * of_parse_phandle_with_args(node3, "list", "#list-cells", 1, &args); */ int of_parse_phandle_with_args(const struct device_node *np, const char *list_name, @@ -1591,29 +1591,29 @@ EXPORT_SYMBOL(of_parse_phandle_with_args); * Caller is responsible to call of_node_put() on the returned out_args->np * pointer. * - * Example: + * Example:: * - * phandle1: node1 { - * #list-cells = <2>; - * } + * phandle1: node1 { + * #list-cells = <2>; + * }; * - * phandle2: node2 { - * #list-cells = <1>; - * } + * phandle2: node2 { + * #list-cells = <1>; + * }; * - * phandle3: node3 { - * #list-cells = <1>; - * list-map = <0 &phandle2 3>, - * <1 &phandle2 2>, - * <2 &phandle1 5 1>; - * list-map-mask = <0x3>; - * }; + * phandle3: node3 { + * #list-cells = <1>; + * list-map = <0 &phandle2 3>, + * <1 &phandle2 2>, + * <2 &phandle1 5 1>; + * list-map-mask = <0x3>; + * }; * - * node4 { - * list = <&phandle1 1 2 &phandle3 0>; - * } + * node4 { + * list = <&phandle1 1 2 &phandle3 0>; + * }; * - * To get a device_node of the `node2' node you may call this: + * To get a device_node of the ``node2`` node you may call this: * of_parse_phandle_with_args(node4, "list", "list", 1, &args); */ int of_parse_phandle_with_args_map(const struct device_node *np, @@ -1773,19 +1773,19 @@ EXPORT_SYMBOL(of_parse_phandle_with_args_map); * Caller is responsible to call of_node_put() on the returned out_args->np * pointer. * - * Example: + * Example:: * - * phandle1: node1 { - * } + * phandle1: node1 { + * }; * - * phandle2: node2 { - * } + * phandle2: node2 { + * }; * - * node3 { - * list = <&phandle1 0 2 &phandle2 2 3>; - * } + * node3 { + * list = <&phandle1 0 2 &phandle2 2 3>; + * }; * - * To get a device_node of the `node2' node you may call this: + * To get a device_node of the ``node2`` node you may call this: * of_parse_phandle_with_fixed_args(node3, "list", 2, 1, &args); */ int of_parse_phandle_with_fixed_args(const struct device_node *np, @@ -2030,13 +2030,12 @@ static void of_alias_add(struct alias_prop *ap, struct device_node *np, /** * of_alias_scan - Scan all properties of the 'aliases' node + * @dt_alloc: An allocator that provides a virtual address to memory + * for storing the resulting tree * * The function scans all the properties of the 'aliases' node and populates * the global lookup table with the properties. It returns the * number of alias properties found, or an error code in case of failure. - * - * @dt_alloc: An allocator that provides a virtual address to memory - * for storing the resulting tree */ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)) { @@ -2231,12 +2230,12 @@ bool of_console_check(struct device_node *dn, char *name, int index) EXPORT_SYMBOL_GPL(of_console_check); /** - * of_find_next_cache_node - Find a node's subsidiary cache - * @np: node of type "cpu" or "cache" + * of_find_next_cache_node - Find a node's subsidiary cache + * @np: node of type "cpu" or "cache" * - * Returns a node pointer with refcount incremented, use - * of_node_put() on it when done. Caller should hold a reference - * to np. + * Return: A node pointer with refcount incremented, use + * of_node_put() on it when done. Caller should hold a reference + * to np. */ struct device_node *of_find_next_cache_node(const struct device_node *np) { diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 6d519ef3c5da..09a98668e7db 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -349,11 +349,6 @@ static int unflatten_dt_nodes(const void *blob, /** * __unflatten_device_tree - create tree of device_nodes from flat blob - * - * unflattens a device-tree, creating the - * tree of struct device_node. It also fills the "name" and "type" - * pointers of the nodes so the normal device-tree walking functions - * can be used. * @blob: The blob to expand * @dad: Parent device node * @mynodes: The device_node tree created by the call @@ -361,6 +356,10 @@ static int unflatten_dt_nodes(const void *blob, * for the resulting tree * @detached: if true set OF_DETACHED on @mynodes * + * unflattens a device-tree, creating the tree of struct device_node. It also + * fills the "name" and "type" pointers of the nodes so the normal device-tree + * walking functions can be used. + * * Returns NULL on failure or the memory chunk containing the unflattened * device tree on success. */ From 5cadae629e4498b9bfd661bfee471172778bda05 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 25 Mar 2021 10:47:12 -0600 Subject: [PATCH 15/68] of: Add missing 'Return' section in kerneldoc comments [ Upstream commit 8c8239c2c1fb82f171cb22a707f3bb88a2f22109 ] Many of the DT kerneldoc comments are lacking a 'Return' section. Let's add the section in cases we have a description of return values. There's still some cases where the return values are not documented. Cc: Frank Rowand Cc: Mauro Carvalho Chehab Signed-off-by: Rob Herring Reviewed-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/20210325164713.1296407-8-robh@kernel.org Stable-dep-of: d79972789d17 ("of: dynamic: Fix of_reconfig_get_state_change() return value documentation") Signed-off-by: Sasha Levin --- drivers/of/base.c | 39 +++++++++++++------------ drivers/of/dynamic.c | 19 ++++++++----- drivers/of/fdt.c | 8 +++--- drivers/of/irq.c | 14 ++++----- drivers/of/overlay.c | 16 +++++------ drivers/of/platform.c | 10 +++---- drivers/of/property.c | 66 +++++++++++++++++++++++++++---------------- include/linux/of.h | 63 ++++++++++++++++++++++++++--------------- 8 files changed, 140 insertions(+), 95 deletions(-) diff --git a/drivers/of/base.c b/drivers/of/base.c index c4c64ef5fb7a..c8af9a65f98b 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -306,7 +306,7 @@ struct device_node *__of_find_all_nodes(struct device_node *prev) * @prev: Previous node or NULL to start iteration * of_node_put() will be called on it * - * Returns a node pointer with refcount incremented, use + * Return: A node pointer with refcount incremented, use * of_node_put() on it when done. */ struct device_node *of_find_all_nodes(struct device_node *prev) @@ -436,7 +436,7 @@ bool __weak arch_find_n_match_cpu_physical_id(struct device_node *cpun, * before booting secondary cores. This function uses arch_match_cpu_phys_id * which can be overridden by architecture specific implementation. * - * Returns a node pointer for the logical cpu with refcount incremented, use + * Return: A node pointer for the logical cpu with refcount incremented, use * of_node_put() on it when done. Returns NULL if not found. */ struct device_node *of_get_cpu_node(int cpu, unsigned int *thread) @@ -456,8 +456,8 @@ EXPORT_SYMBOL(of_get_cpu_node); * * @cpu_node: Pointer to the device_node for CPU. * - * Returns the logical CPU number of the given CPU device_node. - * Returns -ENODEV if the CPU is not found. + * Return: The logical CPU number of the given CPU device_node or -ENODEV if the + * CPU is not found. */ int of_cpu_node_to_id(struct device_node *cpu_node) { @@ -489,7 +489,7 @@ EXPORT_SYMBOL(of_cpu_node_to_id); * bindings. This function check for both and returns the idle state node for * the requested index. * - * In case an idle state node is found at @index, the refcount is incremented + * Return: An idle state node if found at @index. The refcount is incremented * for it, so call of_node_put() on it when done. Returns NULL if not found. */ struct device_node *of_get_cpu_state_node(struct device_node *cpu_node, @@ -623,7 +623,7 @@ int of_device_compatible_match(struct device_node *device, * of_machine_is_compatible - Test root of device tree for a given compatible value * @compat: compatible string to look for in root node's compatible property. * - * Returns a positive integer if the root node has the given value in its + * Return: A positive integer if the root node has the given value in its * compatible property. */ int of_machine_is_compatible(const char *compat) @@ -645,7 +645,7 @@ EXPORT_SYMBOL(of_machine_is_compatible); * * @device: Node to check for availability, with locks already held * - * Returns true if the status property is absent or set to "okay" or "ok", + * Return: True if the status property is absent or set to "okay" or "ok", * false otherwise */ static bool __of_device_is_available(const struct device_node *device) @@ -673,7 +673,7 @@ static bool __of_device_is_available(const struct device_node *device) * * @device: Node to check for availability * - * Returns true if the status property is absent or set to "okay" or "ok", + * Return: True if the status property is absent or set to "okay" or "ok", * false otherwise */ bool of_device_is_available(const struct device_node *device) @@ -694,7 +694,7 @@ EXPORT_SYMBOL(of_device_is_available); * * @device: Node to check for endianness * - * Returns true if the device has a "big-endian" property, or if the kernel + * Return: True if the device has a "big-endian" property, or if the kernel * was compiled for BE *and* the device has a "native-endian" property. * Returns false otherwise. * @@ -878,7 +878,7 @@ EXPORT_SYMBOL(of_get_next_cpu_node); * Lookup child node whose compatible property contains the given compatible * string. * - * Returns a node pointer with refcount incremented, use of_node_put() on it + * Return: a node pointer with refcount incremented, use of_node_put() on it * when done; or NULL if not found. */ struct device_node *of_get_compatible_child(const struct device_node *parent, @@ -1232,7 +1232,7 @@ EXPORT_SYMBOL(of_find_matching_node_and_match); * It does this by stripping the manufacturer prefix (as delimited by a ',') * from the first entry in the compatible list property. * - * This routine returns 0 on success, <0 on failure. + * Return: This routine returns 0 on success, <0 on failure. */ int of_modalias_node(struct device_node *node, char *modalias, int len) { @@ -1252,7 +1252,7 @@ EXPORT_SYMBOL_GPL(of_modalias_node); * of_find_node_by_phandle - Find a node given a phandle * @handle: phandle of the node to find * - * Returns a node pointer with refcount incremented, use + * Return: A node pointer with refcount incremented, use * of_node_put() on it when done. */ struct device_node *of_find_node_by_phandle(phandle handle) @@ -1505,7 +1505,7 @@ static int __of_parse_phandle_with_args(const struct device_node *np, * @index: For properties holding a table of phandles, this is the index into * the table * - * Returns the device_node pointer with refcount incremented. Use + * Return: The device_node pointer with refcount incremented. Use * of_node_put() on it when done. */ struct device_node *of_parse_phandle(const struct device_node *np, @@ -1805,7 +1805,7 @@ EXPORT_SYMBOL(of_parse_phandle_with_fixed_args); * @list_name: property name that contains a list * @cells_name: property name that specifies phandles' arguments count * - * Returns the number of phandle + argument tuples within a property. It + * Return: The number of phandle + argument tuples within a property. It * is a typical pattern to encode a list of phandle and variable * arguments into a single property. The number of arguments is encoded * by a property in the phandle-target node. For example, a gpios @@ -2104,7 +2104,9 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)) * @stem: Alias stem of the given device_node * * The function travels the lookup table to get the alias id for the given - * device_node and alias stem. It returns the alias id if found. + * device_node and alias stem. + * + * Return: The alias id if found. */ int of_alias_get_id(struct device_node *np, const char *stem) { @@ -2213,8 +2215,9 @@ EXPORT_SYMBOL_GPL(of_alias_get_highest_id); * @index: Index to use for preferred console. * * Check if the given device node matches the stdout-path property in the - * /chosen node. If it does then register it as the preferred console and return - * TRUE. Otherwise return FALSE. + * /chosen node. If it does then register it as the preferred console. + * + * Return: TRUE if console successfully setup. Otherwise return FALSE. */ bool of_console_check(struct device_node *dn, char *name, int index) { @@ -2265,7 +2268,7 @@ struct device_node *of_find_next_cache_node(const struct device_node *np) * * @cpu: cpu number(logical index) for which the last cache level is needed * - * Returns the the level at which the last cache is present. It is exactly + * Return: The the level at which the last cache is present. It is exactly * same as the total number of cache levels for the given logical cpu. */ int of_find_last_cache_level(unsigned int cpu) diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index 49b16f76d78e..92ee15be78d4 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -27,7 +27,7 @@ static struct device_node *kobj_to_device_node(struct kobject *kobj) * @node: Node to inc refcount, NULL is supported to simplify writing of * callers * - * Returns node. + * Return: The node with refcount incremented. */ struct device_node *of_node_get(struct device_node *node) { @@ -104,7 +104,8 @@ int of_reconfig_notify(unsigned long action, struct of_reconfig_data *p) * @arg - argument of the of notifier * * Returns the new state of a device based on the notifier used. - * Returns 0 on device going from enabled to disabled, 1 on device + * + * Return: 0 on device going from enabled to disabled, 1 on device * going from disabled to enabled and -1 on no change. */ int of_reconfig_get_state_change(unsigned long action, struct of_reconfig_data *pr) @@ -372,7 +373,8 @@ void of_node_release(struct kobject *kobj) * property structure and the property name & contents. The property's * flags have the OF_DYNAMIC bit set so that we can differentiate between * dynamically allocated properties and not. - * Returns the newly allocated property or NULL on out of memory error. + * + * Return: The newly allocated property or NULL on out of memory error. */ struct property *__of_prop_dup(const struct property *prop, gfp_t allocflags) { @@ -415,7 +417,7 @@ struct property *__of_prop_dup(const struct property *prop, gfp_t allocflags) * another node. The node data are dynamically allocated and all the node * flags have the OF_DYNAMIC & OF_DETACHED bits set. * - * Returns the newly allocated node or NULL on out of memory error. + * Return: The newly allocated node or NULL on out of memory error. */ struct device_node *__of_node_dup(const struct device_node *np, const char *full_name) @@ -781,7 +783,8 @@ static int __of_changeset_apply(struct of_changeset *ocs) * Any side-effects of live tree state changes are applied here on * success, like creation/destruction of devices and side-effects * like creation of sysfs properties and directories. - * Returns 0 on success, a negative error value in case of an error. + * + * Return: 0 on success, a negative error value in case of an error. * On error the partially applied effects are reverted. */ int of_changeset_apply(struct of_changeset *ocs) @@ -875,7 +878,8 @@ static int __of_changeset_revert(struct of_changeset *ocs) * was before the application. * Any side-effects like creation/destruction of devices and * removal of sysfs properties and directories are applied. - * Returns 0 on success, a negative error value in case of an error. + * + * Return: 0 on success, a negative error value in case of an error. */ int of_changeset_revert(struct of_changeset *ocs) { @@ -903,7 +907,8 @@ EXPORT_SYMBOL_GPL(of_changeset_revert); * + OF_RECONFIG_ADD_PROPERTY * + OF_RECONFIG_REMOVE_PROPERTY, * + OF_RECONFIG_UPDATE_PROPERTY - * Returns 0 on success, a negative error value in case of an error. + * + * Return: 0 on success, a negative error value in case of an error. */ int of_changeset_action(struct of_changeset *ocs, unsigned long action, struct device_node *np, struct property *prop) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 09a98668e7db..1db951f43353 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -282,7 +282,7 @@ static void reverse_nodes(struct device_node *parent) * @dad: Parent struct device_node * @nodepp: The device_node tree created by the call * - * It returns the size of unflattened device tree or error code + * Return: The size of unflattened device tree or error code */ static int unflatten_dt_nodes(const void *blob, void *mem, @@ -360,7 +360,7 @@ static int unflatten_dt_nodes(const void *blob, * fills the "name" and "type" pointers of the nodes so the normal device-tree * walking functions can be used. * - * Returns NULL on failure or the memory chunk containing the unflattened + * Return: NULL on failure or the memory chunk containing the unflattened * device tree on success. */ void *__unflatten_device_tree(const void *blob, @@ -441,7 +441,7 @@ static DEFINE_MUTEX(of_fdt_unflatten_mutex); * pointers of the nodes so the normal device-tree walking functions * can be used. * - * Returns NULL on failure or the memory chunk containing the unflattened + * Return: NULL on failure or the memory chunk containing the unflattened * device tree on success. */ void *of_fdt_unflatten_tree(const unsigned long *blob, @@ -719,7 +719,7 @@ const void *__init of_get_flat_dt_prop(unsigned long node, const char *name, * @node: node to test * @compat: compatible string to compare with compatible list. * - * On match, returns a non-zero value with smaller values returned for more + * Return: a non-zero value on match with smaller values returned for more * specific compatible values. */ static int of_fdt_is_compatible(const void *blob, diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 25d17b8a1a1a..352e14b007e7 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -48,7 +48,7 @@ EXPORT_SYMBOL_GPL(irq_of_parse_and_map); * of_irq_find_parent - Given a device node, find its interrupt parent node * @child: pointer to device node * - * Returns a pointer to the interrupt parent node, or NULL if the interrupt + * Return: A pointer to the interrupt parent node, or NULL if the interrupt * parent could not be determined. */ struct device_node *of_irq_find_parent(struct device_node *child) @@ -81,14 +81,14 @@ EXPORT_SYMBOL_GPL(of_irq_find_parent); * @addr: address specifier (start of "reg" property of the device) in be32 format * @out_irq: structure of_phandle_args updated by this function * - * Returns 0 on success and a negative number on error - * * This function is a low-level interrupt tree walking function. It * can be used to do a partial walk with synthetized reg and interrupts * properties, for example when resolving PCI interrupts when no device * node exist for the parent. It takes an interrupt specifier structure as * input, walks the tree looking for any interrupt-map properties, translates * the specifier for each map, and then returns the translated map. + * + * Return: 0 on success and a negative number on error */ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq) { @@ -380,7 +380,7 @@ EXPORT_SYMBOL_GPL(of_irq_to_resource); * @dev: pointer to device tree node * @index: zero-based index of the IRQ * - * Returns Linux IRQ number on success, or 0 on the IRQ mapping failure, or + * Return: Linux IRQ number on success, or 0 on the IRQ mapping failure, or * -EPROBE_DEFER if the IRQ domain is not yet created, or error code in case * of any other failure. */ @@ -407,7 +407,7 @@ EXPORT_SYMBOL_GPL(of_irq_get); * @dev: pointer to device tree node * @name: IRQ name * - * Returns Linux IRQ number on success, or 0 on the IRQ mapping failure, or + * Return: Linux IRQ number on success, or 0 on the IRQ mapping failure, or * -EPROBE_DEFER if the IRQ domain is not yet created, or error code in case * of any other failure. */ @@ -447,7 +447,7 @@ int of_irq_count(struct device_node *dev) * @res: array of resources to fill in * @nr_irqs: the number of IRQs (and upper bound for num of @res elements) * - * Returns the size of the filled in table (up to @nr_irqs). + * Return: The size of the filled in table (up to @nr_irqs). */ int of_irq_to_resource_table(struct device_node *dev, struct resource *res, int nr_irqs) @@ -602,7 +602,7 @@ static u32 __of_msi_map_id(struct device *dev, struct device_node **np, * Walk up the device hierarchy looking for devices with a "msi-map" * property. If found, apply the mapping to @id_in. * - * Returns the mapped MSI ID. + * Return: The mapped MSI ID. */ u32 of_msi_map_id(struct device *dev, struct device_node *msi_np, u32 id_in) { diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c index dc298775f762..b551fe44f2f9 100644 --- a/drivers/of/overlay.c +++ b/drivers/of/overlay.c @@ -296,7 +296,7 @@ static struct property *dup_and_fixup_symbol_prop( * * Update of property in symbols node is not allowed. * - * Returns 0 on success, -ENOMEM if memory allocation failure, or -EINVAL if + * Return: 0 on success, -ENOMEM if memory allocation failure, or -EINVAL if * invalid @overlay. */ static int add_changeset_property(struct overlay_changeset *ovcs, @@ -401,7 +401,7 @@ static int add_changeset_property(struct overlay_changeset *ovcs, * * NOTE_2: Multiple mods of created nodes not supported. * - * Returns 0 on success, -ENOMEM if memory allocation failure, or -EINVAL if + * Return: 0 on success, -ENOMEM if memory allocation failure, or -EINVAL if * invalid @overlay. */ static int add_changeset_node(struct overlay_changeset *ovcs, @@ -473,7 +473,7 @@ static int add_changeset_node(struct overlay_changeset *ovcs, * * Do not allow symbols node to have any children. * - * Returns 0 on success, -ENOMEM if memory allocation failure, or -EINVAL if + * Return: 0 on success, -ENOMEM if memory allocation failure, or -EINVAL if * invalid @overlay_node. */ static int build_changeset_next_level(struct overlay_changeset *ovcs, @@ -604,7 +604,7 @@ static int find_dup_cset_prop(struct overlay_changeset *ovcs, * the same node or duplicate {add, delete, or update} properties entries * for the same property. * - * Returns 0 on success, or -EINVAL if duplicate changeset entry found. + * Return: 0 on success, or -EINVAL if duplicate changeset entry found. */ static int changeset_dup_entry_check(struct overlay_changeset *ovcs) { @@ -628,7 +628,7 @@ static int changeset_dup_entry_check(struct overlay_changeset *ovcs) * any portions of the changeset that were successfully created will remain * in @ovcs->cset. * - * Returns 0 on success, -ENOMEM if memory allocation failure, or -EINVAL if + * Return: 0 on success, -ENOMEM if memory allocation failure, or -EINVAL if * invalid overlay in @ovcs->fragments[]. */ static int build_changeset(struct overlay_changeset *ovcs) @@ -724,7 +724,7 @@ static struct device_node *find_target(struct device_node *info_node) * the top level of @tree. The relevant top level nodes are the fragment * nodes and the __symbols__ node. Any other top level node will be ignored. * - * Returns 0 on success, -ENOMEM if memory allocation failure, -EINVAL if error + * Return: 0 on success, -ENOMEM if memory allocation failure, -EINVAL if error * detected in @tree, or -ENOSPC if idr_alloc() error. */ static int init_overlay_changeset(struct overlay_changeset *ovcs, @@ -1180,7 +1180,7 @@ static int overlay_removal_is_ok(struct overlay_changeset *remove_ovcs) * If an error is returned by an overlay changeset post-remove notifier * then no further overlay changeset post-remove notifier will be called. * - * Returns 0 on success, or a negative error number. *ovcs_id is set to + * Return: 0 on success, or a negative error number. *ovcs_id is set to * zero after reverting the changeset, even if a subsequent error occurs. */ int of_overlay_remove(int *ovcs_id) @@ -1267,7 +1267,7 @@ EXPORT_SYMBOL_GPL(of_overlay_remove); * * Removes all overlays from the system in the correct order. * - * Returns 0 on success, or a negative error number + * Return: 0 on success, or a negative error number */ int of_overlay_remove_all(void) { diff --git a/drivers/of/platform.c b/drivers/of/platform.c index b47a2292fe8e..cf5dbc9536f2 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -44,7 +44,7 @@ static const struct of_device_id of_skipped_node_table[] = { * Takes a reference to the embedded struct device which needs to be dropped * after use. * - * Returns platform_device pointer, or NULL if not found + * Return: platform_device pointer, or NULL if not found */ struct platform_device *of_find_device_by_node(struct device_node *np) { @@ -160,7 +160,7 @@ EXPORT_SYMBOL(of_device_alloc); * @platform_data: pointer to populate platform_data pointer with * @parent: Linux device model parent device. * - * Returns pointer to created platform device, or NULL if a device was not + * Return: Pointer to created platform device, or NULL if a device was not * registered. Unavailable devices will not get registered. */ static struct platform_device *of_platform_device_create_pdata( @@ -204,7 +204,7 @@ static struct platform_device *of_platform_device_create_pdata( * @bus_id: name to assign device * @parent: Linux device model parent device. * - * Returns pointer to created platform device, or NULL if a device was not + * Return: Pointer to created platform device, or NULL if a device was not * registered. Unavailable devices will not get registered. */ struct platform_device *of_platform_device_create(struct device_node *np, @@ -463,7 +463,7 @@ EXPORT_SYMBOL(of_platform_bus_probe); * New board support should be using this function instead of * of_platform_bus_probe(). * - * Returns 0 on success, < 0 on failure. + * Return: 0 on success, < 0 on failure. */ int of_platform_populate(struct device_node *root, const struct of_device_id *matches, @@ -594,7 +594,7 @@ static void devm_of_platform_populate_release(struct device *dev, void *res) * Similar to of_platform_populate(), but will automatically call * of_platform_depopulate() when the device is unbound from the bus. * - * Returns 0 on success, < 0 on failure. + * Return: 0 on success, < 0 on failure. */ int devm_of_platform_populate(struct device *dev) { diff --git a/drivers/of/property.c b/drivers/of/property.c index f6010ec0f67b..28ea326d102f 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -36,9 +36,11 @@ * @elem_size: size of the individual element * * Search for a property in a device node and count the number of elements of - * size elem_size in it. Returns number of elements on sucess, -EINVAL if the - * property does not exist or its length does not match a multiple of elem_size - * and -ENODATA if the property does not have a value. + * size elem_size in it. + * + * Return: The number of elements on sucess, -EINVAL if the property does not + * exist or its length does not match a multiple of elem_size and -ENODATA if + * the property does not have a value. */ int of_property_count_elems_of_size(const struct device_node *np, const char *propname, int elem_size) @@ -70,8 +72,9 @@ EXPORT_SYMBOL_GPL(of_property_count_elems_of_size); * @len: if !=NULL, actual length is written to here * * Search for a property in a device node and valid the requested size. - * Returns the property value on success, -EINVAL if the property does not - * exist, -ENODATA if property does not have a value, and -EOVERFLOW if the + * + * Return: The property value on success, -EINVAL if the property does not + * exist, -ENODATA if property does not have a value, and -EOVERFLOW if the * property data is too small or too large. * */ @@ -104,7 +107,9 @@ static void *of_find_property_value_of_size(const struct device_node *np, * @out_value: pointer to return value, modified only if no error. * * Search for a property in a device node and read nth 32-bit value from - * it. Returns 0 on success, -EINVAL if the property does not exist, + * it. + * + * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * @@ -136,7 +141,9 @@ EXPORT_SYMBOL_GPL(of_property_read_u32_index); * @out_value: pointer to return value, modified only if no error. * * Search for a property in a device node and read nth 64-bit value from - * it. Returns 0 on success, -EINVAL if the property does not exist, + * it. + * + * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * @@ -171,12 +178,14 @@ EXPORT_SYMBOL_GPL(of_property_read_u64_index); * sz_min will be read. * * Search for a property in a device node and read 8-bit value(s) from - * it. Returns number of elements read on success, -EINVAL if the property - * does not exist, -ENODATA if property does not have a value, and -EOVERFLOW - * if the property data is smaller than sz_min or longer than sz_max. + * it. * * dts entry of array should be like: - * property = /bits/ 8 <0x50 0x60 0x70>; + * ``property = /bits/ 8 <0x50 0x60 0x70>;`` + * + * Return: The number of elements read on success, -EINVAL if the property + * does not exist, -ENODATA if property does not have a value, and -EOVERFLOW + * if the property data is smaller than sz_min or longer than sz_max. * * The out_values is modified only if a valid u8 value can be decoded. */ @@ -219,12 +228,14 @@ EXPORT_SYMBOL_GPL(of_property_read_variable_u8_array); * sz_min will be read. * * Search for a property in a device node and read 16-bit value(s) from - * it. Returns number of elements read on success, -EINVAL if the property - * does not exist, -ENODATA if property does not have a value, and -EOVERFLOW - * if the property data is smaller than sz_min or longer than sz_max. + * it. * * dts entry of array should be like: - * property = /bits/ 16 <0x5000 0x6000 0x7000>; + * ``property = /bits/ 16 <0x5000 0x6000 0x7000>;`` + * + * Return: The number of elements read on success, -EINVAL if the property + * does not exist, -ENODATA if property does not have a value, and -EOVERFLOW + * if the property data is smaller than sz_min or longer than sz_max. * * The out_values is modified only if a valid u16 value can be decoded. */ @@ -267,7 +278,9 @@ EXPORT_SYMBOL_GPL(of_property_read_variable_u16_array); * sz_min will be read. * * Search for a property in a device node and read 32-bit value(s) from - * it. Returns number of elements read on success, -EINVAL if the property + * it. + * + * Return: The number of elements read on success, -EINVAL if the property * does not exist, -ENODATA if property does not have a value, and -EOVERFLOW * if the property data is smaller than sz_min or longer than sz_max. * @@ -306,7 +319,9 @@ EXPORT_SYMBOL_GPL(of_property_read_variable_u32_array); * @out_value: pointer to return value, modified only if return value is 0. * * Search for a property in a device node and read a 64-bit value from - * it. Returns 0 on success, -EINVAL if the property does not exist, + * it. + * + * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * @@ -341,7 +356,9 @@ EXPORT_SYMBOL_GPL(of_property_read_u64); * sz_min will be read. * * Search for a property in a device node and read 64-bit value(s) from - * it. Returns number of elements read on success, -EINVAL if the property + * it. + * + * Return: The number of elements read on success, -EINVAL if the property * does not exist, -ENODATA if property does not have a value, and -EOVERFLOW * if the property data is smaller than sz_min or longer than sz_max. * @@ -383,10 +400,11 @@ EXPORT_SYMBOL_GPL(of_property_read_variable_u64_array); * return value is 0. * * Search for a property in a device tree node and retrieve a null - * terminated string value (pointer to data, not a copy). Returns 0 on - * success, -EINVAL if the property does not exist, -ENODATA if property - * does not have a value, and -EILSEQ if the string is not null-terminated - * within the length of the property data. + * terminated string value (pointer to data, not a copy). + * + * Return: 0 on success, -EINVAL if the property does not exist, -ENODATA if + * property does not have a value, and -EILSEQ if the string is not + * null-terminated within the length of the property data. * * The out_string pointer is modified only if a valid string can be decoded. */ @@ -750,7 +768,7 @@ EXPORT_SYMBOL(of_graph_get_remote_port_parent); * @node: pointer to a local endpoint device_node * * Return: Remote port node associated with remote endpoint node linked - * to @node. Use of_node_put() on it when done. + * to @node. Use of_node_put() on it when done. */ struct device_node *of_graph_get_remote_port(const struct device_node *node) { @@ -783,7 +801,7 @@ EXPORT_SYMBOL(of_graph_get_endpoint_count); * @endpoint: identifier (value of reg property) of the endpoint node * * Return: Remote device node associated with remote endpoint node linked - * to @node. Use of_node_put() on it when done. + * to @node. Use of_node_put() on it when done. */ struct device_node *of_graph_get_remote_node(const struct device_node *node, u32 port, u32 endpoint) diff --git a/include/linux/of.h b/include/linux/of.h index e070c5ed62a0..8681277af9c6 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -424,12 +424,14 @@ extern int of_detach_node(struct device_node *); * @sz: number of array elements to read * * Search for a property in a device node and read 8-bit value(s) from - * it. Returns 0 on success, -EINVAL if the property does not exist, - * -ENODATA if property does not have a value, and -EOVERFLOW if the - * property data isn't large enough. + * it. * * dts entry of array should be like: - * property = /bits/ 8 <0x50 0x60 0x70>; + * ``property = /bits/ 8 <0x50 0x60 0x70>;`` + * + * Return: 0 on success, -EINVAL if the property does not exist, + * -ENODATA if property does not have a value, and -EOVERFLOW if the + * property data isn't large enough. * * The out_values is modified only if a valid u8 value can be decoded. */ @@ -454,12 +456,14 @@ static inline int of_property_read_u8_array(const struct device_node *np, * @sz: number of array elements to read * * Search for a property in a device node and read 16-bit value(s) from - * it. Returns 0 on success, -EINVAL if the property does not exist, - * -ENODATA if property does not have a value, and -EOVERFLOW if the - * property data isn't large enough. + * it. * * dts entry of array should be like: - * property = /bits/ 16 <0x5000 0x6000 0x7000>; + * ``property = /bits/ 16 <0x5000 0x6000 0x7000>;`` + * + * Return: 0 on success, -EINVAL if the property does not exist, + * -ENODATA if property does not have a value, and -EOVERFLOW if the + * property data isn't large enough. * * The out_values is modified only if a valid u16 value can be decoded. */ @@ -485,7 +489,9 @@ static inline int of_property_read_u16_array(const struct device_node *np, * @sz: number of array elements to read * * Search for a property in a device node and read 32-bit value(s) from - * it. Returns 0 on success, -EINVAL if the property does not exist, + * it. + * + * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * @@ -513,7 +519,9 @@ static inline int of_property_read_u32_array(const struct device_node *np, * @sz: number of array elements to read * * Search for a property in a device node and read 64-bit value(s) from - * it. Returns 0 on success, -EINVAL if the property does not exist, + * it. + * + * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * @@ -1046,7 +1054,9 @@ static inline bool of_node_is_type(const struct device_node *np, const char *typ * @propname: name of the property to be searched. * * Search for a property in a device node and count the number of u8 elements - * in it. Returns number of elements on sucess, -EINVAL if the property does + * in it. + * + * Return: The number of elements on sucess, -EINVAL if the property does * not exist or its length does not match a multiple of u8 and -ENODATA if the * property does not have a value. */ @@ -1063,7 +1073,9 @@ static inline int of_property_count_u8_elems(const struct device_node *np, * @propname: name of the property to be searched. * * Search for a property in a device node and count the number of u16 elements - * in it. Returns number of elements on sucess, -EINVAL if the property does + * in it. + * + * Return: The number of elements on sucess, -EINVAL if the property does * not exist or its length does not match a multiple of u16 and -ENODATA if the * property does not have a value. */ @@ -1080,7 +1092,9 @@ static inline int of_property_count_u16_elems(const struct device_node *np, * @propname: name of the property to be searched. * * Search for a property in a device node and count the number of u32 elements - * in it. Returns number of elements on sucess, -EINVAL if the property does + * in it. + * + * Return: The number of elements on sucess, -EINVAL if the property does * not exist or its length does not match a multiple of u32 and -ENODATA if the * property does not have a value. */ @@ -1097,7 +1111,9 @@ static inline int of_property_count_u32_elems(const struct device_node *np, * @propname: name of the property to be searched. * * Search for a property in a device node and count the number of u64 elements - * in it. Returns number of elements on sucess, -EINVAL if the property does + * in it. + * + * Return: The number of elements on sucess, -EINVAL if the property does * not exist or its length does not match a multiple of u64 and -ENODATA if the * property does not have a value. */ @@ -1118,7 +1134,7 @@ static inline int of_property_count_u64_elems(const struct device_node *np, * Search for a property in a device tree node and retrieve a list of * terminated string values (pointer to data, not a copy) in that property. * - * If @out_strs is NULL, the number of strings in the property is returned. + * Return: If @out_strs is NULL, the number of strings in the property is returned. */ static inline int of_property_read_string_array(const struct device_node *np, const char *propname, const char **out_strs, @@ -1134,10 +1150,11 @@ static inline int of_property_read_string_array(const struct device_node *np, * @propname: name of the property to be searched. * * Search for a property in a device tree node and retrieve the number of null - * terminated string contain in it. Returns the number of strings on - * success, -EINVAL if the property does not exist, -ENODATA if property - * does not have a value, and -EILSEQ if the string is not null-terminated - * within the length of the property data. + * terminated string contain in it. + * + * Return: The number of strings on success, -EINVAL if the property does not + * exist, -ENODATA if property does not have a value, and -EILSEQ if the string + * is not null-terminated within the length of the property data. */ static inline int of_property_count_strings(const struct device_node *np, const char *propname) @@ -1157,7 +1174,8 @@ static inline int of_property_count_strings(const struct device_node *np, * Search for a property in a device tree node and retrieve a null * terminated string value (pointer to data, not a copy) in the list of strings * contained in that property. - * Returns 0 on success, -EINVAL if the property does not exist, -ENODATA if + * + * Return: 0 on success, -EINVAL if the property does not exist, -ENODATA if * property does not have a value, and -EILSEQ if the string is not * null-terminated within the length of the property data. * @@ -1177,7 +1195,8 @@ static inline int of_property_read_string_index(const struct device_node *np, * @propname: name of the property to be searched. * * Search for a property in a device node. - * Returns true if the property exists false otherwise. + * + * Return: true if the property exists false otherwise. */ static inline bool of_property_read_bool(const struct device_node *np, const char *propname) @@ -1423,7 +1442,7 @@ static inline int of_reconfig_get_state_change(unsigned long action, * of_device_is_system_power_controller - Tells if system-power-controller is found for device_node * @np: Pointer to the given device_node * - * return true if present false otherwise + * Return: true if present false otherwise */ static inline bool of_device_is_system_power_controller(const struct device_node *np) { From 5cd05bbaaef425286cfafd69e01e8d33527bc68f Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Thu, 23 Nov 2023 15:47:18 +0100 Subject: [PATCH 16/68] of: dynamic: Fix of_reconfig_get_state_change() return value documentation [ Upstream commit d79972789d17499b6091ded2fc0c6763c501a5ba ] The documented numeric return values do not match the actual returned values. Fix them by using the enum names instead of raw numbers. Fixes: b53a2340d0d3 ("of/reconfig: Add of_reconfig_get_state_change() of notifier helper.") Signed-off-by: Luca Ceresoli Link: https://lore.kernel.org/r/20231123-fix-of_reconfig_get_state_change-docs-v1-1-f51892050ff9@bootlin.com Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/of/dynamic.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index 92ee15be78d4..ae969630958c 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -105,8 +105,9 @@ int of_reconfig_notify(unsigned long action, struct of_reconfig_data *p) * * Returns the new state of a device based on the notifier used. * - * Return: 0 on device going from enabled to disabled, 1 on device - * going from disabled to enabled and -1 on no change. + * Return: OF_RECONFIG_CHANGE_REMOVE on device going from enabled to + * disabled, OF_RECONFIG_CHANGE_ADD on device going from disabled to + * enabled and OF_RECONFIG_NO_CHANGE on no change. */ int of_reconfig_get_state_change(unsigned long action, struct of_reconfig_data *pr) { From be1ab8bf0510e3f898d1696cd50a2506d4ed9ff8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 29 Nov 2023 16:06:30 +0000 Subject: [PATCH 17/68] ipv6: fix potential NULL deref in fib6_add() [ Upstream commit 75475bb51e78a3f54ad2f69380f2a1c985e85f2d ] If fib6_find_prefix() returns NULL, we should silently fallback using fib6_null_entry regardless of RT6_DEBUG value. syzbot reported: WARNING: CPU: 0 PID: 5477 at net/ipv6/ip6_fib.c:1516 fib6_add+0x310d/0x3fa0 net/ipv6/ip6_fib.c:1516 Modules linked in: CPU: 0 PID: 5477 Comm: syz-executor.0 Not tainted 6.7.0-rc2-syzkaller-00029-g9b6de136b5f0 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/10/2023 RIP: 0010:fib6_add+0x310d/0x3fa0 net/ipv6/ip6_fib.c:1516 Code: 00 48 8b 54 24 68 e8 42 22 00 00 48 85 c0 74 14 49 89 c6 e8 d5 d3 c2 f7 eb 5d e8 ce d3 c2 f7 e9 ca 00 00 00 e8 c4 d3 c2 f7 90 <0f> 0b 90 48 b8 00 00 00 00 00 fc ff df 48 8b 4c 24 38 80 3c 01 00 RSP: 0018:ffffc90005067740 EFLAGS: 00010293 RAX: ffffffff89cba5bc RBX: ffffc90005067ab0 RCX: ffff88801a2e9dc0 RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000000 RBP: ffffc90005067980 R08: ffffffff89cbca85 R09: 1ffff110040d4b85 R10: dffffc0000000000 R11: ffffed10040d4b86 R12: 00000000ffffffff R13: 1ffff110051c3904 R14: ffff8880206a5c00 R15: ffff888028e1c820 FS: 00007f763783c6c0(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f763783bff8 CR3: 000000007f74d000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __ip6_ins_rt net/ipv6/route.c:1303 [inline] ip6_route_add+0x88/0x120 net/ipv6/route.c:3847 ipv6_route_ioctl+0x525/0x7b0 net/ipv6/route.c:4467 inet6_ioctl+0x21a/0x270 net/ipv6/af_inet6.c:575 sock_do_ioctl+0x152/0x460 net/socket.c:1220 sock_ioctl+0x615/0x8c0 net/socket.c:1339 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:871 [inline] __se_sys_ioctl+0xf8/0x170 fs/ioctl.c:857 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x45/0x110 arch/x86/entry/common.c:82 Fixes: 7bbfe00e0252 ("ipv6: fix general protection fault in fib6_add()") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Wei Wang Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20231129160630.3509216-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv6/ip6_fib.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index ef55489651f8..d74a825c50f0 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1433,13 +1433,9 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) { pn_leaf = fib6_find_prefix(info->nl_net, table, pn); -#if RT6_DEBUG >= 2 - if (!pn_leaf) { - WARN_ON(!pn_leaf); + if (!pn_leaf) pn_leaf = info->nl_net->ipv6.fib6_null_entry; - } -#endif fib6_info_hold(pn_leaf); rcu_assign_pointer(pn->leaf, pn_leaf); } From 9f5a25aa1bccd175a51e4b2482c643b4e2171825 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 29 Nov 2023 21:58:53 -0800 Subject: [PATCH 18/68] hv_netvsc: rndis_filter needs to select NLS [ Upstream commit 6c89f49964375c904cea33c0247467873f4daf2c ] rndis_filter uses utf8s_to_utf16s() which is provided by setting NLS, so select NLS to fix the build error: ERROR: modpost: "utf8s_to_utf16s" [drivers/net/hyperv/hv_netvsc.ko] undefined! Fixes: 1ce09e899d28 ("hyperv: Add support for setting MAC from within guests") Signed-off-by: Randy Dunlap Cc: Haiyang Zhang Cc: K. Y. Srinivasan Cc: Wei Liu Cc: Dexuan Cui Reviewed-by: Simon Horman Tested-by: Simon Horman # build-tested Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20231130055853.19069-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/hyperv/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/hyperv/Kconfig b/drivers/net/hyperv/Kconfig index ca7bf7f897d3..c8cbd85adcf9 100644 --- a/drivers/net/hyperv/Kconfig +++ b/drivers/net/hyperv/Kconfig @@ -3,5 +3,6 @@ config HYPERV_NET tristate "Microsoft Hyper-V virtual network driver" depends on HYPERV select UCS2_STRING + select NLS help Select this option to enable the Hyper-V virtual network driver. From d124c18267b1cf3da2b6f2e74a473fc6cd752fae Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Thu, 28 Jan 2021 20:48:02 +0100 Subject: [PATCH 19/68] net: arcnet: Fix RESET flag handling [ Upstream commit 01365633bd1c836240f9bbf86bbeee749795480a ] The main arcnet interrupt handler calls arcnet_close() then arcnet_open(), if the RESET status flag is encountered. This is invalid: 1) In general, interrupt handlers should never call ->ndo_stop() and ->ndo_open() functions. They are usually full of blocking calls and other methods that are expected to be called only from drivers init and exit code paths. 2) arcnet_close() contains a del_timer_sync(). If the irq handler interrupts the to-be-deleted timer, del_timer_sync() will just loop forever. 3) arcnet_close() also calls tasklet_kill(), which has a warning if called from irq context. 4) For device reset, the sequence "arcnet_close(); arcnet_open();" is not complete. Some children arcnet drivers have special init/exit code sequences, which then embed a call to arcnet_open() and arcnet_close() accordingly. Check drivers/net/arcnet/com20020.c. Run the device RESET sequence from a scheduled workqueue instead. Signed-off-by: Ahmed S. Darwish Signed-off-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20210128194802.727770-1-a.darwish@linutronix.de Signed-off-by: Jakub Kicinski Stable-dep-of: 6b17a597fc2f ("arcnet: restoring support for multiple Sohard Arcnet cards") Signed-off-by: Sasha Levin --- drivers/net/arcnet/arc-rimi.c | 4 +- drivers/net/arcnet/arcdevice.h | 6 +++ drivers/net/arcnet/arcnet.c | 66 +++++++++++++++++++++++++++++-- drivers/net/arcnet/com20020-isa.c | 4 +- drivers/net/arcnet/com20020-pci.c | 2 +- drivers/net/arcnet/com20020_cs.c | 2 +- drivers/net/arcnet/com90io.c | 4 +- drivers/net/arcnet/com90xx.c | 4 +- 8 files changed, 78 insertions(+), 14 deletions(-) diff --git a/drivers/net/arcnet/arc-rimi.c b/drivers/net/arcnet/arc-rimi.c index 14a5fb378145..313b636edf23 100644 --- a/drivers/net/arcnet/arc-rimi.c +++ b/drivers/net/arcnet/arc-rimi.c @@ -332,7 +332,7 @@ static int __init arc_rimi_init(void) dev->irq = 9; if (arcrimi_probe(dev)) { - free_netdev(dev); + free_arcdev(dev); return -EIO; } @@ -349,7 +349,7 @@ static void __exit arc_rimi_exit(void) iounmap(lp->mem_start); release_mem_region(dev->mem_start, dev->mem_end - dev->mem_start + 1); free_irq(dev->irq, dev); - free_netdev(dev); + free_arcdev(dev); } #ifndef MODULE diff --git a/drivers/net/arcnet/arcdevice.h b/drivers/net/arcnet/arcdevice.h index b0f5bc07aef5..7178f5349fa8 100644 --- a/drivers/net/arcnet/arcdevice.h +++ b/drivers/net/arcnet/arcdevice.h @@ -298,6 +298,10 @@ struct arcnet_local { int excnak_pending; /* We just got an excesive nak interrupt */ + /* RESET flag handling */ + int reset_in_progress; + struct work_struct reset_work; + struct { uint16_t sequence; /* sequence number (incs with each packet) */ __be16 aborted_seq; @@ -350,7 +354,9 @@ void arcnet_dump_skb(struct net_device *dev, struct sk_buff *skb, char *desc) void arcnet_unregister_proto(struct ArcProto *proto); irqreturn_t arcnet_interrupt(int irq, void *dev_id); + struct net_device *alloc_arcdev(const char *name); +void free_arcdev(struct net_device *dev); int arcnet_open(struct net_device *dev); int arcnet_close(struct net_device *dev); diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index 2b112d3d8540..cf652c76af85 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -387,10 +387,44 @@ static void arcnet_timer(struct timer_list *t) struct arcnet_local *lp = from_timer(lp, t, timer); struct net_device *dev = lp->dev; - if (!netif_carrier_ok(dev)) { + spin_lock_irq(&lp->lock); + + if (!lp->reset_in_progress && !netif_carrier_ok(dev)) { netif_carrier_on(dev); netdev_info(dev, "link up\n"); } + + spin_unlock_irq(&lp->lock); +} + +static void reset_device_work(struct work_struct *work) +{ + struct arcnet_local *lp; + struct net_device *dev; + + lp = container_of(work, struct arcnet_local, reset_work); + dev = lp->dev; + + /* Do not bring the network interface back up if an ifdown + * was already done. + */ + if (!netif_running(dev) || !lp->reset_in_progress) + return; + + rtnl_lock(); + + /* Do another check, in case of an ifdown that was triggered in + * the small race window between the exit condition above and + * acquiring RTNL. + */ + if (!netif_running(dev) || !lp->reset_in_progress) + goto out; + + dev_close(dev); + dev_open(dev, NULL); + +out: + rtnl_unlock(); } static void arcnet_reply_tasklet(unsigned long data) @@ -452,12 +486,25 @@ struct net_device *alloc_arcdev(const char *name) lp->dev = dev; spin_lock_init(&lp->lock); timer_setup(&lp->timer, arcnet_timer, 0); + INIT_WORK(&lp->reset_work, reset_device_work); } return dev; } EXPORT_SYMBOL(alloc_arcdev); +void free_arcdev(struct net_device *dev) +{ + struct arcnet_local *lp = netdev_priv(dev); + + /* Do not cancel this at ->ndo_close(), as the workqueue itself + * indirectly calls the ifdown path through dev_close(). + */ + cancel_work_sync(&lp->reset_work); + free_netdev(dev); +} +EXPORT_SYMBOL(free_arcdev); + /* Open/initialize the board. This is called sometime after booting when * the 'ifconfig' program is run. * @@ -587,6 +634,10 @@ int arcnet_close(struct net_device *dev) /* shut down the card */ lp->hw.close(dev); + + /* reset counters */ + lp->reset_in_progress = 0; + module_put(lp->hw.owner); return 0; } @@ -820,6 +871,9 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) spin_lock_irqsave(&lp->lock, flags); + if (lp->reset_in_progress) + goto out; + /* RESET flag was enabled - if device is not running, we must * clear it right away (but nothing else). */ @@ -852,11 +906,14 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) if (status & RESETflag) { arc_printk(D_NORMAL, dev, "spurious reset (status=%Xh)\n", status); - arcnet_close(dev); - arcnet_open(dev); + + lp->reset_in_progress = 1; + netif_stop_queue(dev); + netif_carrier_off(dev); + schedule_work(&lp->reset_work); /* get out of the interrupt handler! */ - break; + goto out; } /* RX is inhibited - we must have received something. * Prepare to receive into the next buffer. @@ -1052,6 +1109,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) udelay(1); lp->hw.intmask(dev, lp->intmask); +out: spin_unlock_irqrestore(&lp->lock, flags); return retval; } diff --git a/drivers/net/arcnet/com20020-isa.c b/drivers/net/arcnet/com20020-isa.c index cd27fdc1059b..141b05451252 100644 --- a/drivers/net/arcnet/com20020-isa.c +++ b/drivers/net/arcnet/com20020-isa.c @@ -169,7 +169,7 @@ static int __init com20020_init(void) dev->irq = 9; if (com20020isa_probe(dev)) { - free_netdev(dev); + free_arcdev(dev); return -EIO; } @@ -182,7 +182,7 @@ static void __exit com20020_exit(void) unregister_netdev(my_dev); free_irq(my_dev->irq, my_dev); release_region(my_dev->base_addr, ARCNET_TOTAL_SIZE); - free_netdev(my_dev); + free_arcdev(my_dev); } #ifndef MODULE diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c index 9f44e2e458df..b4f8798d8c50 100644 --- a/drivers/net/arcnet/com20020-pci.c +++ b/drivers/net/arcnet/com20020-pci.c @@ -294,7 +294,7 @@ static void com20020pci_remove(struct pci_dev *pdev) unregister_netdev(dev); free_irq(dev->irq, dev); - free_netdev(dev); + free_arcdev(dev); } } diff --git a/drivers/net/arcnet/com20020_cs.c b/drivers/net/arcnet/com20020_cs.c index cf607ffcf358..9cc5eb6a8e90 100644 --- a/drivers/net/arcnet/com20020_cs.c +++ b/drivers/net/arcnet/com20020_cs.c @@ -177,7 +177,7 @@ static void com20020_detach(struct pcmcia_device *link) dev = info->dev; if (dev) { dev_dbg(&link->dev, "kfree...\n"); - free_netdev(dev); + free_arcdev(dev); } dev_dbg(&link->dev, "kfree2...\n"); kfree(info); diff --git a/drivers/net/arcnet/com90io.c b/drivers/net/arcnet/com90io.c index 186bbf87bc84..5843b8976fd1 100644 --- a/drivers/net/arcnet/com90io.c +++ b/drivers/net/arcnet/com90io.c @@ -396,7 +396,7 @@ static int __init com90io_init(void) err = com90io_probe(dev); if (err) { - free_netdev(dev); + free_arcdev(dev); return err; } @@ -419,7 +419,7 @@ static void __exit com90io_exit(void) free_irq(dev->irq, dev); release_region(dev->base_addr, ARCNET_TOTAL_SIZE); - free_netdev(dev); + free_arcdev(dev); } module_init(com90io_init) diff --git a/drivers/net/arcnet/com90xx.c b/drivers/net/arcnet/com90xx.c index bd75d06ad7df..5e40ecf189b1 100644 --- a/drivers/net/arcnet/com90xx.c +++ b/drivers/net/arcnet/com90xx.c @@ -554,7 +554,7 @@ static int __init com90xx_found(int ioaddr, int airq, u_long shmem, err_release_mem: release_mem_region(dev->mem_start, dev->mem_end - dev->mem_start + 1); err_free_dev: - free_netdev(dev); + free_arcdev(dev); return -EIO; } @@ -672,7 +672,7 @@ static void __exit com90xx_exit(void) release_region(dev->base_addr, ARCNET_TOTAL_SIZE); release_mem_region(dev->mem_start, dev->mem_end - dev->mem_start + 1); - free_netdev(dev); + free_arcdev(dev); } } From f265467319337d94caf6fede0042da50474d3e40 Mon Sep 17 00:00:00 2001 From: Tong Zhang Date: Sun, 14 Mar 2021 14:08:36 -0400 Subject: [PATCH 20/68] net: arcnet: com20020 fix error handling [ Upstream commit 6577b9a551aedb86bca6d4438c28386361845108 ] There are two issues when handling error case in com20020pci_probe() 1. priv might be not initialized yet when calling com20020pci_remove() from com20020pci_probe(), since the priv is set at the very last but it can jump to error handling in the middle and priv remains NULL. 2. memory leak - the net device is allocated in alloc_arcdev but not properly released if error happens in the middle of the big for loop [ 1.529110] BUG: kernel NULL pointer dereference, address: 0000000000000008 [ 1.531447] RIP: 0010:com20020pci_remove+0x15/0x60 [com20020_pci] [ 1.536805] Call Trace: [ 1.536939] com20020pci_probe+0x3f2/0x48c [com20020_pci] [ 1.537226] local_pci_probe+0x48/0x80 [ 1.539918] com20020pci_init+0x3f/0x1000 [com20020_pci] Signed-off-by: Tong Zhang Signed-off-by: David S. Miller Stable-dep-of: 6b17a597fc2f ("arcnet: restoring support for multiple Sohard Arcnet cards") Signed-off-by: Sasha Levin --- drivers/net/arcnet/com20020-pci.c | 34 +++++++++++++++++-------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c index b4f8798d8c50..28dccbc0e8d8 100644 --- a/drivers/net/arcnet/com20020-pci.c +++ b/drivers/net/arcnet/com20020-pci.c @@ -127,6 +127,8 @@ static int com20020pci_probe(struct pci_dev *pdev, int i, ioaddr, ret; struct resource *r; + ret = 0; + if (pci_enable_device(pdev)) return -EIO; @@ -142,6 +144,8 @@ static int com20020pci_probe(struct pci_dev *pdev, priv->ci = ci; mm = &ci->misc_map; + pci_set_drvdata(pdev, priv); + INIT_LIST_HEAD(&priv->list_dev); if (mm->size) { @@ -164,7 +168,7 @@ static int com20020pci_probe(struct pci_dev *pdev, dev = alloc_arcdev(device); if (!dev) { ret = -ENOMEM; - goto out_port; + break; } dev->dev_port = i; @@ -181,7 +185,7 @@ static int com20020pci_probe(struct pci_dev *pdev, pr_err("IO region %xh-%xh already allocated\n", ioaddr, ioaddr + cm->size - 1); ret = -EBUSY; - goto out_port; + goto err_free_arcdev; } /* Dummy access after Reset @@ -219,18 +223,18 @@ static int com20020pci_probe(struct pci_dev *pdev, if (arcnet_inb(ioaddr, COM20020_REG_R_STATUS) == 0xFF) { pr_err("IO address %Xh is empty!\n", ioaddr); ret = -EIO; - goto out_port; + goto err_free_arcdev; } if (com20020_check(dev)) { ret = -EIO; - goto out_port; + goto err_free_arcdev; } card = devm_kzalloc(&pdev->dev, sizeof(struct com20020_dev), GFP_KERNEL); if (!card) { ret = -ENOMEM; - goto out_port; + goto err_free_arcdev; } card->index = i; @@ -256,29 +260,29 @@ static int com20020pci_probe(struct pci_dev *pdev, ret = devm_led_classdev_register(&pdev->dev, &card->tx_led); if (ret) - goto out_port; + goto err_free_arcdev; ret = devm_led_classdev_register(&pdev->dev, &card->recon_led); if (ret) - goto out_port; + goto err_free_arcdev; dev_set_drvdata(&dev->dev, card); ret = com20020_found(dev, IRQF_SHARED); if (ret) - goto out_port; + goto err_free_arcdev; devm_arcnet_led_init(dev, dev->dev_id, i); list_add(&card->list, &priv->list_dev); + continue; + +err_free_arcdev: + free_arcdev(dev); + break; } - - pci_set_drvdata(pdev, priv); - - return 0; - -out_port: - com20020pci_remove(pdev); + if (ret) + com20020pci_remove(pdev); return ret; } From e38cd53421ed4e37fc99662a0f2a0c567993844f Mon Sep 17 00:00:00 2001 From: Thomas Reichinger Date: Thu, 30 Nov 2023 12:35:03 +0100 Subject: [PATCH 21/68] arcnet: restoring support for multiple Sohard Arcnet cards [ Upstream commit 6b17a597fc2f13aaaa0a2780eb7edb9ae7ac9aea ] Probe of Sohard Arcnet cards fails, if 2 or more cards are installed in a system. See kernel log: [ 2.759203] arcnet: arcnet loaded [ 2.763648] arcnet:com20020: COM20020 chipset support (by David Woodhouse et al.) [ 2.770585] arcnet:com20020_pci: COM20020 PCI support [ 2.772295] com20020 0000:02:00.0: enabling device (0000 -> 0003) [ 2.772354] (unnamed net_device) (uninitialized): PLX-PCI Controls ... [ 3.071301] com20020 0000:02:00.0 arc0-0 (uninitialized): PCI COM20020: station FFh found at F080h, IRQ 101. [ 3.071305] com20020 0000:02:00.0 arc0-0 (uninitialized): Using CKP 64 - data rate 2.5 Mb/s [ 3.071534] com20020 0000:07:00.0: enabling device (0000 -> 0003) [ 3.071581] (unnamed net_device) (uninitialized): PLX-PCI Controls ... [ 3.369501] com20020 0000:07:00.0: Led pci:green:tx:0-0 renamed to pci:green:tx:0-0_1 due to name collision [ 3.369535] com20020 0000:07:00.0: Led pci:red:recon:0-0 renamed to pci:red:recon:0-0_1 due to name collision [ 3.370586] com20020 0000:07:00.0 arc0-0 (uninitialized): PCI COM20020: station E1h found at C000h, IRQ 35. [ 3.370589] com20020 0000:07:00.0 arc0-0 (uninitialized): Using CKP 64 - data rate 2.5 Mb/s [ 3.370608] com20020: probe of 0000:07:00.0 failed with error -5 commit 5ef216c1f848 ("arcnet: com20020-pci: add rotary index support") changes the device name of all COM20020 based PCI cards, even if only some cards support this: snprintf(dev->name, sizeof(dev->name), "arc%d-%d", dev->dev_id, i); The error happens because all Sohard Arcnet cards would be called arc0-0, since the Sohard Arcnet cards don't have a PLX rotary coder. I.e. EAE Arcnet cards have a PLX rotary coder, which sets the first decimal, ensuring unique devices names. This patch adds two new card feature flags to indicate which cards support LEDs and the PLX rotary coder. For EAE based cards the names still depend on the PLX rotary coder (untested, since missing EAE hardware). For Sohard based cards, this patch will result in devices being called arc0, arc1, ... (tested). Signed-off-by: Thomas Reichinger Fixes: 5ef216c1f848 ("arcnet: com20020-pci: add rotary index support") Link: https://lore.kernel.org/r/20231130113503.6812-1-thomas.reichinger@sohard.de Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/arcnet/arcdevice.h | 2 + drivers/net/arcnet/com20020-pci.c | 89 ++++++++++++++++--------------- 2 files changed, 48 insertions(+), 43 deletions(-) diff --git a/drivers/net/arcnet/arcdevice.h b/drivers/net/arcnet/arcdevice.h index 7178f5349fa8..7b783ee91834 100644 --- a/drivers/net/arcnet/arcdevice.h +++ b/drivers/net/arcnet/arcdevice.h @@ -186,6 +186,8 @@ do { \ #define ARC_IS_5MBIT 1 /* card default speed is 5MBit */ #define ARC_CAN_10MBIT 2 /* card uses COM20022, supporting 10MBit, but default is 2.5MBit. */ +#define ARC_HAS_LED 4 /* card has software controlled LEDs */ +#define ARC_HAS_ROTARY 8 /* card has rotary encoder */ /* information needed to define an encapsulation driver */ struct ArcProto { diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c index 28dccbc0e8d8..9d9e4200064f 100644 --- a/drivers/net/arcnet/com20020-pci.c +++ b/drivers/net/arcnet/com20020-pci.c @@ -213,12 +213,13 @@ static int com20020pci_probe(struct pci_dev *pdev, if (!strncmp(ci->name, "EAE PLX-PCI FB2", 15)) lp->backplane = 1; - /* Get the dev_id from the PLX rotary coder */ - if (!strncmp(ci->name, "EAE PLX-PCI MA1", 15)) - dev_id_mask = 0x3; - dev->dev_id = (inb(priv->misc + ci->rotary) >> 4) & dev_id_mask; - - snprintf(dev->name, sizeof(dev->name), "arc%d-%d", dev->dev_id, i); + if (ci->flags & ARC_HAS_ROTARY) { + /* Get the dev_id from the PLX rotary coder */ + if (!strncmp(ci->name, "EAE PLX-PCI MA1", 15)) + dev_id_mask = 0x3; + dev->dev_id = (inb(priv->misc + ci->rotary) >> 4) & dev_id_mask; + snprintf(dev->name, sizeof(dev->name), "arc%d-%d", dev->dev_id, i); + } if (arcnet_inb(ioaddr, COM20020_REG_R_STATUS) == 0xFF) { pr_err("IO address %Xh is empty!\n", ioaddr); @@ -230,6 +231,10 @@ static int com20020pci_probe(struct pci_dev *pdev, goto err_free_arcdev; } + ret = com20020_found(dev, IRQF_SHARED); + if (ret) + goto err_free_arcdev; + card = devm_kzalloc(&pdev->dev, sizeof(struct com20020_dev), GFP_KERNEL); if (!card) { @@ -239,41 +244,39 @@ static int com20020pci_probe(struct pci_dev *pdev, card->index = i; card->pci_priv = priv; - card->tx_led.brightness_set = led_tx_set; - card->tx_led.default_trigger = devm_kasprintf(&pdev->dev, - GFP_KERNEL, "arc%d-%d-tx", - dev->dev_id, i); - card->tx_led.name = devm_kasprintf(&pdev->dev, GFP_KERNEL, - "pci:green:tx:%d-%d", - dev->dev_id, i); - card->tx_led.dev = &dev->dev; - card->recon_led.brightness_set = led_recon_set; - card->recon_led.default_trigger = devm_kasprintf(&pdev->dev, - GFP_KERNEL, "arc%d-%d-recon", - dev->dev_id, i); - card->recon_led.name = devm_kasprintf(&pdev->dev, GFP_KERNEL, - "pci:red:recon:%d-%d", - dev->dev_id, i); - card->recon_led.dev = &dev->dev; + if (ci->flags & ARC_HAS_LED) { + card->tx_led.brightness_set = led_tx_set; + card->tx_led.default_trigger = devm_kasprintf(&pdev->dev, + GFP_KERNEL, "arc%d-%d-tx", + dev->dev_id, i); + card->tx_led.name = devm_kasprintf(&pdev->dev, GFP_KERNEL, + "pci:green:tx:%d-%d", + dev->dev_id, i); + + card->tx_led.dev = &dev->dev; + card->recon_led.brightness_set = led_recon_set; + card->recon_led.default_trigger = devm_kasprintf(&pdev->dev, + GFP_KERNEL, "arc%d-%d-recon", + dev->dev_id, i); + card->recon_led.name = devm_kasprintf(&pdev->dev, GFP_KERNEL, + "pci:red:recon:%d-%d", + dev->dev_id, i); + card->recon_led.dev = &dev->dev; + + ret = devm_led_classdev_register(&pdev->dev, &card->tx_led); + if (ret) + goto err_free_arcdev; + + ret = devm_led_classdev_register(&pdev->dev, &card->recon_led); + if (ret) + goto err_free_arcdev; + + dev_set_drvdata(&dev->dev, card); + devm_arcnet_led_init(dev, dev->dev_id, i); + } + card->dev = dev; - - ret = devm_led_classdev_register(&pdev->dev, &card->tx_led); - if (ret) - goto err_free_arcdev; - - ret = devm_led_classdev_register(&pdev->dev, &card->recon_led); - if (ret) - goto err_free_arcdev; - - dev_set_drvdata(&dev->dev, card); - - ret = com20020_found(dev, IRQF_SHARED); - if (ret) - goto err_free_arcdev; - - devm_arcnet_led_init(dev, dev->dev_id, i); - list_add(&card->list, &priv->list_dev); continue; @@ -329,7 +332,7 @@ static struct com20020_pci_card_info card_info_5mbit = { }; static struct com20020_pci_card_info card_info_sohard = { - .name = "PLX-PCI", + .name = "SOHARD SH ARC-PCI", .devcount = 1, /* SOHARD needs PCI base addr 4 */ .chan_map_tbl = { @@ -364,7 +367,7 @@ static struct com20020_pci_card_info card_info_eae_arc1 = { }, }, .rotary = 0x0, - .flags = ARC_CAN_10MBIT, + .flags = ARC_HAS_ROTARY | ARC_HAS_LED | ARC_CAN_10MBIT, }; static struct com20020_pci_card_info card_info_eae_ma1 = { @@ -396,7 +399,7 @@ static struct com20020_pci_card_info card_info_eae_ma1 = { }, }, .rotary = 0x0, - .flags = ARC_CAN_10MBIT, + .flags = ARC_HAS_ROTARY | ARC_HAS_LED | ARC_CAN_10MBIT, }; static struct com20020_pci_card_info card_info_eae_fb2 = { @@ -421,7 +424,7 @@ static struct com20020_pci_card_info card_info_eae_fb2 = { }, }, .rotary = 0x0, - .flags = ARC_CAN_10MBIT, + .flags = ARC_HAS_ROTARY | ARC_HAS_LED | ARC_CAN_10MBIT, }; static const struct pci_device_id com20020pci_id_table[] = { From 1ec21fde58daf930be64ba4659054373fd8435a2 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Sun, 3 Dec 2023 01:14:41 +0900 Subject: [PATCH 22/68] ipv4: ip_gre: Avoid skb_pull() failure in ipgre_xmit() [ Upstream commit 80d875cfc9d3711a029f234ef7d680db79e8fa4b ] In ipgre_xmit(), skb_pull() may fail even if pskb_inet_may_pull() returns true. For example, applications can use PF_PACKET to create a malformed packet with no IP header. This type of packet causes a problem such as uninit-value access. This patch ensures that skb_pull() can pull the required size by checking the skb with pskb_network_may_pull() before skb_pull(). Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.") Signed-off-by: Shigeru Yoshida Reviewed-by: Eric Dumazet Reviewed-by: Suman Ghosh Link: https://lore.kernel.org/r/20231202161441.221135-1-syoshida@redhat.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv4/ip_gre.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f8f008344273..db48dec61f30 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -607,15 +607,18 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, } if (dev->header_ops) { + int pull_len = tunnel->hlen + sizeof(struct iphdr); + if (skb_cow_head(skb, 0)) goto free_skb; tnl_params = (const struct iphdr *)skb->data; - /* Pull skb since ip_tunnel_xmit() needs skb->data pointing - * to gre header. - */ - skb_pull(skb, tunnel->hlen + sizeof(struct iphdr)); + if (!pskb_network_may_pull(skb, pull_len)) + goto free_skb; + + /* ip_tunnel_xmit() needs skb->data pointing to gre header. */ + skb_pull(skb, pull_len); skb_reset_mac_header(skb); if (skb->ip_summed == CHECKSUM_PARTIAL && From c61c61d7e7de951d585db4809544f3cc876ef4a7 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Mon, 4 Dec 2023 22:32:32 +0800 Subject: [PATCH 23/68] net: hns: fix fake link up on xge port [ Upstream commit f708aba40f9c1eeb9c7e93ed4863b5f85b09b288 ] If a xge port just connect with an optical module and no fiber, it may have a fake link up because there may be interference on the hardware. This patch adds an anti-shake to avoid the problem. And the time of anti-shake is base on tests. Fixes: b917078c1c10 ("net: hns: Add ACPI support to check SFP present") Signed-off-by: Yonglong Liu Signed-off-by: Jijie Shao Reviewed-by: Wojciech Drewek Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index 8aace2de0cc9..e34245649057 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -66,6 +66,27 @@ static enum mac_mode hns_get_enet_interface(const struct hns_mac_cb *mac_cb) } } +static u32 hns_mac_link_anti_shake(struct mac_driver *mac_ctrl_drv) +{ +#define HNS_MAC_LINK_WAIT_TIME 5 +#define HNS_MAC_LINK_WAIT_CNT 40 + + u32 link_status = 0; + int i; + + if (!mac_ctrl_drv->get_link_status) + return link_status; + + for (i = 0; i < HNS_MAC_LINK_WAIT_CNT; i++) { + msleep(HNS_MAC_LINK_WAIT_TIME); + mac_ctrl_drv->get_link_status(mac_ctrl_drv, &link_status); + if (!link_status) + break; + } + + return link_status; +} + void hns_mac_get_link_status(struct hns_mac_cb *mac_cb, u32 *link_status) { struct mac_driver *mac_ctrl_drv; @@ -83,6 +104,14 @@ void hns_mac_get_link_status(struct hns_mac_cb *mac_cb, u32 *link_status) &sfp_prsnt); if (!ret) *link_status = *link_status && sfp_prsnt; + + /* for FIBER port, it may have a fake link up. + * when the link status changes from down to up, we need to do + * anti-shake. the anti-shake time is base on tests. + * only FIBER port need to do this. + */ + if (*link_status && !mac_cb->link) + *link_status = hns_mac_link_anti_shake(mac_ctrl_drv); } mac_cb->link = *link_status; From 69431f609bf37311fbf90c507f8540f9ddf667c1 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Tue, 5 Dec 2023 21:58:12 +0100 Subject: [PATCH 24/68] netfilter: xt_owner: Fix for unsafe access of sk->sk_socket [ Upstream commit 7ae836a3d630e146b732fe8ef7d86b243748751f ] A concurrently running sock_orphan() may NULL the sk_socket pointer in between check and deref. Follow other users (like nft_meta.c for instance) and acquire sk_callback_lock before dereferencing sk_socket. Fixes: 0265ab44bacc ("[NETFILTER]: merge ipt_owner/ip6t_owner in xt_owner") Reported-by: Jann Horn Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/xt_owner.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index e85ce69924ae..50332888c8d2 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -76,18 +76,23 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) */ return false; - filp = sk->sk_socket->file; - if (filp == NULL) + read_lock_bh(&sk->sk_callback_lock); + filp = sk->sk_socket ? sk->sk_socket->file : NULL; + if (filp == NULL) { + read_unlock_bh(&sk->sk_callback_lock); return ((info->match ^ info->invert) & (XT_OWNER_UID | XT_OWNER_GID)) == 0; + } if (info->match & XT_OWNER_UID) { kuid_t uid_min = make_kuid(net->user_ns, info->uid_min); kuid_t uid_max = make_kuid(net->user_ns, info->uid_max); if ((uid_gte(filp->f_cred->fsuid, uid_min) && uid_lte(filp->f_cred->fsuid, uid_max)) ^ - !(info->invert & XT_OWNER_UID)) + !(info->invert & XT_OWNER_UID)) { + read_unlock_bh(&sk->sk_callback_lock); return false; + } } if (info->match & XT_OWNER_GID) { @@ -112,10 +117,13 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) } } - if (match ^ !(info->invert & XT_OWNER_GID)) + if (match ^ !(info->invert & XT_OWNER_GID)) { + read_unlock_bh(&sk->sk_callback_lock); return false; + } } + read_unlock_bh(&sk->sk_callback_lock); return true; } From 7ffff0cc929fdfc62a74b384c4903d6496c910f0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Dec 2023 16:18:41 +0000 Subject: [PATCH 25/68] tcp: do not accept ACK of bytes we never sent [ Upstream commit 3d501dd326fb1c73f1b8206d4c6e1d7b15c07e27 ] This patch is based on a detailed report and ideas from Yepeng Pan and Christian Rossow. ACK seq validation is currently following RFC 5961 5.2 guidelines: The ACK value is considered acceptable only if it is in the range of ((SND.UNA - MAX.SND.WND) <= SEG.ACK <= SND.NXT). All incoming segments whose ACK value doesn't satisfy the above condition MUST be discarded and an ACK sent back. It needs to be noted that RFC 793 on page 72 (fifth check) says: "If the ACK is a duplicate (SEG.ACK < SND.UNA), it can be ignored. If the ACK acknowledges something not yet sent (SEG.ACK > SND.NXT) then send an ACK, drop the segment, and return". The "ignored" above implies that the processing of the incoming data segment continues, which means the ACK value is treated as acceptable. This mitigation makes the ACK check more stringent since any ACK < SND.UNA wouldn't be accepted, instead only ACKs that are in the range ((SND.UNA - MAX.SND.WND) <= SEG.ACK <= SND.NXT) get through. This can be refined for new (and possibly spoofed) flows, by not accepting ACK for bytes that were never sent. This greatly improves TCP security at a little cost. I added a Fixes: tag to make sure this patch will reach stable trees, even if the 'blamed' patch was adhering to the RFC. tp->bytes_acked was added in linux-4.2 Following packetdrill test (courtesy of Yepeng Pan) shows the issue at hand: 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 +0 bind(3, ..., ...) = 0 +0 listen(3, 1024) = 0 // ---------------- Handshake ------------------- // // when window scale is set to 14 the window size can be extended to // 65535 * (2^14) = 1073725440. Linux would accept an ACK packet // with ack number in (Server_ISN+1-1073725440. Server_ISN+1) // ,though this ack number acknowledges some data never // sent by the server. +0 < S 0:0(0) win 65535 +0 > S. 0:0(0) ack 1 <...> +0 < . 1:1(0) ack 1 win 65535 +0 accept(3, ..., ...) = 4 // For the established connection, we send an ACK packet, // the ack packet uses ack number 1 - 1073725300 + 2^32, // where 2^32 is used to wrap around. // Note: we used 1073725300 instead of 1073725440 to avoid possible // edge cases. // 1 - 1073725300 + 2^32 = 3221241997 // Oops, old kernels happily accept this packet. +0 < . 1:1001(1000) ack 3221241997 win 65535 // After the kernel fix the following will be replaced by a challenge ACK, // and prior malicious frame would be dropped. +0 > . 1:1(0) ack 1001 Fixes: 354e4aa391ed ("tcp: RFC 5961 5.2 Blind Data Injection Attack Mitigation") Signed-off-by: Eric Dumazet Reported-by: Yepeng Pan Reported-by: Christian Rossow Acked-by: Neal Cardwell Link: https://lore.kernel.org/r/20231205161841.2702925-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/tcp_input.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fe5ef114beba..982fe464156a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3657,8 +3657,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) * then we can probably ignore it. */ if (before(ack, prior_snd_una)) { + u32 max_window; + + /* do not accept ACK for bytes we never sent. */ + max_window = min_t(u64, tp->max_window, tp->bytes_acked); /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */ - if (before(ack, prior_snd_una - tp->max_window)) { + if (before(ack, prior_snd_una - max_window)) { if (!(flag & FLAG_NO_CHALLENGE_ACK)) tcp_send_challenge_ack(sk, skb); return -1; From b4b89b7b2d4ba6cc22c2bcd19459aef7c921e589 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 6 Dec 2023 15:27:06 -0800 Subject: [PATCH 26/68] bpf: sockmap, updating the sg structure should also update curr [ Upstream commit bb9aefde5bbaf6c168c77ba635c155b4980c2287 ] Curr pointer should be updated when the sg structure is shifted. Fixes: 7246d8ed4dcce ("bpf: helper to pop data from messages") Signed-off-by: John Fastabend Link: https://lore.kernel.org/r/20231206232706.374377-3-john.fastabend@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/core/filter.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/net/core/filter.c b/net/core/filter.c index d866e1c5970c..3c4dcdc7217e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2219,6 +2219,22 @@ BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg *, msg, u32, bytes) return 0; } +static void sk_msg_reset_curr(struct sk_msg *msg) +{ + u32 i = msg->sg.start; + u32 len = 0; + + do { + len += sk_msg_elem(msg, i)->length; + sk_msg_iter_var_next(i); + if (len >= msg->sg.size) + break; + } while (i != msg->sg.end); + + msg->sg.curr = i; + msg->sg.copybreak = 0; +} + static const struct bpf_func_proto bpf_msg_cork_bytes_proto = { .func = bpf_msg_cork_bytes, .gpl_only = false, @@ -2338,6 +2354,7 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start, msg->sg.end - shift + NR_MSG_FRAG_IDS : msg->sg.end - shift; out: + sk_msg_reset_curr(msg); msg->data = sg_virt(&msg->sg.data[first_sge]) + start - offset; msg->data_end = msg->data + bytes; return 0; @@ -2471,6 +2488,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, msg->sg.data[new] = rsge; } + sk_msg_reset_curr(msg); sk_msg_compute_data_pointers(msg); return 0; } @@ -2642,6 +2660,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, sk_mem_uncharge(msg->sk, len - pop); msg->sg.size -= (len - pop); + sk_msg_reset_curr(msg); sk_msg_compute_data_pointers(msg); return 0; } From ad4cf776678bdb9c0e7559477ea067681e1789ef Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Tue, 21 Nov 2023 00:29:47 -0800 Subject: [PATCH 27/68] RDMA/bnxt_re: Correct module description string [ Upstream commit 422b19f7f006e813ee0865aadce6a62b3c263c42 ] The word "Driver" is repeated twice in the "modinfo bnxt_re" output description. Fix it. Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver") Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1700555387-6277-1-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index a40bc23196bf..34d1c4264602 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -70,7 +70,7 @@ static char version[] = BNXT_RE_DESC "\n"; MODULE_AUTHOR("Eddie Wai "); -MODULE_DESCRIPTION(BNXT_RE_DESC " Driver"); +MODULE_DESCRIPTION(BNXT_RE_DESC); MODULE_LICENSE("Dual BSD/GPL"); /* globals */ From febb7bbe290d1ed2748f90a888a9274706e8101a Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Fri, 24 Nov 2023 19:27:47 +0100 Subject: [PATCH 28/68] hwmon: (acpi_power_meter) Fix 4.29 MW bug [ Upstream commit 1fefca6c57fb928d2131ff365270cbf863d89c88 ] The ACPI specification says: "If an error occurs while obtaining the meter reading or if the value is not available then an Integer with all bits set is returned" Since the "integer" is 32 bits in case of the ACPI power meter, userspace will get a power reading of 2^32 * 1000 miliwatts (~4.29 MW) in case of such an error. This was discovered due to a lm_sensors bugreport (https://github.com/lm-sensors/lm-sensors/issues/460). Fix this by returning -ENODATA instead. Tested-by: Fixes: de584afa5e18 ("hwmon driver for ACPI 4.0 power meters") Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20231124182747.13956-1-W_Armin@gmx.de Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/acpi_power_meter.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hwmon/acpi_power_meter.c b/drivers/hwmon/acpi_power_meter.c index 740ac0a1b726..549c8f30acce 100644 --- a/drivers/hwmon/acpi_power_meter.c +++ b/drivers/hwmon/acpi_power_meter.c @@ -32,6 +32,7 @@ ACPI_MODULE_NAME(ACPI_POWER_METER_NAME); #define POWER_METER_CAN_NOTIFY (1 << 3) #define POWER_METER_IS_BATTERY (1 << 8) #define UNKNOWN_HYSTERESIS 0xFFFFFFFF +#define UNKNOWN_POWER 0xFFFFFFFF #define METER_NOTIFY_CONFIG 0x80 #define METER_NOTIFY_TRIP 0x81 @@ -343,6 +344,9 @@ static ssize_t show_power(struct device *dev, update_meter(resource); mutex_unlock(&resource->lock); + if (resource->power == UNKNOWN_POWER) + return -ENODATA; + return sprintf(buf, "%llu\n", resource->power * 1000); } From 4713be844546c25e523535f874d414bdbf2ab27c Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Mon, 4 Dec 2023 15:41:56 +0800 Subject: [PATCH 29/68] ASoC: wm_adsp: fix memleak in wm_adsp_buffer_populate [ Upstream commit 29046a78a3c0a1f8fa0427f164caa222f003cf5b ] When wm_adsp_buffer_read() fails, we should free buf->regions. Otherwise, the callers of wm_adsp_buffer_populate() will directly free buf on failure, which makes buf->regions a leaked memory. Fixes: a792af69b08f ("ASoC: wm_adsp: Refactor compress stream initialisation") Signed-off-by: Dinghao Liu Reviewed-by: Richard Fitzgerald Link: https://lore.kernel.org/r/20231204074158.12026-1-dinghao.liu@zju.edu.cn Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm_adsp.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index aedfa6b2895b..8df5f3bc6e97 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -3649,12 +3649,12 @@ static int wm_adsp_buffer_populate(struct wm_adsp_compr_buf *buf) ret = wm_adsp_buffer_read(buf, caps->region_defs[i].base_offset, ®ion->base_addr); if (ret < 0) - return ret; + goto err; ret = wm_adsp_buffer_read(buf, caps->region_defs[i].size_offset, &offset); if (ret < 0) - return ret; + goto err; region->cumulative_size = offset; @@ -3665,6 +3665,10 @@ static int wm_adsp_buffer_populate(struct wm_adsp_compr_buf *buf) } return 0; + +err: + kfree(buf->regions); + return ret; } static void wm_adsp_buffer_clear(struct wm_adsp_compr_buf *buf) From 8244ea916bfee68ef464285ad6810a8ef2160284 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Tue, 5 Dec 2023 17:17:35 +0100 Subject: [PATCH 30/68] tracing: Fix a warning when allocating buffered events fails [ Upstream commit 34209fe83ef8404353f91ab4ea4035dbc9922d04 ] Function trace_buffered_event_disable() produces an unexpected warning when the previous call to trace_buffered_event_enable() fails to allocate pages for buffered events. The situation can occur as follows: * The counter trace_buffered_event_ref is at 0. * The soft mode gets enabled for some event and trace_buffered_event_enable() is called. The function increments trace_buffered_event_ref to 1 and starts allocating event pages. * The allocation fails for some page and trace_buffered_event_disable() is called for cleanup. * Function trace_buffered_event_disable() decrements trace_buffered_event_ref back to 0, recognizes that it was the last use of buffered events and frees all allocated pages. * The control goes back to trace_buffered_event_enable() which returns. The caller of trace_buffered_event_enable() has no information that the function actually failed. * Some time later, the soft mode is disabled for the same event. Function trace_buffered_event_disable() is called. It warns on "WARN_ON_ONCE(!trace_buffered_event_ref)" and returns. Buffered events are just an optimization and can handle failures. Make trace_buffered_event_enable() exit on the first failure and left any cleanup later to when trace_buffered_event_disable() is called. Link: https://lore.kernel.org/all/20231127151248.7232-2-petr.pavlu@suse.com/ Link: https://lkml.kernel.org/r/20231205161736.19663-3-petr.pavlu@suse.com Fixes: 0fc1b09ff1ff ("tracing: Use temp buffer when filtering events") Signed-off-by: Petr Pavlu Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/trace.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a15dffe60722..b8030f1e577b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2438,8 +2438,11 @@ void trace_buffered_event_enable(void) for_each_tracing_cpu(cpu) { page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL | __GFP_NORETRY, 0); - if (!page) - goto failed; + /* This is just an optimization and can handle failures */ + if (!page) { + pr_err("Failed to allocate event buffer\n"); + break; + } event = page_address(page); memset(event, 0, sizeof(*event)); @@ -2453,10 +2456,6 @@ void trace_buffered_event_enable(void) WARN_ON_ONCE(1); preempt_enable(); } - - return; - failed: - trace_buffered_event_disable(); } static void enable_trace_buffered_event(void *data) From 59348f148235f0c7c3bceb1106fefd723da61a45 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Thu, 23 Nov 2023 16:19:41 +0800 Subject: [PATCH 31/68] scsi: be2iscsi: Fix a memleak in beiscsi_init_wrb_handle() [ Upstream commit 235f2b548d7f4ac5931d834f05d3f7f5166a2e72 ] When an error occurs in the for loop of beiscsi_init_wrb_handle(), we should free phwi_ctxt->be_wrbq before returning an error code to prevent potential memleak. Fixes: a7909b396ba7 ("[SCSI] be2iscsi: Fix dynamic CID allocation Mechanism in driver") Signed-off-by: Dinghao Liu Link: https://lore.kernel.org/r/20231123081941.24854-1-dinghao.liu@zju.edu.cn Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/be2iscsi/be_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index 53b33f146f82..d34c881c3f8a 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -2694,6 +2694,7 @@ static int beiscsi_init_wrb_handle(struct beiscsi_hba *phba) kfree(pwrb_context->pwrb_handle_base); kfree(pwrb_context->pwrb_handle_basestd); } + kfree(phwi_ctxt->be_wrbq); return -ENOMEM; } From 69b669cc638917542a4e0426140e541cada051ea Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Wed, 22 Nov 2023 14:46:36 +0800 Subject: [PATCH 32/68] ARM: imx: Check return value of devm_kasprintf in imx_mmdc_perf_init [ Upstream commit 1c2b1049af3f86545fcc5fae0fc725fb64b3a09e ] devm_kasprintf() returns a pointer to dynamically allocated memory which can be NULL upon failure. Ensure the allocation was successful by checking the pointer validity. Release the id allocated in 'mmdc_pmu_init' when 'devm_kasprintf' return NULL Suggested-by: Ahmad Fatoum Fixes: e76bdfd7403a ("ARM: imx: Added perf functionality to mmdc driver") Signed-off-by: Kunwu Chan Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/mach-imx/mmdc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c index b9efe9da06e0..3d76e8c28c51 100644 --- a/arch/arm/mach-imx/mmdc.c +++ b/arch/arm/mach-imx/mmdc.c @@ -502,6 +502,10 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "mmdc%d", ret); + if (!name) { + ret = -ENOMEM; + goto pmu_release_id; + } pmu_mmdc->mmdc_ipg_clk = mmdc_ipg_clk; pmu_mmdc->devtype_data = (struct fsl_mmdc_devtype_data *)of_id->data; @@ -524,9 +528,10 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b pmu_register_err: pr_warn("MMDC Perf PMU failed (%d), disabled\n", ret); - ida_simple_remove(&mmdc_ida, pmu_mmdc->id); cpuhp_state_remove_instance_nocalls(cpuhp_mmdc_state, &pmu_mmdc->node); hrtimer_cancel(&pmu_mmdc->hrtimer); +pmu_release_id: + ida_simple_remove(&mmdc_ida, pmu_mmdc->id); pmu_free: kfree(pmu_mmdc); return ret; From 4fe36f83f8d8a4e23fe0bfde707d56d5253a1b81 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Thu, 13 Feb 2020 10:52:56 +0800 Subject: [PATCH 33/68] ARM: dts: imx: make gpt node name generic [ Upstream commit 7c48b086965873c0aa93d99773cf64c033b76b2f ] Node name should be generic, use "timer" instead of "gpt" for gpt node. Signed-off-by: Anson Huang Signed-off-by: Shawn Guo Stable-dep-of: 397caf68e2d3 ("ARM: dts: imx7: Declare timers compatible with fsl,imx6dl-gpt") Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx6qdl.dtsi | 2 +- arch/arm/boot/dts/imx6sl.dtsi | 2 +- arch/arm/boot/dts/imx6sx.dtsi | 2 +- arch/arm/boot/dts/imx6ul.dtsi | 4 ++-- arch/arm/boot/dts/imx7s.dtsi | 8 ++++---- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index 861392ff7086..0150b2d5c534 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -578,7 +578,7 @@ status = "disabled"; }; - gpt: gpt@2098000 { + gpt: timer@2098000 { compatible = "fsl,imx6q-gpt", "fsl,imx31-gpt"; reg = <0x02098000 0x4000>; interrupts = <0 55 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm/boot/dts/imx6sl.dtsi b/arch/arm/boot/dts/imx6sl.dtsi index 3cf1da06e7f0..fbbae0004e62 100644 --- a/arch/arm/boot/dts/imx6sl.dtsi +++ b/arch/arm/boot/dts/imx6sl.dtsi @@ -386,7 +386,7 @@ clock-names = "ipg", "per"; }; - gpt: gpt@2098000 { + gpt: timer@2098000 { compatible = "fsl,imx6sl-gpt"; reg = <0x02098000 0x4000>; interrupts = <0 55 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi index 3dc1e97e145c..1afeae14560a 100644 --- a/arch/arm/boot/dts/imx6sx.dtsi +++ b/arch/arm/boot/dts/imx6sx.dtsi @@ -475,7 +475,7 @@ status = "disabled"; }; - gpt: gpt@2098000 { + gpt: timer@2098000 { compatible = "fsl,imx6sx-gpt", "fsl,imx6dl-gpt"; reg = <0x02098000 0x4000>; interrupts = ; diff --git a/arch/arm/boot/dts/imx6ul.dtsi b/arch/arm/boot/dts/imx6ul.dtsi index 5b677b66162a..7037d2a45e60 100644 --- a/arch/arm/boot/dts/imx6ul.dtsi +++ b/arch/arm/boot/dts/imx6ul.dtsi @@ -433,7 +433,7 @@ status = "disabled"; }; - gpt1: gpt@2098000 { + gpt1: timer@2098000 { compatible = "fsl,imx6ul-gpt", "fsl,imx6sx-gpt"; reg = <0x02098000 0x4000>; interrupts = ; @@ -707,7 +707,7 @@ reg = <0x020e4000 0x4000>; }; - gpt2: gpt@20e8000 { + gpt2: timer@20e8000 { compatible = "fsl,imx6ul-gpt", "fsl,imx6sx-gpt"; reg = <0x020e8000 0x4000>; interrupts = ; diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index 791530124fb0..f774759af1aa 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -446,7 +446,7 @@ fsl,input-sel = <&iomuxc>; }; - gpt1: gpt@302d0000 { + gpt1: timer@302d0000 { compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; reg = <0x302d0000 0x10000>; interrupts = ; @@ -455,7 +455,7 @@ clock-names = "ipg", "per"; }; - gpt2: gpt@302e0000 { + gpt2: timer@302e0000 { compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; reg = <0x302e0000 0x10000>; interrupts = ; @@ -465,7 +465,7 @@ status = "disabled"; }; - gpt3: gpt@302f0000 { + gpt3: timer@302f0000 { compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; reg = <0x302f0000 0x10000>; interrupts = ; @@ -475,7 +475,7 @@ status = "disabled"; }; - gpt4: gpt@30300000 { + gpt4: timer@30300000 { compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; reg = <0x30300000 0x10000>; interrupts = ; From 439166b1b2ee2512214a37a8514a04a04036582b Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 27 Nov 2023 17:05:01 +0100 Subject: [PATCH 34/68] ARM: dts: imx7: Declare timers compatible with fsl,imx6dl-gpt [ Upstream commit 397caf68e2d36532054cb14ae8995537f27f8b61 ] The timer nodes declare compatibility with "fsl,imx6sx-gpt", which itself is compatible with "fsl,imx6dl-gpt". Switch the fallback compatible from "fsl,imx6sx-gpt" to "fsl,imx6dl-gpt". Fixes: 949673450291 ("ARM: dts: add imx7d soc dtsi file") Signed-off-by: Philipp Zabel Signed-off-by: Roland Hieber Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx7s.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index f774759af1aa..7ce541fcac76 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -447,7 +447,7 @@ }; gpt1: timer@302d0000 { - compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; + compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt"; reg = <0x302d0000 0x10000>; interrupts = ; clocks = <&clks IMX7D_GPT1_ROOT_CLK>, @@ -456,7 +456,7 @@ }; gpt2: timer@302e0000 { - compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; + compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt"; reg = <0x302e0000 0x10000>; interrupts = ; clocks = <&clks IMX7D_GPT2_ROOT_CLK>, @@ -466,7 +466,7 @@ }; gpt3: timer@302f0000 { - compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; + compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt"; reg = <0x302f0000 0x10000>; interrupts = ; clocks = <&clks IMX7D_GPT3_ROOT_CLK>, @@ -476,7 +476,7 @@ }; gpt4: timer@30300000 { - compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; + compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt"; reg = <0x30300000 0x10000>; interrupts = ; clocks = <&clks IMX7D_GPT4_ROOT_CLK>, From 610ebc289582740c561d98a497bb6f5086d57c15 Mon Sep 17 00:00:00 2001 From: Jason Zhang Date: Wed, 6 Dec 2023 09:31:39 +0800 Subject: [PATCH 35/68] ALSA: pcm: fix out-of-bounds in snd_pcm_state_names commit 2b3a7a302c9804e463f2ea5b54dc3a6ad106a344 upstream. The pcm state can be SNDRV_PCM_STATE_DISCONNECTED at disconnect callback, and there is not an entry of SNDRV_PCM_STATE_DISCONNECTED in snd_pcm_state_names. This patch adds the missing entry to resolve this issue. cat /proc/asound/card2/pcm0p/sub0/status That results in stack traces like the following: [ 99.702732][ T5171] Unexpected kernel BRK exception at EL1 [ 99.702774][ T5171] Internal error: BRK handler: f2005512 [#1] PREEMPT SMP [ 99.703858][ T5171] Modules linked in: bcmdhd(E) (...) [ 99.747425][ T5171] CPU: 3 PID: 5171 Comm: cat Tainted: G C OE 5.10.189-android13-4-00003-g4a17384380d8-ab11086999 #1 [ 99.748447][ T5171] Hardware name: Rockchip RK3588 CVTE V10 Board (DT) [ 99.749024][ T5171] pstate: 60400005 (nZCv daif +PAN -UAO -TCO BTYPE=--) [ 99.749616][ T5171] pc : snd_pcm_substream_proc_status_read+0x264/0x2bc [ 99.750204][ T5171] lr : snd_pcm_substream_proc_status_read+0xa4/0x2bc [ 99.750778][ T5171] sp : ffffffc0175abae0 [ 99.751132][ T5171] x29: ffffffc0175abb80 x28: ffffffc009a2c498 [ 99.751665][ T5171] x27: 0000000000000001 x26: ffffff810cbae6e8 [ 99.752199][ T5171] x25: 0000000000400cc0 x24: ffffffc0175abc60 [ 99.752729][ T5171] x23: 0000000000000000 x22: ffffff802f558400 [ 99.753263][ T5171] x21: ffffff81d8d8ff00 x20: ffffff81020cdc00 [ 99.753795][ T5171] x19: ffffff802d110000 x18: ffffffc014fbd058 [ 99.754326][ T5171] x17: 0000000000000000 x16: 0000000000000000 [ 99.754861][ T5171] x15: 000000000000c276 x14: ffffffff9a976fda [ 99.755392][ T5171] x13: 0000000065689089 x12: 000000000000d72e [ 99.755923][ T5171] x11: ffffff802d110000 x10: 00000000000000e0 [ 99.756457][ T5171] x9 : 9c431600c8385d00 x8 : 0000000000000008 [ 99.756990][ T5171] x7 : 0000000000000000 x6 : 000000000000003f [ 99.757522][ T5171] x5 : 0000000000000040 x4 : ffffffc0175abb70 [ 99.758056][ T5171] x3 : 0000000000000001 x2 : 0000000000000001 [ 99.758588][ T5171] x1 : 0000000000000000 x0 : 0000000000000000 [ 99.759123][ T5171] Call trace: [ 99.759404][ T5171] snd_pcm_substream_proc_status_read+0x264/0x2bc [ 99.759958][ T5171] snd_info_seq_show+0x54/0xa4 [ 99.760370][ T5171] seq_read_iter+0x19c/0x7d4 [ 99.760770][ T5171] seq_read+0xf0/0x128 [ 99.761117][ T5171] proc_reg_read+0x100/0x1f8 [ 99.761515][ T5171] vfs_read+0xf4/0x354 [ 99.761869][ T5171] ksys_read+0x7c/0x148 [ 99.762226][ T5171] __arm64_sys_read+0x20/0x30 [ 99.762625][ T5171] el0_svc_common+0xd0/0x1e4 [ 99.763023][ T5171] el0_svc+0x28/0x98 [ 99.763358][ T5171] el0_sync_handler+0x8c/0xf0 [ 99.763759][ T5171] el0_sync+0x1b8/0x1c0 [ 99.764118][ T5171] Code: d65f03c0 b9406102 17ffffae 94191565 (d42aa240) [ 99.764715][ T5171] ---[ end trace 1eeffa3e17c58e10 ]--- [ 99.780720][ T5171] Kernel panic - not syncing: BRK handler: Fatal exception Signed-off-by: Jason Zhang Cc: Link: https://lore.kernel.org/r/20231206013139.20506-1-jason.zhang@rock-chips.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/pcm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/core/pcm.c b/sound/core/pcm.c index 3561cdceaadc..9722cf378d31 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -251,6 +251,7 @@ static char *snd_pcm_state_names[] = { STATE(DRAINING), STATE(PAUSED), STATE(SUSPENDED), + STATE(DISCONNECTED), }; static char *snd_pcm_access_names[] = { From cb74e8fd6b2d7bf8e5f765ff101236a56c71afd8 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 5 Dec 2023 17:59:47 +0900 Subject: [PATCH 36/68] nilfs2: prevent WARNING in nilfs_sufile_set_segment_usage() commit 675abf8df1353e0e3bde314993e0796c524cfbf0 upstream. If nilfs2 reads a disk image with corrupted segment usage metadata, and its segment usage information is marked as an error for the segment at the write location, nilfs_sufile_set_segment_usage() can trigger WARN_ONs during log writing. Segments newly allocated for writing with nilfs_sufile_alloc() will not have this error flag set, but this unexpected situation will occur if the segment indexed by either nilfs->ns_segnum or nilfs->ns_nextnum (active segment) was marked in error. Fix this issue by inserting a sanity check to treat it as a file system corruption. Since error returns are not allowed during the execution phase where nilfs_sufile_set_segment_usage() is used, this inserts the sanity check into nilfs_sufile_mark_dirty() which pre-reads the buffer containing the segment usage record to be updated and sets it up in a dirty state for writing. In addition, nilfs_sufile_set_segment_usage() is also called when canceling log writing and undoing segment usage update, so in order to avoid issuing the same kernel warning in that case, in case of cancellation, avoid checking the error flag in nilfs_sufile_set_segment_usage(). Link: https://lkml.kernel.org/r/20231205085947.4431-1-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Reported-by: syzbot+14e9f834f6ddecece094@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=14e9f834f6ddecece094 Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/sufile.c | 44 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index d85d3c758d7b..4626540008c1 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -504,15 +504,38 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum) down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); - if (!ret) { - mark_buffer_dirty(bh); - nilfs_mdt_mark_dirty(sufile); - kaddr = kmap_atomic(bh->b_page); - su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); - nilfs_segment_usage_set_dirty(su); + if (ret) + goto out_sem; + + kaddr = kmap_atomic(bh->b_page); + su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); + if (unlikely(nilfs_segment_usage_error(su))) { + struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; + kunmap_atomic(kaddr); brelse(bh); + if (nilfs_segment_is_active(nilfs, segnum)) { + nilfs_error(sufile->i_sb, + "active segment %llu is erroneous", + (unsigned long long)segnum); + } else { + /* + * Segments marked erroneous are never allocated by + * nilfs_sufile_alloc(); only active segments, ie, + * the segments indexed by ns_segnum or ns_nextnum, + * can be erroneous here. + */ + WARN_ON_ONCE(1); + } + ret = -EIO; + } else { + nilfs_segment_usage_set_dirty(su); + kunmap_atomic(kaddr); + mark_buffer_dirty(bh); + nilfs_mdt_mark_dirty(sufile); + brelse(bh); } +out_sem: up_write(&NILFS_MDT(sufile)->mi_sem); return ret; } @@ -539,9 +562,14 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, kaddr = kmap_atomic(bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); - WARN_ON(nilfs_segment_usage_error(su)); - if (modtime) + if (modtime) { + /* + * Check segusage error and set su_lastmod only when updating + * this entry with a valid timestamp, not for cancellation. + */ + WARN_ON_ONCE(nilfs_segment_usage_error(su)); su->su_lastmod = cpu_to_le64(modtime); + } su->su_nblocks = cpu_to_le32(nblocks); kunmap_atomic(kaddr); From 84302391d130ecbc935b4a3cf58b2799f38a9985 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 5 Dec 2023 16:52:09 -0500 Subject: [PATCH 37/68] tracing: Always update snapshot buffer size commit 7be76461f302ec05cbd62b90b2a05c64299ca01f upstream. It use to be that only the top level instance had a snapshot buffer (for latency tracers like wakeup and irqsoff). The update of the ring buffer size would check if the instance was the top level and if so, it would also update the snapshot buffer as it needs to be the same as the main buffer. Now that lower level instances also has a snapshot buffer, they too need to update their snapshot buffer sizes when the main buffer is changed, otherwise the following can be triggered: # cd /sys/kernel/tracing # echo 1500 > buffer_size_kb # mkdir instances/foo # echo irqsoff > instances/foo/current_tracer # echo 1000 > instances/foo/buffer_size_kb Produces: WARNING: CPU: 2 PID: 856 at kernel/trace/trace.c:1938 update_max_tr_single.part.0+0x27d/0x320 Which is: ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu); if (ret == -EBUSY) { [..] } WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY); <== here That's because ring_buffer_swap_cpu() has: int ret = -EINVAL; [..] /* At least make sure the two buffers are somewhat the same */ if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages) goto out; [..] out: return ret; } Instead, update all instances' snapshot buffer sizes when their main buffer size is updated. Link: https://lkml.kernel.org/r/20231205220010.454662151@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Fixes: 6d9b3fa5e7f6 ("tracing: Move tracing_max_latency into trace_array") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b8030f1e577b..8649e72099a8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5597,8 +5597,7 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, return ret; #ifdef CONFIG_TRACER_MAX_TRACE - if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) || - !tr->current_trace->use_max_tr) + if (!tr->current_trace->use_max_tr) goto out; ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu); From 6f2e50961fe3e53cbe9858cf7eb34cecf93879ea Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Tue, 5 Dec 2023 17:17:34 +0100 Subject: [PATCH 38/68] tracing: Fix incomplete locking when disabling buffered events commit 7fed14f7ac9cf5e38c693836fe4a874720141845 upstream. The following warning appears when using buffered events: [ 203.556451] WARNING: CPU: 53 PID: 10220 at kernel/trace/ring_buffer.c:3912 ring_buffer_discard_commit+0x2eb/0x420 [...] [ 203.670690] CPU: 53 PID: 10220 Comm: stress-ng-sysin Tainted: G E 6.7.0-rc2-default #4 56e6d0fcf5581e6e51eaaecbdaec2a2338c80f3a [ 203.670704] Hardware name: Intel Corp. GROVEPORT/GROVEPORT, BIOS GVPRCRB1.86B.0016.D04.1705030402 05/03/2017 [ 203.670709] RIP: 0010:ring_buffer_discard_commit+0x2eb/0x420 [ 203.735721] Code: 4c 8b 4a 50 48 8b 42 48 49 39 c1 0f 84 b3 00 00 00 49 83 e8 01 75 b1 48 8b 42 10 f0 ff 40 08 0f 0b e9 fc fe ff ff f0 ff 47 08 <0f> 0b e9 77 fd ff ff 48 8b 42 10 f0 ff 40 08 0f 0b e9 f5 fe ff ff [ 203.735734] RSP: 0018:ffffb4ae4f7b7d80 EFLAGS: 00010202 [ 203.735745] RAX: 0000000000000000 RBX: ffffb4ae4f7b7de0 RCX: ffff8ac10662c000 [ 203.735754] RDX: ffff8ac0c750be00 RSI: ffff8ac10662c000 RDI: ffff8ac0c004d400 [ 203.781832] RBP: ffff8ac0c039cea0 R08: 0000000000000000 R09: 0000000000000000 [ 203.781839] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 [ 203.781842] R13: ffff8ac10662c000 R14: ffff8ac0c004d400 R15: ffff8ac10662c008 [ 203.781846] FS: 00007f4cd8a67740(0000) GS:ffff8ad798880000(0000) knlGS:0000000000000000 [ 203.781851] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 203.781855] CR2: 0000559766a74028 CR3: 00000001804c4000 CR4: 00000000001506f0 [ 203.781862] Call Trace: [ 203.781870] [ 203.851949] trace_event_buffer_commit+0x1ea/0x250 [ 203.851967] trace_event_raw_event_sys_enter+0x83/0xe0 [ 203.851983] syscall_trace_enter.isra.0+0x182/0x1a0 [ 203.851990] do_syscall_64+0x3a/0xe0 [ 203.852075] entry_SYSCALL_64_after_hwframe+0x6e/0x76 [ 203.852090] RIP: 0033:0x7f4cd870fa77 [ 203.982920] Code: 00 b8 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 66 90 b8 89 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d e9 43 0e 00 f7 d8 64 89 01 48 [ 203.982932] RSP: 002b:00007fff99717dd8 EFLAGS: 00000246 ORIG_RAX: 0000000000000089 [ 203.982942] RAX: ffffffffffffffda RBX: 0000558ea1d7b6f0 RCX: 00007f4cd870fa77 [ 203.982948] RDX: 0000000000000000 RSI: 00007fff99717de0 RDI: 0000558ea1d7b6f0 [ 203.982957] RBP: 00007fff99717de0 R08: 00007fff997180e0 R09: 00007fff997180e0 [ 203.982962] R10: 00007fff997180e0 R11: 0000000000000246 R12: 00007fff99717f40 [ 204.049239] R13: 00007fff99718590 R14: 0000558e9f2127a8 R15: 00007fff997180b0 [ 204.049256] For instance, it can be triggered by running these two commands in parallel: $ while true; do echo hist:key=id.syscall:val=hitcount > \ /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger; done $ stress-ng --sysinfo $(nproc) The warning indicates that the current ring_buffer_per_cpu is not in the committing state. It happens because the active ring_buffer_event doesn't actually come from the ring_buffer_per_cpu but is allocated from trace_buffered_event. The bug is in function trace_buffered_event_disable() where the following normally happens: * The code invokes disable_trace_buffered_event() via smp_call_function_many() and follows it by synchronize_rcu(). This increments the per-CPU variable trace_buffered_event_cnt on each target CPU and grants trace_buffered_event_disable() the exclusive access to the per-CPU variable trace_buffered_event. * Maintenance is performed on trace_buffered_event, all per-CPU event buffers get freed. * The code invokes enable_trace_buffered_event() via smp_call_function_many(). This decrements trace_buffered_event_cnt and releases the access to trace_buffered_event. A problem is that smp_call_function_many() runs a given function on all target CPUs except on the current one. The following can then occur: * Task X executing trace_buffered_event_disable() runs on CPU 0. * The control reaches synchronize_rcu() and the task gets rescheduled on another CPU 1. * The RCU synchronization finishes. At this point, trace_buffered_event_disable() has the exclusive access to all trace_buffered_event variables except trace_buffered_event[CPU0] because trace_buffered_event_cnt[CPU0] is never incremented and if the buffer is currently unused, remains set to 0. * A different task Y is scheduled on CPU 0 and hits a trace event. The code in trace_event_buffer_lock_reserve() sees that trace_buffered_event_cnt[CPU0] is set to 0 and decides the use the buffer provided by trace_buffered_event[CPU0]. * Task X continues its execution in trace_buffered_event_disable(). The code incorrectly frees the event buffer pointed by trace_buffered_event[CPU0] and resets the variable to NULL. * Task Y writes event data to the now freed buffer and later detects the created inconsistency. The issue is observable since commit dea499781a11 ("tracing: Fix warning in trace_buffered_event_disable()") which moved the call of trace_buffered_event_disable() in __ftrace_event_enable_disable() earlier, prior to invoking call->class->reg(.. TRACE_REG_UNREGISTER ..). The underlying problem in trace_buffered_event_disable() is however present since the original implementation in commit 0fc1b09ff1ff ("tracing: Use temp buffer when filtering events"). Fix the problem by replacing the two smp_call_function_many() calls with on_each_cpu_mask() which invokes a given callback on all CPUs. Link: https://lore.kernel.org/all/20231127151248.7232-2-petr.pavlu@suse.com/ Link: https://lkml.kernel.org/r/20231205161736.19663-2-petr.pavlu@suse.com Cc: stable@vger.kernel.org Fixes: 0fc1b09ff1ff ("tracing: Use temp buffer when filtering events") Fixes: dea499781a11 ("tracing: Fix warning in trace_buffered_event_disable()") Signed-off-by: Petr Pavlu Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8649e72099a8..87efc6da7368 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2490,11 +2490,9 @@ void trace_buffered_event_disable(void) if (--trace_buffered_event_ref) return; - preempt_disable(); /* For each CPU, set the buffer as used. */ - smp_call_function_many(tracing_buffer_mask, - disable_trace_buffered_event, NULL, 1); - preempt_enable(); + on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event, + NULL, true); /* Wait for all current users to finish */ synchronize_rcu(); @@ -2509,11 +2507,9 @@ void trace_buffered_event_disable(void) */ smp_wmb(); - preempt_disable(); /* Do the work on each cpu */ - smp_call_function_many(tracing_buffer_mask, - enable_trace_buffered_event, NULL, 1); - preempt_enable(); + on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL, + true); } static struct ring_buffer *temp_buffer; From 965cbc6b623a84b381e73b94c20b3161c0f6f60f Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Tue, 5 Dec 2023 17:17:36 +0100 Subject: [PATCH 39/68] tracing: Fix a possible race when disabling buffered events commit c0591b1cccf708a47bc465c62436d669a4213323 upstream. Function trace_buffered_event_disable() is responsible for freeing pages backing buffered events and this process can run concurrently with trace_event_buffer_lock_reserve(). The following race is currently possible: * Function trace_buffered_event_disable() is called on CPU 0. It increments trace_buffered_event_cnt on each CPU and waits via synchronize_rcu() for each user of trace_buffered_event to complete. * After synchronize_rcu() is finished, function trace_buffered_event_disable() has the exclusive access to trace_buffered_event. All counters trace_buffered_event_cnt are at 1 and all pointers trace_buffered_event are still valid. * At this point, on a different CPU 1, the execution reaches trace_event_buffer_lock_reserve(). The function calls preempt_disable_notrace() and only now enters an RCU read-side critical section. The function proceeds and reads a still valid pointer from trace_buffered_event[CPU1] into the local variable "entry". However, it doesn't yet read trace_buffered_event_cnt[CPU1] which happens later. * Function trace_buffered_event_disable() continues. It frees trace_buffered_event[CPU1] and decrements trace_buffered_event_cnt[CPU1] back to 0. * Function trace_event_buffer_lock_reserve() continues. It reads and increments trace_buffered_event_cnt[CPU1] from 0 to 1. This makes it believe that it can use the "entry" that it already obtained but the pointer is now invalid and any access results in a use-after-free. Fix the problem by making a second synchronize_rcu() call after all trace_buffered_event values are set to NULL. This waits on all potential users in trace_event_buffer_lock_reserve() that still read a previous pointer from trace_buffered_event. Link: https://lore.kernel.org/all/20231127151248.7232-2-petr.pavlu@suse.com/ Link: https://lkml.kernel.org/r/20231205161736.19663-4-petr.pavlu@suse.com Cc: stable@vger.kernel.org Fixes: 0fc1b09ff1ff ("tracing: Use temp buffer when filtering events") Signed-off-by: Petr Pavlu Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 87efc6da7368..d7ca8f97b315 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2501,13 +2501,17 @@ void trace_buffered_event_disable(void) free_page((unsigned long)per_cpu(trace_buffered_event, cpu)); per_cpu(trace_buffered_event, cpu) = NULL; } - /* - * Make sure trace_buffered_event is NULL before clearing - * trace_buffered_event_cnt. - */ - smp_wmb(); - /* Do the work on each cpu */ + /* + * Wait for all CPUs that potentially started checking if they can use + * their event buffer only after the previous synchronize_rcu() call and + * they still read a valid pointer from trace_buffered_event. It must be + * ensured they don't see cleared trace_buffered_event_cnt else they + * could wrongly decide to use the pointed-to buffer which is now freed. + */ + synchronize_rcu(); + + /* For each CPU, relinquish the buffer */ on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL, true); } From 148d8f0707fa9093156c6dad1ae10e7e2a9c9c1a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 1 Dec 2023 14:10:21 +0100 Subject: [PATCH 40/68] packet: Move reference count in packet_sock to atomic_long_t commit db3fadacaf0c817b222090290d06ca2a338422d0 upstream. In some potential instances the reference count on struct packet_sock could be saturated and cause overflows which gets the kernel a bit confused. To prevent this, move to a 64-bit atomic reference count on 64-bit architectures to prevent the possibility of this type to overflow. Because we can not handle saturation, using refcount_t is not possible in this place. Maybe someday in the future if it changes it could be used. Also, instead of using plain atomic64_t, use atomic_long_t instead. 32-bit machines tend to be memory-limited (i.e. anything that increases a reference uses so much memory that you can't actually get to 2**32 references). 32-bit architectures also tend to have serious problems with 64-bit atomics. Hence, atomic_long_t is the more natural solution. Reported-by: "The UK's National Cyber Security Centre (NCSC)" Co-developed-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Daniel Borkmann Cc: Linus Torvalds Cc: stable@kernel.org Reviewed-by: Willem de Bruijn Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20231201131021.19999-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 16 ++++++++-------- net/packet/internal.h | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 0582abf5ab71..600a84a5c582 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4240,7 +4240,7 @@ static void packet_mm_open(struct vm_area_struct *vma) struct sock *sk = sock->sk; if (sk) - atomic_inc(&pkt_sk(sk)->mapped); + atomic_long_inc(&pkt_sk(sk)->mapped); } static void packet_mm_close(struct vm_area_struct *vma) @@ -4250,7 +4250,7 @@ static void packet_mm_close(struct vm_area_struct *vma) struct sock *sk = sock->sk; if (sk) - atomic_dec(&pkt_sk(sk)->mapped); + atomic_long_dec(&pkt_sk(sk)->mapped); } static const struct vm_operations_struct packet_mmap_ops = { @@ -4345,7 +4345,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, err = -EBUSY; if (!closing) { - if (atomic_read(&po->mapped)) + if (atomic_long_read(&po->mapped)) goto out; if (packet_read_pending(rb)) goto out; @@ -4448,7 +4448,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, err = -EBUSY; mutex_lock(&po->pg_vec_lock); - if (closing || atomic_read(&po->mapped) == 0) { + if (closing || atomic_long_read(&po->mapped) == 0) { err = 0; spin_lock_bh(&rb_queue->lock); swap(rb->pg_vec, pg_vec); @@ -4466,9 +4466,9 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, po->prot_hook.func = (po->rx_ring.pg_vec) ? tpacket_rcv : packet_rcv; skb_queue_purge(rb_queue); - if (atomic_read(&po->mapped)) - pr_err("packet_mmap: vma is busy: %d\n", - atomic_read(&po->mapped)); + if (atomic_long_read(&po->mapped)) + pr_err("packet_mmap: vma is busy: %ld\n", + atomic_long_read(&po->mapped)); } mutex_unlock(&po->pg_vec_lock); @@ -4546,7 +4546,7 @@ static int packet_mmap(struct file *file, struct socket *sock, } } - atomic_inc(&po->mapped); + atomic_long_inc(&po->mapped); vma->vm_ops = &packet_mmap_ops; err = 0; diff --git a/net/packet/internal.h b/net/packet/internal.h index 2b2b85dadf8e..878fcdca8c25 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -125,7 +125,7 @@ struct packet_sock { __be16 num; struct packet_rollover *rollover; struct packet_mclist *mclist; - atomic_t mapped; + atomic_long_t mapped; enum tpacket_versions tp_version; unsigned int tp_hdrlen; unsigned int tp_reserve; From 6109859f698297cd0557d5461ef175ce38bd70df Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Mon, 14 Aug 2023 09:50:42 +0300 Subject: [PATCH 41/68] arm64: dts: mediatek: mt7622: fix memory node warning check commit 8e6ecbfd44b5542a7598c1c5fc9c6dcb5d367f2a upstream. dtbs_check throws a warning at the memory node: Warning (unit_address_vs_reg): /memory: node has a reg or ranges property, but no unit name fix by adding the address into the node name. Cc: stable@vger.kernel.org Fixes: 0b6286dd96c0 ("arm64: dts: mt7622: add bananapi BPI-R64 board") Signed-off-by: Eugen Hristev Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230814065042.4973-1-eugen.hristev@collabora.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts | 2 +- arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts index 81215cc3759a..7b095378c96c 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts +++ b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts @@ -69,7 +69,7 @@ }; }; - memory { + memory@40000000 { reg = <0 0x40000000 0 0x40000000>; }; diff --git a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts index 3f783348c66a..f586c1ee4a59 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts +++ b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts @@ -55,7 +55,7 @@ }; }; - memory { + memory@40000000 { reg = <0 0x40000000 0 0x20000000>; }; From 07bdb1bd24765a22a04e656d9bd292a1c17870f5 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 25 Oct 2023 11:38:15 +0200 Subject: [PATCH 42/68] arm64: dts: mediatek: mt8173-evb: Fix regulator-fixed node names commit 24165c5dad7ba7c7624d05575a5e0cc851396c71 upstream. Fix a unit_address_vs_reg warning for the USB VBUS fixed regulators by renaming the regulator nodes from regulator@{0,1} to regulator-usb-p0 and regulator-usb-p1. Cc: stable@vger.kernel.org Fixes: c0891284a74a ("arm64: dts: mediatek: add USB3 DRD driver") Link: https://lore.kernel.org/r/20231025093816.44327-8-angelogioacchino.delregno@collabora.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/mediatek/mt8173-evb.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts index 6dffada2e66b..2b66afcf026e 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts +++ b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts @@ -43,7 +43,7 @@ id-gpio = <&pio 16 GPIO_ACTIVE_HIGH>; }; - usb_p1_vbus: regulator@0 { + usb_p1_vbus: regulator-usb-p1 { compatible = "regulator-fixed"; regulator-name = "usb_vbus"; regulator-min-microvolt = <5000000>; @@ -52,7 +52,7 @@ enable-active-high; }; - usb_p0_vbus: regulator@1 { + usb_p0_vbus: regulator-usb-p0 { compatible = "regulator-fixed"; regulator-name = "vbus"; regulator-min-microvolt = <5000000>; From 84ca356ec859478a2524a1885fae4bf7de4003b1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 16 Jun 2022 11:06:23 -0700 Subject: [PATCH 43/68] perf/core: Add a new read format to get a number of lost samples [ Upstream commit 119a784c81270eb88e573174ed2209225d646656 ] Sometimes we want to know an accurate number of samples even if it's lost. Currenlty PERF_RECORD_LOST is generated for a ring-buffer which might be shared with other events. So it's hard to know per-event lost count. Add event->lost_samples field and PERF_FORMAT_LOST to retrieve it from userspace. Original-patch-by: Jiri Olsa Signed-off-by: Namhyung Kim Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220616180623.1358843-1-namhyung@kernel.org Stable-dep-of: 382c27f4ed28 ("perf: Fix perf_event_validate_size()") Signed-off-by: Sasha Levin --- include/linux/perf_event.h | 2 ++ include/uapi/linux/perf_event.h | 5 ++++- kernel/events/core.c | 21 ++++++++++++++++++--- kernel/events/ring_buffer.c | 5 ++++- 4 files changed, 28 insertions(+), 5 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4c7409e23318..9e0e20c11aa3 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -702,6 +702,8 @@ struct perf_event { struct pid_namespace *ns; u64 id; + atomic64_t lost_samples; + u64 (*clock)(void); perf_overflow_handler_t overflow_handler; void *overflow_handler_context; diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index ceccd980ffcf..19c143a293be 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -273,6 +273,7 @@ enum { * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING * { u64 id; } && PERF_FORMAT_ID + * { u64 lost; } && PERF_FORMAT_LOST * } && !PERF_FORMAT_GROUP * * { u64 nr; @@ -280,6 +281,7 @@ enum { * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING * { u64 value; * { u64 id; } && PERF_FORMAT_ID + * { u64 lost; } && PERF_FORMAT_LOST * } cntr[nr]; * } && PERF_FORMAT_GROUP * }; @@ -289,8 +291,9 @@ enum perf_event_read_format { PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, PERF_FORMAT_ID = 1U << 2, PERF_FORMAT_GROUP = 1U << 3, + PERF_FORMAT_LOST = 1U << 4, - PERF_FORMAT_MAX = 1U << 4, /* non-ABI */ + PERF_FORMAT_MAX = 1U << 5, /* non-ABI */ }; #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ diff --git a/kernel/events/core.c b/kernel/events/core.c index cd32c7e55568..f5d5e0cdb223 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1726,6 +1726,9 @@ static void __perf_event_read_size(struct perf_event *event, int nr_siblings) if (event->attr.read_format & PERF_FORMAT_ID) entry += sizeof(u64); + if (event->attr.read_format & PERF_FORMAT_LOST) + entry += sizeof(u64); + if (event->attr.read_format & PERF_FORMAT_GROUP) { nr += nr_siblings; size += sizeof(u64); @@ -4915,11 +4918,15 @@ static int __perf_read_group_add(struct perf_event *leader, values[n++] += perf_event_count(leader); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(leader); + if (read_format & PERF_FORMAT_LOST) + values[n++] = atomic64_read(&leader->lost_samples); for_each_sibling_event(sub, leader) { values[n++] += perf_event_count(sub); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(sub); + if (read_format & PERF_FORMAT_LOST) + values[n++] = atomic64_read(&sub->lost_samples); } unlock: @@ -4973,7 +4980,7 @@ static int perf_read_one(struct perf_event *event, u64 read_format, char __user *buf) { u64 enabled, running; - u64 values[4]; + u64 values[5]; int n = 0; values[n++] = __perf_event_read_value(event, &enabled, &running); @@ -4983,6 +4990,8 @@ static int perf_read_one(struct perf_event *event, values[n++] = running; if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(event); + if (read_format & PERF_FORMAT_LOST) + values[n++] = atomic64_read(&event->lost_samples); if (copy_to_user(buf, values, n * sizeof(u64))) return -EFAULT; @@ -6312,7 +6321,7 @@ static void perf_output_read_one(struct perf_output_handle *handle, u64 enabled, u64 running) { u64 read_format = event->attr.read_format; - u64 values[4]; + u64 values[5]; int n = 0; values[n++] = perf_event_count(event); @@ -6326,6 +6335,8 @@ static void perf_output_read_one(struct perf_output_handle *handle, } if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(event); + if (read_format & PERF_FORMAT_LOST) + values[n++] = atomic64_read(&event->lost_samples); __output_copy(handle, values, n * sizeof(u64)); } @@ -6336,7 +6347,7 @@ static void perf_output_read_group(struct perf_output_handle *handle, { struct perf_event *leader = event->group_leader, *sub; u64 read_format = event->attr.read_format; - u64 values[5]; + u64 values[6]; int n = 0; values[n++] = 1 + leader->nr_siblings; @@ -6354,6 +6365,8 @@ static void perf_output_read_group(struct perf_output_handle *handle, values[n++] = perf_event_count(leader); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(leader); + if (read_format & PERF_FORMAT_LOST) + values[n++] = atomic64_read(&leader->lost_samples); __output_copy(handle, values, n * sizeof(u64)); @@ -6367,6 +6380,8 @@ static void perf_output_read_group(struct perf_output_handle *handle, values[n++] = perf_event_count(sub); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(sub); + if (read_format & PERF_FORMAT_LOST) + values[n++] = atomic64_read(&sub->lost_samples); __output_copy(handle, values, n * sizeof(u64)); } diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index fb3edb2f8ac9..679cc87b40f4 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -171,8 +171,10 @@ __perf_output_begin(struct perf_output_handle *handle, goto out; if (unlikely(rb->paused)) { - if (rb->nr_pages) + if (rb->nr_pages) { local_inc(&rb->lost); + atomic64_inc(&event->lost_samples); + } goto out; } @@ -255,6 +257,7 @@ __perf_output_begin(struct perf_output_handle *handle, fail: local_inc(&rb->lost); + atomic64_inc(&event->lost_samples); perf_output_put_handle(handle); out: rcu_read_unlock(); From 152f51d159f35b2f64d7046429703500375becc9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 29 Nov 2023 15:24:52 +0100 Subject: [PATCH 44/68] perf: Fix perf_event_validate_size() [ Upstream commit 382c27f4ed28f803b1f1473ac2d8db0afc795a1b ] Budimir noted that perf_event_validate_size() only checks the size of the newly added event, even though the sizes of all existing events can also change due to not all events having the same read_format. When we attach the new event, perf_group_attach(), we do re-compute the size for all events. Fixes: a723968c0ed3 ("perf: Fix u16 overflows") Reported-by: Budimir Markovic Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Sasha Levin --- kernel/events/core.c | 61 +++++++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index f5d5e0cdb223..fcc8e3823198 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1711,31 +1711,34 @@ static inline void perf_event__state_init(struct perf_event *event) PERF_EVENT_STATE_INACTIVE; } -static void __perf_event_read_size(struct perf_event *event, int nr_siblings) +static int __perf_event_read_size(u64 read_format, int nr_siblings) { int entry = sizeof(u64); /* value */ int size = 0; int nr = 1; - if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) size += sizeof(u64); - if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) size += sizeof(u64); - if (event->attr.read_format & PERF_FORMAT_ID) + if (read_format & PERF_FORMAT_ID) entry += sizeof(u64); - if (event->attr.read_format & PERF_FORMAT_LOST) + if (read_format & PERF_FORMAT_LOST) entry += sizeof(u64); - if (event->attr.read_format & PERF_FORMAT_GROUP) { + if (read_format & PERF_FORMAT_GROUP) { nr += nr_siblings; size += sizeof(u64); } - size += entry * nr; - event->read_size = size; + /* + * Since perf_event_validate_size() limits this to 16k and inhibits + * adding more siblings, this will never overflow. + */ + return size + nr * entry; } static void __perf_event_header_size(struct perf_event *event, u64 sample_type) @@ -1776,8 +1779,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type) */ static void perf_event__header_size(struct perf_event *event) { - __perf_event_read_size(event, - event->group_leader->nr_siblings); + event->read_size = + __perf_event_read_size(event->attr.read_format, + event->group_leader->nr_siblings); __perf_event_header_size(event, event->attr.sample_type); } @@ -1808,24 +1812,35 @@ static void perf_event__id_header_size(struct perf_event *event) event->id_header_size = size; } +/* + * Check that adding an event to the group does not result in anybody + * overflowing the 64k event limit imposed by the output buffer. + * + * Specifically, check that the read_size for the event does not exceed 16k, + * read_size being the one term that grows with groups size. Since read_size + * depends on per-event read_format, also (re)check the existing events. + * + * This leaves 48k for the constant size fields and things like callchains, + * branch stacks and register sets. + */ static bool perf_event_validate_size(struct perf_event *event) { - /* - * The values computed here will be over-written when we actually - * attach the event. - */ - __perf_event_read_size(event, event->group_leader->nr_siblings + 1); - __perf_event_header_size(event, event->attr.sample_type & ~PERF_SAMPLE_READ); - perf_event__id_header_size(event); + struct perf_event *sibling, *group_leader = event->group_leader; - /* - * Sum the lot; should not exceed the 64k limit we have on records. - * Conservative limit to allow for callchains and other variable fields. - */ - if (event->read_size + event->header_size + - event->id_header_size + sizeof(struct perf_event_header) >= 16*1024) + if (__perf_event_read_size(event->attr.read_format, + group_leader->nr_siblings + 1) > 16*1024) return false; + if (__perf_event_read_size(group_leader->attr.read_format, + group_leader->nr_siblings + 1) > 16*1024) + return false; + + for_each_sibling_event(sibling, group_leader) { + if (__perf_event_read_size(sibling->attr.read_format, + group_leader->nr_siblings + 1) > 16*1024) + return false; + } + return true; } From 05918dec9a1ee5f1924a6b9b3779ce8e8ca32e44 Mon Sep 17 00:00:00 2001 From: Boerge Struempfel Date: Wed, 29 Nov 2023 16:23:07 +0100 Subject: [PATCH 45/68] gpiolib: sysfs: Fix error handling on failed export [ Upstream commit 95dd1e34ff5bbee93a28ff3947eceaf6de811b1a ] If gpio_set_transitory() fails, we should free the GPIO again. Most notably, the flag FLAG_REQUESTED has previously been set in gpiod_request_commit(), and should be reset on failure. To my knowledge, this does not affect any current users, since the gpio_set_transitory() mainly returns 0 and -ENOTSUPP, which is converted to 0. However the gpio_set_transitory() function calles the .set_config() function of the corresponding GPIO chip and there are some GPIO drivers in which some (unlikely) branches return other values like -EPROBE_DEFER, and -EINVAL. In these cases, the above mentioned FLAG_REQUESTED would not be reset, which results in the pin being blocked until the next reboot. Fixes: e10f72bf4b3e ("gpio: gpiolib: Generalise state persistence beyond sleep") Signed-off-by: Boerge Struempfel Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpiolib-sysfs.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpio/gpiolib-sysfs.c b/drivers/gpio/gpiolib-sysfs.c index 558cd900d399..b993416961a7 100644 --- a/drivers/gpio/gpiolib-sysfs.c +++ b/drivers/gpio/gpiolib-sysfs.c @@ -490,14 +490,17 @@ static ssize_t export_store(struct class *class, } status = gpiod_set_transitory(desc, false); - if (!status) { - status = gpiod_export(desc, true); - if (status < 0) - gpiod_free(desc); - else - set_bit(FLAG_SYSFS, &desc->flags); + if (status) { + gpiod_free(desc); + goto done; } + status = gpiod_export(desc, true); + if (status < 0) + gpiod_free(desc); + else + set_bit(FLAG_SYSFS, &desc->flags); + done: if (status) pr_debug("%s: status %d\n", __func__, status); From a1f29e995fd7957c6fc1c898a6f22c24bf3524c8 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 11 Mar 2023 23:39:55 +0100 Subject: [PATCH 46/68] mmc: core: add helpers mmc_regulator_enable/disable_vqmmc [ Upstream commit 8d91f3f8ae57e6292142ca89f322e90fa0d6ac02 ] There's a number of drivers (e.g. dw_mmc, meson-gx, mmci, sunxi) using the same mechanism and a private flag vqmmc_enabled to deal with enabling/disabling the vqmmc regulator. Move this to the core and create new helpers mmc_regulator_enable_vqmmc and mmc_regulator_disable_vqmmc. Signed-off-by: Heiner Kallweit Acked-by: Martin Blumenstingl Link: https://lore.kernel.org/r/71586432-360f-9b92-17f6-b05a8a971bc2@gmail.com Signed-off-by: Ulf Hansson Stable-dep-of: 477865af60b2 ("mmc: sdhci-sprd: Fix vqmmc not shutting down after the card was pulled") Signed-off-by: Sasha Levin --- drivers/mmc/core/regulator.c | 41 ++++++++++++++++++++++++++++++++++++ include/linux/mmc/host.h | 3 +++ 2 files changed, 44 insertions(+) diff --git a/drivers/mmc/core/regulator.c b/drivers/mmc/core/regulator.c index b6febbcf8978..327032cbbb1f 100644 --- a/drivers/mmc/core/regulator.c +++ b/drivers/mmc/core/regulator.c @@ -258,3 +258,44 @@ int mmc_regulator_get_supply(struct mmc_host *mmc) return 0; } EXPORT_SYMBOL_GPL(mmc_regulator_get_supply); + +/** + * mmc_regulator_enable_vqmmc - enable VQMMC regulator for a host + * @mmc: the host to regulate + * + * Returns 0 or errno. Enables the regulator for vqmmc. + * Keeps track of the enable status for ensuring that calls to + * regulator_enable/disable are balanced. + */ +int mmc_regulator_enable_vqmmc(struct mmc_host *mmc) +{ + int ret = 0; + + if (!IS_ERR(mmc->supply.vqmmc) && !mmc->vqmmc_enabled) { + ret = regulator_enable(mmc->supply.vqmmc); + if (ret < 0) + dev_err(mmc_dev(mmc), "enabling vqmmc regulator failed\n"); + else + mmc->vqmmc_enabled = true; + } + + return ret; +} +EXPORT_SYMBOL_GPL(mmc_regulator_enable_vqmmc); + +/** + * mmc_regulator_disable_vqmmc - disable VQMMC regulator for a host + * @mmc: the host to regulate + * + * Returns 0 or errno. Disables the regulator for vqmmc. + * Keeps track of the enable status for ensuring that calls to + * regulator_enable/disable are balanced. + */ +void mmc_regulator_disable_vqmmc(struct mmc_host *mmc) +{ + if (!IS_ERR(mmc->supply.vqmmc) && mmc->vqmmc_enabled) { + regulator_disable(mmc->supply.vqmmc); + mmc->vqmmc_enabled = false; + } +} +EXPORT_SYMBOL_GPL(mmc_regulator_disable_vqmmc); diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index a0e1cf1bef4e..68880c2b4fe0 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -397,6 +397,7 @@ struct mmc_host { unsigned int use_blk_mq:1; /* use blk-mq */ unsigned int retune_crc_disable:1; /* don't trigger retune upon crc */ unsigned int can_dma_map_merge:1; /* merging can be used */ + unsigned int vqmmc_enabled:1; /* vqmmc regulator is enabled */ int rescan_disable; /* disable card detection */ int rescan_entered; /* used with nonremovable devices */ @@ -534,6 +535,8 @@ static inline int mmc_regulator_set_vqmmc(struct mmc_host *mmc, #endif int mmc_regulator_get_supply(struct mmc_host *mmc); +int mmc_regulator_enable_vqmmc(struct mmc_host *mmc); +void mmc_regulator_disable_vqmmc(struct mmc_host *mmc); static inline int mmc_card_is_removable(struct mmc_host *host) { From 1796ae6a7a8c51b2c0017898ea3ccacfc613504e Mon Sep 17 00:00:00 2001 From: Wenchao Chen Date: Wed, 15 Nov 2023 16:34:06 +0800 Subject: [PATCH 47/68] mmc: sdhci-sprd: Fix vqmmc not shutting down after the card was pulled [ Upstream commit 477865af60b2117ceaa1d558e03559108c15c78c ] With cat regulator_summary, we found that vqmmc was not shutting down after the card was pulled. cat /sys/kernel/debug/regulator/regulator_summary 1.before fix 1)Insert SD card vddsdio 1 1 0 unknown 3500mV 0mA 1200mV 3750mV 71100000.mmc-vqmmc 1 0mA 3500mV 3600mV 2)Pull out the SD card vddsdio 1 1 0 unknown 3500mV 0mA 1200mV 3750mV 71100000.mmc-vqmmc 1 0mA 3500mV 3600mV 2.after fix 1)Insert SD cardt vddsdio 1 1 0 unknown 3500mV 0mA 1200mV 3750mV 71100000.mmc-vqmmc 1 0mA 3500mV 3600mV 2)Pull out the SD card vddsdio 0 1 0 unknown 3500mV 0mA 1200mV 3750mV 71100000.mmc-vqmmc 0 0mA 3500mV 3600mV Fixes: fb8bd90f83c4 ("mmc: sdhci-sprd: Add Spreadtrum's initial host controller") Signed-off-by: Wenchao Chen Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231115083406.7368-1-wenchao.chen@unisoc.com Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/sdhci-sprd.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c index 1b1dad9d9a60..b624589e62a0 100644 --- a/drivers/mmc/host/sdhci-sprd.c +++ b/drivers/mmc/host/sdhci-sprd.c @@ -381,12 +381,33 @@ static unsigned int sdhci_sprd_get_ro(struct sdhci_host *host) return 0; } +static void sdhci_sprd_set_power(struct sdhci_host *host, unsigned char mode, + unsigned short vdd) +{ + struct mmc_host *mmc = host->mmc; + + switch (mode) { + case MMC_POWER_OFF: + mmc_regulator_set_ocr(host->mmc, mmc->supply.vmmc, 0); + + mmc_regulator_disable_vqmmc(mmc); + break; + case MMC_POWER_ON: + mmc_regulator_enable_vqmmc(mmc); + break; + case MMC_POWER_UP: + mmc_regulator_set_ocr(host->mmc, mmc->supply.vmmc, vdd); + break; + } +} + static struct sdhci_ops sdhci_sprd_ops = { .read_l = sdhci_sprd_readl, .write_l = sdhci_sprd_writel, .write_w = sdhci_sprd_writew, .write_b = sdhci_sprd_writeb, .set_clock = sdhci_sprd_set_clock, + .set_power = sdhci_sprd_set_power, .get_max_clock = sdhci_sprd_get_max_clock, .get_min_clock = sdhci_sprd_get_min_clock, .set_bus_width = sdhci_set_bus_width, @@ -630,6 +651,10 @@ static int sdhci_sprd_probe(struct platform_device *pdev) host->caps1 &= ~(SDHCI_SUPPORT_SDR50 | SDHCI_SUPPORT_SDR104 | SDHCI_SUPPORT_DDR50); + ret = mmc_regulator_get_supply(host->mmc); + if (ret) + goto pm_runtime_disable; + ret = sdhci_setup_host(host); if (ret) goto pm_runtime_disable; From fefc0559c58e3a2060967ef7bb4ad53cedc2cfde Mon Sep 17 00:00:00 2001 From: Konstantin Aladyshev Date: Wed, 6 Dec 2023 11:07:44 +0300 Subject: [PATCH 48/68] usb: gadget: f_hid: fix report descriptor allocation commit 61890dc28f7d9e9aac8a9471302613824c22fae4 upstream. The commit 89ff3dfac604 ("usb: gadget: f_hid: fix f_hidg lifetime vs cdev") has introduced a bug that leads to hid device corruption after the replug operation. Reverse device managed memory allocation for the report descriptor to fix the issue. Tested: This change was tested on the AMD EthanolX CRB server with the BMC based on the OpenBMC distribution. The BMC provides KVM functionality via the USB gadget device: - before: KVM page refresh results in a broken USB device, - after: KVM page refresh works without any issues. Fixes: 89ff3dfac604 ("usb: gadget: f_hid: fix f_hidg lifetime vs cdev") Cc: stable@vger.kernel.org Signed-off-by: Konstantin Aladyshev Link: https://lore.kernel.org/r/20231206080744.253-2-aladyshev22@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_hid.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index c9d61d4dc9f5..571560d689c8 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -88,6 +88,7 @@ static void hidg_release(struct device *dev) { struct f_hidg *hidg = container_of(dev, struct f_hidg, dev); + kfree(hidg->report_desc); kfree(hidg->set_report_buf); kfree(hidg); } @@ -1293,9 +1294,9 @@ static struct usb_function *hidg_alloc(struct usb_function_instance *fi) hidg->report_length = opts->report_length; hidg->report_desc_length = opts->report_desc_length; if (opts->report_desc) { - hidg->report_desc = devm_kmemdup(&hidg->dev, opts->report_desc, - opts->report_desc_length, - GFP_KERNEL); + hidg->report_desc = kmemdup(opts->report_desc, + opts->report_desc_length, + GFP_KERNEL); if (!hidg->report_desc) { put_device(&hidg->dev); --opts->refcnt; From a9022cbdd0ae26f4aa35d3039263d58899c63ddd Mon Sep 17 00:00:00 2001 From: Cameron Williams Date: Thu, 2 Nov 2023 21:10:40 +0000 Subject: [PATCH 49/68] parport: Add support for Brainboxes IX/UC/PX parallel cards commit 1a031f6edc460e9562098bdedc3918da07c30a6e upstream. Adds support for Intashield IX-500/IX-550, UC-146/UC-157, PX-146/PX-157, PX-203 and PX-475 (LPT port) Cc: stable@vger.kernel.org Signed-off-by: Cameron Williams Acked-by: Sudip Mukherjee Link: https://lore.kernel.org/r/AS4PR02MB790389C130410BD864C8DCC9C4A6A@AS4PR02MB7903.eurprd02.prod.outlook.com Signed-off-by: Greg Kroah-Hartman --- drivers/parport/parport_pc.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c index 3bc0027b7844..23aced808242 100644 --- a/drivers/parport/parport_pc.c +++ b/drivers/parport/parport_pc.c @@ -2648,6 +2648,8 @@ enum parport_pc_pci_cards { netmos_9865, quatech_sppxp100, wch_ch382l, + brainboxes_uc146, + brainboxes_px203, }; @@ -2711,6 +2713,8 @@ static struct parport_pc_pci { /* netmos_9865 */ { 1, { { 0, -1 }, } }, /* quatech_sppxp100 */ { 1, { { 0, 1 }, } }, /* wch_ch382l */ { 1, { { 2, -1 }, } }, + /* brainboxes_uc146 */ { 1, { { 3, -1 }, } }, + /* brainboxes_px203 */ { 1, { { 0, -1 }, } }, }; static const struct pci_device_id parport_pc_pci_tbl[] = { @@ -2802,6 +2806,23 @@ static const struct pci_device_id parport_pc_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, quatech_sppxp100 }, /* WCH CH382L PCI-E single parallel port card */ { 0x1c00, 0x3050, 0x1c00, 0x3050, 0, 0, wch_ch382l }, + /* Brainboxes IX-500/550 */ + { PCI_VENDOR_ID_INTASHIELD, 0x402a, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_pcie_pport }, + /* Brainboxes UC-146/UC-157 */ + { PCI_VENDOR_ID_INTASHIELD, 0x0be1, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, brainboxes_uc146 }, + { PCI_VENDOR_ID_INTASHIELD, 0x0be2, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, brainboxes_uc146 }, + /* Brainboxes PX-146/PX-257 */ + { PCI_VENDOR_ID_INTASHIELD, 0x401c, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_pcie_pport }, + /* Brainboxes PX-203 */ + { PCI_VENDOR_ID_INTASHIELD, 0x4007, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, brainboxes_px203 }, + /* Brainboxes PX-475 */ + { PCI_VENDOR_ID_INTASHIELD, 0x401f, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_pcie_pport }, { 0, } /* terminate list */ }; MODULE_DEVICE_TABLE(pci, parport_pc_pci_tbl); From 880b035bc64ede44364a7bf6c7842b77d71b2389 Mon Sep 17 00:00:00 2001 From: RD Babiera Date: Wed, 29 Nov 2023 19:23:50 +0000 Subject: [PATCH 50/68] usb: typec: class: fix typec_altmode_put_partner to put plugs commit b17b7fe6dd5c6ff74b38b0758ca799cdbb79e26e upstream. When typec_altmode_put_partner is called by a plug altmode upon release, the port altmode the plug belongs to will not remove its reference to the plug. The check to see if the altmode being released evaluates against the released altmode's partner instead of the calling altmode itself, so change adev in typec_altmode_put_partner to properly refer to the altmode being released. typec_altmode_set_partner is not run for port altmodes, so also add a check in typec_altmode_release to prevent typec_altmode_put_partner() calls on port altmode release. Fixes: 8a37d87d72f0 ("usb: typec: Bus type for alternate modes") Cc: stable@vger.kernel.org Signed-off-by: RD Babiera Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20231129192349.1773623-2-rdbabiera@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/class.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index db926641b79d..79bebda96c55 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -193,7 +193,7 @@ static void typec_altmode_put_partner(struct altmode *altmode) if (!partner) return; - adev = &partner->adev; + adev = &altmode->adev; if (is_typec_plug(adev->dev.parent)) { struct typec_plug *plug = to_typec_plug(adev->dev.parent); @@ -465,7 +465,8 @@ static void typec_altmode_release(struct device *dev) { struct altmode *alt = to_altmode(to_typec_altmode(dev)); - typec_altmode_put_partner(alt); + if (!is_typec_port(dev->parent)) + typec_altmode_put_partner(alt); altmode_id_remove(alt->adev.dev.parent, alt->id); kfree(alt); From d896c47f8cfc61b138533853af272185875afff3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 22 Nov 2023 18:15:03 +0100 Subject: [PATCH 51/68] ARM: PL011: Fix DMA support commit 58ac1b3799799069d53f5bf95c093f2fe8dd3cc5 upstream. Since there is no guarantee that the memory returned by dma_alloc_coherent() is associated with a 'struct page', using the architecture specific phys_to_page() is wrong, but using virt_to_page() would be as well. Stop using sg lists altogether and just use the *_single() functions instead. This also simplifies the code a bit since the scatterlists in this driver always have only one entry anyway. https://lore.kernel.org/lkml/86db0fe5-930d-4cbb-bd7d-03367da38951@app.fastmail.com/ Use consistent names for dma buffers gc: Add a commit log from the initial thread: https://lore.kernel.org/lkml/86db0fe5-930d-4cbb-bd7d-03367da38951@app.fastmail.com/ Use consistent names for dma buffers Fixes: cb06ff102e2d7 ("ARM: PL011: Add support for Rx DMA buffer polling.") Signed-off-by: Arnd Bergmann Tested-by: Gregory CLEMENT Signed-off-by: Gregory CLEMENT Cc: stable Link: https://lore.kernel.org/r/20231122171503.235649-1-gregory.clement@bootlin.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 112 +++++++++++++++----------------- 1 file changed, 54 insertions(+), 58 deletions(-) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 86084090232d..5f9d0ae8129d 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -227,17 +227,18 @@ static struct vendor_data vendor_zte = { /* Deals with DMA transactions */ -struct pl011_sgbuf { - struct scatterlist sg; - char *buf; +struct pl011_dmabuf { + dma_addr_t dma; + size_t len; + char *buf; }; struct pl011_dmarx_data { struct dma_chan *chan; struct completion complete; bool use_buf_b; - struct pl011_sgbuf sgbuf_a; - struct pl011_sgbuf sgbuf_b; + struct pl011_dmabuf dbuf_a; + struct pl011_dmabuf dbuf_b; dma_cookie_t cookie; bool running; struct timer_list timer; @@ -250,7 +251,8 @@ struct pl011_dmarx_data { struct pl011_dmatx_data { struct dma_chan *chan; - struct scatterlist sg; + dma_addr_t dma; + size_t len; char *buf; bool queued; }; @@ -371,32 +373,24 @@ static int pl011_fifo_to_tty(struct uart_amba_port *uap) #define PL011_DMA_BUFFER_SIZE PAGE_SIZE -static int pl011_sgbuf_init(struct dma_chan *chan, struct pl011_sgbuf *sg, +static int pl011_dmabuf_init(struct dma_chan *chan, struct pl011_dmabuf *db, enum dma_data_direction dir) { - dma_addr_t dma_addr; - - sg->buf = dma_alloc_coherent(chan->device->dev, - PL011_DMA_BUFFER_SIZE, &dma_addr, GFP_KERNEL); - if (!sg->buf) + db->buf = dma_alloc_coherent(chan->device->dev, PL011_DMA_BUFFER_SIZE, + &db->dma, GFP_KERNEL); + if (!db->buf) return -ENOMEM; - - sg_init_table(&sg->sg, 1); - sg_set_page(&sg->sg, phys_to_page(dma_addr), - PL011_DMA_BUFFER_SIZE, offset_in_page(dma_addr)); - sg_dma_address(&sg->sg) = dma_addr; - sg_dma_len(&sg->sg) = PL011_DMA_BUFFER_SIZE; + db->len = PL011_DMA_BUFFER_SIZE; return 0; } -static void pl011_sgbuf_free(struct dma_chan *chan, struct pl011_sgbuf *sg, +static void pl011_dmabuf_free(struct dma_chan *chan, struct pl011_dmabuf *db, enum dma_data_direction dir) { - if (sg->buf) { + if (db->buf) { dma_free_coherent(chan->device->dev, - PL011_DMA_BUFFER_SIZE, sg->buf, - sg_dma_address(&sg->sg)); + PL011_DMA_BUFFER_SIZE, db->buf, db->dma); } } @@ -557,8 +551,8 @@ static void pl011_dma_tx_callback(void *data) spin_lock_irqsave(&uap->port.lock, flags); if (uap->dmatx.queued) - dma_unmap_sg(dmatx->chan->device->dev, &dmatx->sg, 1, - DMA_TO_DEVICE); + dma_unmap_single(dmatx->chan->device->dev, dmatx->dma, + dmatx->len, DMA_TO_DEVICE); dmacr = uap->dmacr; uap->dmacr = dmacr & ~UART011_TXDMAE; @@ -644,18 +638,19 @@ static int pl011_dma_tx_refill(struct uart_amba_port *uap) memcpy(&dmatx->buf[first], &xmit->buf[0], second); } - dmatx->sg.length = count; - - if (dma_map_sg(dma_dev->dev, &dmatx->sg, 1, DMA_TO_DEVICE) != 1) { + dmatx->len = count; + dmatx->dma = dma_map_single(dma_dev->dev, dmatx->buf, count, + DMA_TO_DEVICE); + if (dmatx->dma == DMA_MAPPING_ERROR) { uap->dmatx.queued = false; dev_dbg(uap->port.dev, "unable to map TX DMA\n"); return -EBUSY; } - desc = dmaengine_prep_slave_sg(chan, &dmatx->sg, 1, DMA_MEM_TO_DEV, + desc = dmaengine_prep_slave_single(chan, dmatx->dma, dmatx->len, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); if (!desc) { - dma_unmap_sg(dma_dev->dev, &dmatx->sg, 1, DMA_TO_DEVICE); + dma_unmap_single(dma_dev->dev, dmatx->dma, dmatx->len, DMA_TO_DEVICE); uap->dmatx.queued = false; /* * If DMA cannot be used right now, we complete this @@ -819,8 +814,8 @@ __acquires(&uap->port.lock) dmaengine_terminate_async(uap->dmatx.chan); if (uap->dmatx.queued) { - dma_unmap_sg(uap->dmatx.chan->device->dev, &uap->dmatx.sg, 1, - DMA_TO_DEVICE); + dma_unmap_single(uap->dmatx.chan->device->dev, uap->dmatx.dma, + uap->dmatx.len, DMA_TO_DEVICE); uap->dmatx.queued = false; uap->dmacr &= ~UART011_TXDMAE; pl011_write(uap->dmacr, uap, REG_DMACR); @@ -834,15 +829,15 @@ static int pl011_dma_rx_trigger_dma(struct uart_amba_port *uap) struct dma_chan *rxchan = uap->dmarx.chan; struct pl011_dmarx_data *dmarx = &uap->dmarx; struct dma_async_tx_descriptor *desc; - struct pl011_sgbuf *sgbuf; + struct pl011_dmabuf *dbuf; if (!rxchan) return -EIO; /* Start the RX DMA job */ - sgbuf = uap->dmarx.use_buf_b ? - &uap->dmarx.sgbuf_b : &uap->dmarx.sgbuf_a; - desc = dmaengine_prep_slave_sg(rxchan, &sgbuf->sg, 1, + dbuf = uap->dmarx.use_buf_b ? + &uap->dmarx.dbuf_b : &uap->dmarx.dbuf_a; + desc = dmaengine_prep_slave_single(rxchan, dbuf->dma, dbuf->len, DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); /* @@ -882,8 +877,8 @@ static void pl011_dma_rx_chars(struct uart_amba_port *uap, bool readfifo) { struct tty_port *port = &uap->port.state->port; - struct pl011_sgbuf *sgbuf = use_buf_b ? - &uap->dmarx.sgbuf_b : &uap->dmarx.sgbuf_a; + struct pl011_dmabuf *dbuf = use_buf_b ? + &uap->dmarx.dbuf_b : &uap->dmarx.dbuf_a; int dma_count = 0; u32 fifotaken = 0; /* only used for vdbg() */ @@ -892,7 +887,7 @@ static void pl011_dma_rx_chars(struct uart_amba_port *uap, if (uap->dmarx.poll_rate) { /* The data can be taken by polling */ - dmataken = sgbuf->sg.length - dmarx->last_residue; + dmataken = dbuf->len - dmarx->last_residue; /* Recalculate the pending size */ if (pending >= dmataken) pending -= dmataken; @@ -906,7 +901,7 @@ static void pl011_dma_rx_chars(struct uart_amba_port *uap, * Note that tty_insert_flip_buf() tries to take as many chars * as it can. */ - dma_count = tty_insert_flip_string(port, sgbuf->buf + dmataken, + dma_count = tty_insert_flip_string(port, dbuf->buf + dmataken, pending); uap->port.icount.rx += dma_count; @@ -917,7 +912,7 @@ static void pl011_dma_rx_chars(struct uart_amba_port *uap, /* Reset the last_residue for Rx DMA poll */ if (uap->dmarx.poll_rate) - dmarx->last_residue = sgbuf->sg.length; + dmarx->last_residue = dbuf->len; /* * Only continue with trying to read the FIFO if all DMA chars have @@ -954,8 +949,8 @@ static void pl011_dma_rx_irq(struct uart_amba_port *uap) { struct pl011_dmarx_data *dmarx = &uap->dmarx; struct dma_chan *rxchan = dmarx->chan; - struct pl011_sgbuf *sgbuf = dmarx->use_buf_b ? - &dmarx->sgbuf_b : &dmarx->sgbuf_a; + struct pl011_dmabuf *dbuf = dmarx->use_buf_b ? + &dmarx->dbuf_b : &dmarx->dbuf_a; size_t pending; struct dma_tx_state state; enum dma_status dmastat; @@ -977,7 +972,7 @@ static void pl011_dma_rx_irq(struct uart_amba_port *uap) pl011_write(uap->dmacr, uap, REG_DMACR); uap->dmarx.running = false; - pending = sgbuf->sg.length - state.residue; + pending = dbuf->len - state.residue; BUG_ON(pending > PL011_DMA_BUFFER_SIZE); /* Then we terminate the transfer - we now know our residue */ dmaengine_terminate_all(rxchan); @@ -1004,8 +999,8 @@ static void pl011_dma_rx_callback(void *data) struct pl011_dmarx_data *dmarx = &uap->dmarx; struct dma_chan *rxchan = dmarx->chan; bool lastbuf = dmarx->use_buf_b; - struct pl011_sgbuf *sgbuf = dmarx->use_buf_b ? - &dmarx->sgbuf_b : &dmarx->sgbuf_a; + struct pl011_dmabuf *dbuf = dmarx->use_buf_b ? + &dmarx->dbuf_b : &dmarx->dbuf_a; size_t pending; struct dma_tx_state state; int ret; @@ -1023,7 +1018,7 @@ static void pl011_dma_rx_callback(void *data) * the DMA irq handler. So we check the residue here. */ rxchan->device->device_tx_status(rxchan, dmarx->cookie, &state); - pending = sgbuf->sg.length - state.residue; + pending = dbuf->len - state.residue; BUG_ON(pending > PL011_DMA_BUFFER_SIZE); /* Then we terminate the transfer - we now know our residue */ dmaengine_terminate_all(rxchan); @@ -1075,16 +1070,16 @@ static void pl011_dma_rx_poll(struct timer_list *t) unsigned long flags = 0; unsigned int dmataken = 0; unsigned int size = 0; - struct pl011_sgbuf *sgbuf; + struct pl011_dmabuf *dbuf; int dma_count; struct dma_tx_state state; - sgbuf = dmarx->use_buf_b ? &uap->dmarx.sgbuf_b : &uap->dmarx.sgbuf_a; + dbuf = dmarx->use_buf_b ? &uap->dmarx.dbuf_b : &uap->dmarx.dbuf_a; rxchan->device->device_tx_status(rxchan, dmarx->cookie, &state); if (likely(state.residue < dmarx->last_residue)) { - dmataken = sgbuf->sg.length - dmarx->last_residue; + dmataken = dbuf->len - dmarx->last_residue; size = dmarx->last_residue - state.residue; - dma_count = tty_insert_flip_string(port, sgbuf->buf + dmataken, + dma_count = tty_insert_flip_string(port, dbuf->buf + dmataken, size); if (dma_count == size) dmarx->last_residue = state.residue; @@ -1131,7 +1126,7 @@ static void pl011_dma_startup(struct uart_amba_port *uap) return; } - sg_init_one(&uap->dmatx.sg, uap->dmatx.buf, PL011_DMA_BUFFER_SIZE); + uap->dmatx.len = PL011_DMA_BUFFER_SIZE; /* The DMA buffer is now the FIFO the TTY subsystem can use */ uap->port.fifosize = PL011_DMA_BUFFER_SIZE; @@ -1141,7 +1136,7 @@ static void pl011_dma_startup(struct uart_amba_port *uap) goto skip_rx; /* Allocate and map DMA RX buffers */ - ret = pl011_sgbuf_init(uap->dmarx.chan, &uap->dmarx.sgbuf_a, + ret = pl011_dmabuf_init(uap->dmarx.chan, &uap->dmarx.dbuf_a, DMA_FROM_DEVICE); if (ret) { dev_err(uap->port.dev, "failed to init DMA %s: %d\n", @@ -1149,12 +1144,12 @@ static void pl011_dma_startup(struct uart_amba_port *uap) goto skip_rx; } - ret = pl011_sgbuf_init(uap->dmarx.chan, &uap->dmarx.sgbuf_b, + ret = pl011_dmabuf_init(uap->dmarx.chan, &uap->dmarx.dbuf_b, DMA_FROM_DEVICE); if (ret) { dev_err(uap->port.dev, "failed to init DMA %s: %d\n", "RX buffer B", ret); - pl011_sgbuf_free(uap->dmarx.chan, &uap->dmarx.sgbuf_a, + pl011_dmabuf_free(uap->dmarx.chan, &uap->dmarx.dbuf_a, DMA_FROM_DEVICE); goto skip_rx; } @@ -1208,8 +1203,9 @@ static void pl011_dma_shutdown(struct uart_amba_port *uap) /* In theory, this should already be done by pl011_dma_flush_buffer */ dmaengine_terminate_all(uap->dmatx.chan); if (uap->dmatx.queued) { - dma_unmap_sg(uap->dmatx.chan->device->dev, &uap->dmatx.sg, 1, - DMA_TO_DEVICE); + dma_unmap_single(uap->dmatx.chan->device->dev, + uap->dmatx.dma, uap->dmatx.len, + DMA_TO_DEVICE); uap->dmatx.queued = false; } @@ -1220,8 +1216,8 @@ static void pl011_dma_shutdown(struct uart_amba_port *uap) if (uap->using_rx_dma) { dmaengine_terminate_all(uap->dmarx.chan); /* Clean up the RX DMA */ - pl011_sgbuf_free(uap->dmarx.chan, &uap->dmarx.sgbuf_a, DMA_FROM_DEVICE); - pl011_sgbuf_free(uap->dmarx.chan, &uap->dmarx.sgbuf_b, DMA_FROM_DEVICE); + pl011_dmabuf_free(uap->dmarx.chan, &uap->dmarx.dbuf_a, DMA_FROM_DEVICE); + pl011_dmabuf_free(uap->dmarx.chan, &uap->dmarx.dbuf_b, DMA_FROM_DEVICE); if (uap->dmarx.poll_rate) del_timer_sync(&uap->dmarx.timer); uap->using_rx_dma = false; From ce79cf407c64edd0b38c924f4547e6d7a2ede24b Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Thu, 23 Nov 2023 08:28:18 +0100 Subject: [PATCH 52/68] serial: sc16is7xx: address RX timeout interrupt errata commit 08ce9a1b72e38cf44c300a44ac5858533eb3c860 upstream. This device has a silicon bug that makes it report a timeout interrupt but no data in the FIFO. The datasheet states the following in the errata section 18.1.4: "If the host reads the receive FIFO at the same time as a time-out interrupt condition happens, the host might read 0xCC (time-out) in the Interrupt Indication Register (IIR), but bit 0 of the Line Status Register (LSR) is not set (means there is no data in the receive FIFO)." The errata description seems to indicate it concerns only polled mode of operation when reading bit 0 of the LSR register. However, tests have shown and NXP has confirmed that the RXLVL register also yields 0 when the bug is triggered, and hence the IRQ driven implementation in this driver is equally affected. This bug has hit us on production units and when it does, sc16is7xx_irq() would spin forever because sc16is7xx_port_irq() keeps seeing an interrupt in the IIR register that is not cleared because the driver does not call into sc16is7xx_handle_rx() unless the RXLVL register reports at least one byte in the FIFO. Fix this by always reading one byte from the FIFO when this condition is detected in order to clear the interrupt. This approach was confirmed to be correct by NXP through their support channels. Tested by: Hugo Villeneuve Signed-off-by: Daniel Mack Co-Developed-by: Maxim Popov Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231123072818.1394539-1-daniel@zonque.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 091cf5fe9030..42e1e6101485 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -694,6 +694,18 @@ static bool sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno) case SC16IS7XX_IIR_RTOI_SRC: case SC16IS7XX_IIR_XOFFI_SRC: rxlen = sc16is7xx_port_read(port, SC16IS7XX_RXLVL_REG); + + /* + * There is a silicon bug that makes the chip report a + * time-out interrupt but no data in the FIFO. This is + * described in errata section 18.1.4. + * + * When this happens, read one byte from the FIFO to + * clear the interrupt. + */ + if (iir == SC16IS7XX_IIR_RTOI_SRC && !rxlen) + rxlen = 1; + if (rxlen) sc16is7xx_handle_rx(port, rxlen, iir); break; From 3371eac21119e2401dbd78eefbb6e3acb87dc96f Mon Sep 17 00:00:00 2001 From: Ronald Wahl Date: Tue, 31 Oct 2023 14:12:42 +0100 Subject: [PATCH 53/68] serial: 8250_omap: Add earlycon support for the AM654 UART controller commit 8e42c301ce64e0dcca547626eb486877d502d336 upstream. Currently there is no support for earlycon on the AM654 UART controller. This commit adds it. Signed-off-by: Ronald Wahl Reviewed-by: Vignesh Raghavendra Link: https://lore.kernel.org/r/20231031131242.15516-1-rwahl@gmx.de Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_early.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/8250/8250_early.c b/drivers/tty/serial/8250/8250_early.c index 5cd8c36c8fcc..2acaea5f3232 100644 --- a/drivers/tty/serial/8250/8250_early.c +++ b/drivers/tty/serial/8250/8250_early.c @@ -176,6 +176,7 @@ static int __init early_omap8250_setup(struct earlycon_device *device, OF_EARLYCON_DECLARE(omap8250, "ti,omap2-uart", early_omap8250_setup); OF_EARLYCON_DECLARE(omap8250, "ti,omap3-uart", early_omap8250_setup); OF_EARLYCON_DECLARE(omap8250, "ti,omap4-uart", early_omap8250_setup); +OF_EARLYCON_DECLARE(omap8250, "ti,am654-uart", early_omap8250_setup); #endif From 1aee33d43d6c2659442e4aba574b045e1d928029 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Fri, 1 Dec 2023 19:37:27 +0100 Subject: [PATCH 54/68] x86/CPU/AMD: Check vendor in the AMD microcode callback commit 9b8493dc43044376716d789d07699f17d538a7c4 upstream. Commit in Fixes added an AMD-specific microcode callback. However, it didn't check the CPU vendor the kernel runs on explicitly. The only reason the Zenbleed check in it didn't run on other x86 vendors hardware was pure coincidental luck: if (!cpu_has_amd_erratum(c, amd_zenbleed)) return; gives true on other vendors because they don't have those families and models. However, with the removal of the cpu_has_amd_erratum() in 05f5f73936fa ("x86/CPU/AMD: Drop now unused CPU erratum checking function") that coincidental condition is gone, leading to the zenbleed check getting executed on other vendors too. Add the explicit vendor check for the whole callback as it should've been done in the first place. Fixes: 522b1d69219d ("x86/cpu/amd: Add a Zenbleed fix") Cc: Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20231201184226.16749-1-bp@alien8.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/amd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index eb3cd4ad45ae..b2cdf1c07e56 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1248,5 +1248,8 @@ static void zenbleed_check_cpu(void *unused) void amd_check_microcode(void) { + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) + return; + on_each_cpu(zenbleed_check_cpu, NULL, 1); } From 77a353924d8f2101f1c1e7b46a61328c949d7388 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 9 Nov 2023 13:36:24 +0100 Subject: [PATCH 55/68] KVM: s390/mm: Properly reset no-dat commit 27072b8e18a73ffeffb1c140939023915a35134b upstream. When the CMMA state needs to be reset, the no-dat bit also needs to be reset. Failure to do so could cause issues in the guest, since the guest expects the bit to be cleared after a reset. Cc: Reviewed-by: Nico Boehr Message-ID: <20231109123624.37314-1-imbrenda@linux.ibm.com> Signed-off-by: Claudio Imbrenda Signed-off-by: Greg Kroah-Hartman --- arch/s390/mm/pgtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 28ca07360e97..c2ed9b859860 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -699,7 +699,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, pte_clear(mm, addr, ptep); } if (reset) - pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK; + pgste_val(pgste) &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT); pgste_set_unlock(ptep, pgste); preempt_enable(); } From 32f4536c108f781f3442c58ebbeaaf44a1afc3ab Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 29 Nov 2023 23:15:47 +0900 Subject: [PATCH 56/68] nilfs2: fix missing error check for sb_set_blocksize call commit d61d0ab573649789bf9eb909c89a1a193b2e3d10 upstream. When mounting a filesystem image with a block size larger than the page size, nilfs2 repeatedly outputs long error messages with stack traces to the kernel log, such as the following: getblk(): invalid block size 8192 requested logical block size: 512 ... Call Trace: dump_stack_lvl+0x92/0xd4 dump_stack+0xd/0x10 bdev_getblk+0x33a/0x354 __breadahead+0x11/0x80 nilfs_search_super_root+0xe2/0x704 [nilfs2] load_nilfs+0x72/0x504 [nilfs2] nilfs_mount+0x30f/0x518 [nilfs2] legacy_get_tree+0x1b/0x40 vfs_get_tree+0x18/0xc4 path_mount+0x786/0xa88 __ia32_sys_mount+0x147/0x1a8 __do_fast_syscall_32+0x56/0xc8 do_fast_syscall_32+0x29/0x58 do_SYSENTER_32+0x15/0x18 entry_SYSENTER_32+0x98/0xf1 ... This overloads the system logger. And to make matters worse, it sometimes crashes the kernel with a memory access violation. This is because the return value of the sb_set_blocksize() call, which should be checked for errors, is not checked. The latter issue is due to out-of-buffer memory being accessed based on a large block size that caused sb_set_blocksize() to fail for buffers read with the initial minimum block size that remained unupdated in the super_block structure. Since nilfs2 mkfs tool does not accept block sizes larger than the system page size, this has been overlooked. However, it is possible to create this situation by intentionally modifying the tool or by passing a filesystem image created on a system with a large page size to a system with a smaller page size and mounting it. Fix this issue by inserting the expected error handling for the call to sb_set_blocksize(). Link: https://lkml.kernel.org/r/20231129141547.4726-1-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/the_nilfs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index d550a564645e..c8d869bc25b0 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -688,7 +688,11 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) goto failed_sbh; } nilfs_release_super_block(nilfs); - sb_set_blocksize(sb, blocksize); + if (!sb_set_blocksize(sb, blocksize)) { + nilfs_msg(sb, KERN_ERR, "bad blocksize %d", blocksize); + err = -EINVAL; + goto out; + } err = nilfs_load_super_block(nilfs, sb, blocksize, &sbp); if (err) From 18824f592aad4124d79751bbc1500ea86ac3ff29 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 6 Dec 2023 13:26:47 +0000 Subject: [PATCH 57/68] io_uring/af_unix: disable sending io_uring over sockets commit 705318a99a138c29a512a72c3e0043b3cd7f55f4 upstream. File reference cycles have caused lots of problems for io_uring in the past, and it still doesn't work exactly right and races with unix_stream_read_generic(). The safest fix would be to completely disallow sending io_uring files via sockets via SCM_RIGHT, so there are no possible cycles invloving registered files and thus rendering SCM accounting on the io_uring side unnecessary. Cc: Fixes: 0091bfc81741b ("io_uring/af_unix: defer registered files gc to io_uring release") Reported-and-suggested-by: Jann Horn Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/c716c88321939156909cfa1bd8b0faaf1c804103.1701868795.git.asml.silence@gmail.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/io_uring.c | 101 +------------------------------------------------ net/core/scm.c | 6 +++ 2 files changed, 7 insertions(+), 100 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index adc263400471..9de8961763b0 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3137,101 +3137,6 @@ static void io_finish_async(struct io_ring_ctx *ctx) } } -#if defined(CONFIG_UNIX) -static void io_destruct_skb(struct sk_buff *skb) -{ - struct io_ring_ctx *ctx = skb->sk->sk_user_data; - int i; - - for (i = 0; i < ARRAY_SIZE(ctx->sqo_wq); i++) - if (ctx->sqo_wq[i]) - flush_workqueue(ctx->sqo_wq[i]); - - unix_destruct_scm(skb); -} - -/* - * Ensure the UNIX gc is aware of our file set, so we are certain that - * the io_uring can be safely unregistered on process exit, even if we have - * loops in the file referencing. - */ -static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset) -{ - struct sock *sk = ctx->ring_sock->sk; - struct scm_fp_list *fpl; - struct sk_buff *skb; - int i; - - fpl = kzalloc(sizeof(*fpl), GFP_KERNEL); - if (!fpl) - return -ENOMEM; - - skb = alloc_skb(0, GFP_KERNEL); - if (!skb) { - kfree(fpl); - return -ENOMEM; - } - - skb->sk = sk; - skb->scm_io_uring = 1; - skb->destructor = io_destruct_skb; - - fpl->user = get_uid(ctx->user); - for (i = 0; i < nr; i++) { - fpl->fp[i] = get_file(ctx->user_files[i + offset]); - unix_inflight(fpl->user, fpl->fp[i]); - } - - fpl->max = fpl->count = nr; - UNIXCB(skb).fp = fpl; - refcount_add(skb->truesize, &sk->sk_wmem_alloc); - skb_queue_head(&sk->sk_receive_queue, skb); - - for (i = 0; i < nr; i++) - fput(fpl->fp[i]); - - return 0; -} - -/* - * If UNIX sockets are enabled, fd passing can cause a reference cycle which - * causes regular reference counting to break down. We rely on the UNIX - * garbage collection to take care of this problem for us. - */ -static int io_sqe_files_scm(struct io_ring_ctx *ctx) -{ - unsigned left, total; - int ret = 0; - - total = 0; - left = ctx->nr_user_files; - while (left) { - unsigned this_files = min_t(unsigned, left, SCM_MAX_FD); - - ret = __io_sqe_files_scm(ctx, this_files, total); - if (ret) - break; - left -= this_files; - total += this_files; - } - - if (!ret) - return 0; - - while (total < ctx->nr_user_files) { - fput(ctx->user_files[total]); - total++; - } - - return ret; -} -#else -static int io_sqe_files_scm(struct io_ring_ctx *ctx) -{ - return 0; -} -#endif - static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args) { @@ -3285,11 +3190,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, return ret; } - ret = io_sqe_files_scm(ctx); - if (ret) - io_sqe_files_unregister(ctx); - - return ret; + return 0; } static int io_sq_offload_start(struct io_ring_ctx *ctx, diff --git a/net/core/scm.c b/net/core/scm.c index 31a38239c92f..5525c14f33f1 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -26,6 +26,7 @@ #include #include #include +#include #include @@ -103,6 +104,11 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) if (fd < 0 || !(file = fget_raw(fd))) return -EBADF; + /* don't allow io_uring files */ + if (io_uring_get_socket(file)) { + fput(file); + return -EINVAL; + } *fpp++ = file; fpl->count++; } From a149fbadb9be7b5a7d814715b0ff7014381a30cf Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 11 Dec 2023 14:41:30 +0200 Subject: [PATCH 58/68] netlink: don't call ->netlink_bind with table lock held From: Florian Westphal commit f2764bd4f6a8dffaec3e220728385d9756b3c2cb upstream. When I added support to allow generic netlink multicast groups to be restricted to subscribers with CAP_NET_ADMIN I was unaware that a genl_bind implementation already existed in the past. It was reverted due to ABBA deadlock: 1. ->netlink_bind gets called with the table lock held. 2. genetlink bind callback is invoked, it grabs the genl lock. But when a new genl subsystem is (un)registered, these two locks are taken in reverse order. One solution would be to revert again and add a comment in genl referring 1e82a62fec613, "genetlink: remove genl_bind"). This would need a second change in mptcp to not expose the raw token value anymore, e.g. by hashing the token with a secret key so userspace can still associate subflow events with the correct mptcp connection. However, Paolo Abeni reminded me to double-check why the netlink table is locked in the first place. I can't find one. netlink_bind() is already called without this lock when userspace joins a group via NETLINK_ADD_MEMBERSHIP setsockopt. Same holds for the netlink_unbind operation. Digging through the history, commit f773608026ee1 ("netlink: access nlk groups safely in netlink bind and getname") expanded the lock scope. commit 3a20773beeeeade ("net: netlink: cap max groups which will be considered in netlink_bind()") ... removed the nlk->ngroups access that the lock scope extension was all about. Reduce the lock scope again and always call ->netlink_bind without the table lock. The Fixes tag should be vs. the patch mentioned in the link below, but that one got squash-merged into the patch that came earlier in the series. Fixes: 4d54cc32112d8d ("mptcp: avoid lock_fast usage in accept path") Link: https://lore.kernel.org/mptcp/20210213000001.379332-8-mathew.j.martineau@linux.intel.com/T/#u Cc: Cong Wang Cc: Xin Long Cc: Johannes Berg Cc: Sean Tranchetti Cc: Paolo Abeni Cc: Pablo Neira Ayuso Signed-off-by: Florian Westphal Signed-off-by: David S. Miller Signed-off-by: Ido Schimmel Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 29eabd45b832..3cb27a27b420 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1020,7 +1020,6 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, return -EINVAL; } - netlink_lock_table(); if (nlk->netlink_bind && groups) { int group; @@ -1032,13 +1031,14 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (!err) continue; netlink_undo_bind(group, groups, sk); - goto unlock; + return err; } } /* No need for barriers here as we return to user-space without * using any of the bound attributes. */ + netlink_lock_table(); if (!bound) { err = nladdr->nl_pid ? netlink_insert(sk, nladdr->nl_pid) : From 263bffd2b6aad5deedb5b65b1dab02f09052be66 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 11 Dec 2023 14:41:31 +0200 Subject: [PATCH 59/68] genetlink: add CAP_NET_ADMIN test for multicast bind This is a partial backport of upstream commit 4d54cc32112d ("mptcp: avoid lock_fast usage in accept path"). It is only a partial backport because the patch in the link below was erroneously squash-merged into upstream commit 4d54cc32112d ("mptcp: avoid lock_fast usage in accept path"). Below is the original patch description from Florian Westphal: " genetlink sets NL_CFG_F_NONROOT_RECV for its netlink socket so anyone can subscribe to multicast messages. rtnetlink doesn't allow this unconditionally, rtnetlink_bind() restricts bind requests to CAP_NET_ADMIN for a few groups. This allows to set GENL_UNS_ADMIN_PERM flag on genl mcast groups to mandate CAP_NET_ADMIN. This will be used by the upcoming mptcp netlink event facility which exposes the token (mptcp connection identifier) to userspace. " Link: https://lore.kernel.org/mptcp/20210213000001.379332-8-mathew.j.martineau@linux.intel.com/ Signed-off-by: Ido Schimmel Signed-off-by: Greg Kroah-Hartman --- include/net/genetlink.h | 1 + net/netlink/genetlink.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 2d9e67a69cbe..a8c9c8d1eb51 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -14,6 +14,7 @@ */ struct genl_multicast_group { char name[GENL_NAMSIZ]; + u8 flags; }; struct genl_ops; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 102b8d6b5612..34e3c8eb5911 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -989,11 +989,43 @@ static struct genl_family genl_ctrl __ro_after_init = { .netnsok = true, }; +static int genl_bind(struct net *net, int group) +{ + const struct genl_family *family; + unsigned int id; + int ret = 0; + + genl_lock_all(); + + idr_for_each_entry(&genl_fam_idr, family, id) { + const struct genl_multicast_group *grp; + int i; + + if (family->n_mcgrps == 0) + continue; + + i = group - family->mcgrp_offset; + if (i < 0 || i >= family->n_mcgrps) + continue; + + grp = &family->mcgrps[i]; + if ((grp->flags & GENL_UNS_ADMIN_PERM) && + !ns_capable(net->user_ns, CAP_NET_ADMIN)) + ret = -EPERM; + + break; + } + + genl_unlock_all(); + return ret; +} + static int __net_init genl_pernet_init(struct net *net) { struct netlink_kernel_cfg cfg = { .input = genl_rcv, .flags = NL_CFG_F_NONROOT_RECV, + .bind = genl_bind, }; /* we'll bump the group number right afterwards */ From fe8402511ed80ce9a7ee696f457a3d856d52cd47 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 11 Dec 2023 14:41:32 +0200 Subject: [PATCH 60/68] psample: Require 'CAP_NET_ADMIN' when joining "packets" group commit 44ec98ea5ea9cfecd31a5c4cc124703cb5442832 upstream. The "psample" generic netlink family notifies sampled packets over the "packets" multicast group. This is problematic since by default generic netlink allows non-root users to listen to these notifications. Fix by marking the group with the 'GENL_UNS_ADMIN_PERM' flag. This will prevent non-root users or root without the 'CAP_NET_ADMIN' capability (in the user namespace owning the network namespace) from joining the group. Tested using [1]. Before: # capsh -- -c ./psample_repo # capsh --drop=cap_net_admin -- -c ./psample_repo After: # capsh -- -c ./psample_repo # capsh --drop=cap_net_admin -- -c ./psample_repo Failed to join "packets" multicast group [1] $ cat psample.c #include #include #include #include int join_grp(struct nl_sock *sk, const char *grp_name) { int grp, err; grp = genl_ctrl_resolve_grp(sk, "psample", grp_name); if (grp < 0) { fprintf(stderr, "Failed to resolve \"%s\" multicast group\n", grp_name); return grp; } err = nl_socket_add_memberships(sk, grp, NFNLGRP_NONE); if (err) { fprintf(stderr, "Failed to join \"%s\" multicast group\n", grp_name); return err; } return 0; } int main(int argc, char **argv) { struct nl_sock *sk; int err; sk = nl_socket_alloc(); if (!sk) { fprintf(stderr, "Failed to allocate socket\n"); return -1; } err = genl_connect(sk); if (err) { fprintf(stderr, "Failed to connect socket\n"); return err; } err = join_grp(sk, "config"); if (err) return err; err = join_grp(sk, "packets"); if (err) return err; return 0; } $ gcc -I/usr/include/libnl3 -lnl-3 -lnl-genl-3 -o psample_repo psample.c Fixes: 6ae0a6286171 ("net: Introduce psample, a new genetlink channel for packet sampling") Reported-by: "The UK's National Cyber Security Centre (NCSC)" Signed-off-by: Ido Schimmel Reviewed-by: Jacob Keller Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20231206213102.1824398-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/psample/psample.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/psample/psample.c b/net/psample/psample.c index 6f2fbc6b9eb2..6d29983cbd07 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -28,7 +28,8 @@ enum psample_nl_multicast_groups { static const struct genl_multicast_group psample_nl_mcgrps[] = { [PSAMPLE_NL_MCGRP_CONFIG] = { .name = PSAMPLE_NL_MCGRP_CONFIG_NAME }, - [PSAMPLE_NL_MCGRP_SAMPLE] = { .name = PSAMPLE_NL_MCGRP_SAMPLE_NAME }, + [PSAMPLE_NL_MCGRP_SAMPLE] = { .name = PSAMPLE_NL_MCGRP_SAMPLE_NAME, + .flags = GENL_UNS_ADMIN_PERM }, }; static struct genl_family psample_nl_family __ro_after_init; From 4a341627a10959ef0db16f758bfd0d10fa9d73eb Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 11 Dec 2023 14:41:33 +0200 Subject: [PATCH 61/68] drop_monitor: Require 'CAP_SYS_ADMIN' when joining "events" group commit e03781879a0d524ce3126678d50a80484a513c4b upstream. The "NET_DM" generic netlink family notifies drop locations over the "events" multicast group. This is problematic since by default generic netlink allows non-root users to listen to these notifications. Fix by adding a new field to the generic netlink multicast group structure that when set prevents non-root users or root without the 'CAP_SYS_ADMIN' capability (in the user namespace owning the network namespace) from joining the group. Set this field for the "events" group. Use 'CAP_SYS_ADMIN' rather than 'CAP_NET_ADMIN' because of the nature of the information that is shared over this group. Note that the capability check in this case will always be performed against the initial user namespace since the family is not netns aware and only operates in the initial network namespace. A new field is added to the structure rather than using the "flags" field because the existing field uses uAPI flags and it is inappropriate to add a new uAPI flag for an internal kernel check. In net-next we can rework the "flags" field to use internal flags and fold the new field into it. But for now, in order to reduce the amount of changes, add a new field. Since the information can only be consumed by root, mark the control plane operations that start and stop the tracing as root-only using the 'GENL_ADMIN_PERM' flag. Tested using [1]. Before: # capsh -- -c ./dm_repo # capsh --drop=cap_sys_admin -- -c ./dm_repo After: # capsh -- -c ./dm_repo # capsh --drop=cap_sys_admin -- -c ./dm_repo Failed to join "events" multicast group [1] $ cat dm.c #include #include #include #include int main(int argc, char **argv) { struct nl_sock *sk; int grp, err; sk = nl_socket_alloc(); if (!sk) { fprintf(stderr, "Failed to allocate socket\n"); return -1; } err = genl_connect(sk); if (err) { fprintf(stderr, "Failed to connect socket\n"); return err; } grp = genl_ctrl_resolve_grp(sk, "NET_DM", "events"); if (grp < 0) { fprintf(stderr, "Failed to resolve \"events\" multicast group\n"); return grp; } err = nl_socket_add_memberships(sk, grp, NFNLGRP_NONE); if (err) { fprintf(stderr, "Failed to join \"events\" multicast group\n"); return err; } return 0; } $ gcc -I/usr/include/libnl3 -lnl-3 -lnl-genl-3 -o dm_repo dm.c Fixes: 9a8afc8d3962 ("Network Drop Monitor: Adding drop monitor implementation & Netlink protocol") Reported-by: "The UK's National Cyber Security Centre (NCSC)" Signed-off-by: Ido Schimmel Reviewed-by: Jacob Keller Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20231206213102.1824398-3-idosch@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- include/net/genetlink.h | 2 ++ net/core/drop_monitor.c | 4 +++- net/netlink/genetlink.c | 3 +++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/include/net/genetlink.h b/include/net/genetlink.h index a8c9c8d1eb51..2d52776def52 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -11,10 +11,12 @@ /** * struct genl_multicast_group - generic netlink multicast group * @name: name of the multicast group, names are per-family + * @cap_sys_admin: whether %CAP_SYS_ADMIN is required for binding */ struct genl_multicast_group { char name[GENL_NAMSIZ]; u8 flags; + u8 cap_sys_admin:1; }; struct genl_ops; diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index e8e8389ddc96..feb946c954b6 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -180,7 +180,7 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data) } static const struct genl_multicast_group dropmon_mcgrps[] = { - { .name = "events", }, + { .name = "events", .cap_sys_admin = 1 }, }; static void send_dm_alert(struct work_struct *work) @@ -1539,11 +1539,13 @@ static const struct genl_ops dropmon_ops[] = { .cmd = NET_DM_CMD_START, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = net_dm_cmd_trace, + .flags = GENL_ADMIN_PERM, }, { .cmd = NET_DM_CMD_STOP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = net_dm_cmd_trace, + .flags = GENL_ADMIN_PERM, }, { .cmd = NET_DM_CMD_CONFIG_GET, diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 34e3c8eb5911..a03e16e06e29 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1012,6 +1012,9 @@ static int genl_bind(struct net *net, int group) if ((grp->flags & GENL_UNS_ADMIN_PERM) && !ns_capable(net->user_ns, CAP_NET_ADMIN)) ret = -EPERM; + if (grp->cap_sys_admin && + !ns_capable(net->user_ns, CAP_SYS_ADMIN)) + ret = -EPERM; break; } From dd9e851944aae32eff382777a4527d36922df64d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 18 Aug 2022 17:36:41 -0700 Subject: [PATCH 62/68] tools headers UAPI: Sync linux/perf_event.h with the kernel sources commit 65ba872a6971c11ceb342c3330f059289c0e6bdb upstream. To pick the trivial change in: 119a784c81270eb8 ("perf/core: Add a new read format to get a number of lost samples") Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Ian Rogers Cc: Ingo Molnar Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20220819003644.508916-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/include/uapi/linux/perf_event.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index fabe5aeaa351..6f1bbf2ada4c 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -273,6 +273,7 @@ enum { * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING * { u64 id; } && PERF_FORMAT_ID + * { u64 lost; } && PERF_FORMAT_LOST * } && !PERF_FORMAT_GROUP * * { u64 nr; @@ -280,6 +281,7 @@ enum { * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING * { u64 value; * { u64 id; } && PERF_FORMAT_ID + * { u64 lost; } && PERF_FORMAT_LOST * } cntr[nr]; * } && PERF_FORMAT_GROUP * }; @@ -289,8 +291,9 @@ enum perf_event_read_format { PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, PERF_FORMAT_ID = 1U << 2, PERF_FORMAT_GROUP = 1U << 3, + PERF_FORMAT_LOST = 1U << 4, - PERF_FORMAT_MAX = 1U << 4, /* non-ABI */ + PERF_FORMAT_MAX = 1U << 5, /* non-ABI */ }; #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ From bdee8b2805b8245b61fce6c44885a93931d06fde Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 11 Dec 2023 15:52:30 +0100 Subject: [PATCH 63/68] Revert "btrfs: add dmesg output for first mount and last unmount of a filesystem" This reverts commit dd94ffab1b6d84b3ba9a8d09b6b0f44610d397eb which is commit 2db313205f8b96eea467691917138d646bb50aef upstream. As pointed out by many, the disk_super structure is NOT initialized before it is dereferenced in the function fs/btrfs/disk-io.c:open_ctree() that this commit adds, so something went wrong here. Revert it for now until it gets straightened out. Link: https://lore.kernel.org/r/5b0eb360-3765-40e1-854a-9da6d97eb405@roeck-us.net Link: https://lore.kernel.org/r/20231209172836.GA2154579@dev-arch.thelio-3990X Reported-by: Guenter Roeck Reported-by: Nathan Chancellor Cc: Anand Jain Cc: Qu Wenruo Cc: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/disk-io.c | 1 - fs/btrfs/super.c | 5 +---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index dda86994e0a4..b4ed11b5f148 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2829,7 +2829,6 @@ int open_ctree(struct super_block *sb, goto fail_alloc; } - btrfs_info(fs_info, "first mount of filesystem %pU", disk_super->fsid); /* * Verify the type first, if that or the checksum value are * corrupted, we'll find out diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2804b055b4ed..ea8b5b2d859d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -291,10 +291,7 @@ void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function, static void btrfs_put_super(struct super_block *sb) { - struct btrfs_fs_info *fs_info = btrfs_sb(sb); - - btrfs_info(fs_info, "last unmount of filesystem %pU", fs_info->fs_devices->fsid); - close_ctree(fs_info); + close_ctree(btrfs_sb(sb)); } enum { From ab5813bb20711a8e3493b152e50b3dda3aed5858 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 4 Dec 2023 14:01:59 +0000 Subject: [PATCH 64/68] cifs: Fix non-availability of dedup breaking generic/304 [ Upstream commit 691a41d8da4b34fe72f09393505f55f28a8f34ec ] Deduplication isn't supported on cifs, but cifs doesn't reject it, instead treating it as extent duplication/cloning. This can cause generic/304 to go silly and run for hours on end. Fix cifs to indicate EOPNOTSUPP if REMAP_FILE_DEDUP is set in ->remap_file_range(). Note that it's unclear whether or not commit b073a08016a1 is meant to cause cifs to return an error if REMAP_FILE_DEDUP. Fixes: b073a08016a1 ("cifs: fix that return -EINVAL when do dedupe operation") Cc: stable@vger.kernel.org Suggested-by: Dave Chinner cc: Xiaoli Feng cc: Shyam Prasad N cc: Rohith Surabattula cc: Jeff Layton cc: Darrick Wong cc: fstests@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/3876191.1701555260@warthog.procyon.org.uk/ Signed-off-by: David Howells Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/cifsfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 917441e3018a..12f632287d16 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1079,7 +1079,9 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, unsigned int xid; int rc; - if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) + if (remap_flags & REMAP_FILE_DEDUP) + return -EOPNOTSUPP; + if (remap_flags & ~REMAP_FILE_ADVISORY) return -EINVAL; cifs_dbg(FYI, "clone range\n"); From d99376b70247394b39307051f994060a085a417e Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 5 Dec 2023 21:49:29 -0300 Subject: [PATCH 65/68] smb: client: fix potential NULL deref in parse_dfs_referrals() [ Upstream commit 92414333eb375ed64f4ae92d34d579e826936480 ] If server returned no data for FSCTL_DFS_GET_REFERRALS, @dfs_rsp will remain NULL and then parse_dfs_referrals() will dereference it. Fix this by returning -EIO when no output data is returned. Besides, we can't fix it in SMB2_ioctl() as some FSCTLs are allowed to return no data as per MS-SMB2 2.2.32. Fixes: 9d49640a21bf ("CIFS: implement get_dfs_refer for SMB2+") Cc: stable@vger.kernel.org Reported-by: Robert Morris Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/smb2ops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index ad9b207432e1..5fea15a5f341 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -2498,6 +2498,8 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, (char **)&dfs_rsp, &dfs_rsp_size); } while (rc == -EAGAIN); + if (!rc && !dfs_rsp) + rc = -EIO; if (rc) { if ((rc != -ENOENT) && (rc != -EOPNOTSUPP)) cifs_tcon_dbg(VFS, "ioctl error in %s rc=%d\n", __func__, rc); From c6a1282e530d6e34da292857ec4c230c3a289199 Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Tue, 13 Sep 2022 18:20:24 +0530 Subject: [PATCH 66/68] devcoredump : Serialize devcd_del work [ Upstream commit 01daccf748323dfc61112f474cf2ba81015446b0 ] In following scenario(diagram), when one thread X running dev_coredumpm() adds devcd device to the framework which sends uevent notification to userspace and another thread Y reads this uevent and call to devcd_data_write() which eventually try to delete the queued timer that is not initialized/queued yet. So, debug object reports some warning and in the meantime, timer is initialized and queued from X path. and from Y path, it gets reinitialized again and timer->entry.pprev=NULL and try_to_grab_pending() stucks. To fix this, introduce mutex and a boolean flag to serialize the behaviour. cpu0(X) cpu1(Y) dev_coredump() uevent sent to user space device_add() ======================> user space process Y reads the uevents writes to devcd fd which results into writes to devcd_data_write() mod_delayed_work() try_to_grab_pending() del_timer() debug_assert_init() INIT_DELAYED_WORK() schedule_delayed_work() debug_object_fixup() timer_fixup_assert_init() timer_setup() do_init_timer() /* Above call reinitializes the timer to timer->entry.pprev=NULL and this will be checked later in timer_pending() call. */ timer_pending() !hlist_unhashed_lockless(&timer->entry) !h->pprev /* del_timer() checks h->pprev and finds it to be NULL due to which try_to_grab_pending() stucks. */ Link: https://lore.kernel.org/lkml/2e1f81e2-428c-f11f-ce92-eb11048cb271@quicinc.com/ Signed-off-by: Mukesh Ojha Link: https://lore.kernel.org/r/1663073424-13663-1-git-send-email-quic_mojha@quicinc.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: af54d778a038 ("devcoredump: Send uevent once devcd is ready") Signed-off-by: Sasha Levin --- drivers/base/devcoredump.c | 83 +++++++++++++++++++++++++++++++++++++- 1 file changed, 81 insertions(+), 2 deletions(-) diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c index e42d0b514384..ab2fbb43ccdb 100644 --- a/drivers/base/devcoredump.c +++ b/drivers/base/devcoredump.c @@ -29,6 +29,47 @@ struct devcd_entry { struct device devcd_dev; void *data; size_t datalen; + /* + * Here, mutex is required to serialize the calls to del_wk work between + * user/kernel space which happens when devcd is added with device_add() + * and that sends uevent to user space. User space reads the uevents, + * and calls to devcd_data_write() which try to modify the work which is + * not even initialized/queued from devcoredump. + * + * + * + * cpu0(X) cpu1(Y) + * + * dev_coredump() uevent sent to user space + * device_add() ======================> user space process Y reads the + * uevents writes to devcd fd + * which results into writes to + * + * devcd_data_write() + * mod_delayed_work() + * try_to_grab_pending() + * del_timer() + * debug_assert_init() + * INIT_DELAYED_WORK() + * schedule_delayed_work() + * + * + * Also, mutex alone would not be enough to avoid scheduling of + * del_wk work after it get flush from a call to devcd_free() + * mentioned as below. + * + * disabled_store() + * devcd_free() + * mutex_lock() devcd_data_write() + * flush_delayed_work() + * mutex_unlock() + * mutex_lock() + * mod_delayed_work() + * mutex_unlock() + * So, delete_work flag is required. + */ + struct mutex mutex; + bool delete_work; struct module *owner; ssize_t (*read)(char *buffer, loff_t offset, size_t count, void *data, size_t datalen); @@ -88,7 +129,12 @@ static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj, struct device *dev = kobj_to_dev(kobj); struct devcd_entry *devcd = dev_to_devcd(dev); - mod_delayed_work(system_wq, &devcd->del_wk, 0); + mutex_lock(&devcd->mutex); + if (!devcd->delete_work) { + devcd->delete_work = true; + mod_delayed_work(system_wq, &devcd->del_wk, 0); + } + mutex_unlock(&devcd->mutex); return count; } @@ -116,7 +162,12 @@ static int devcd_free(struct device *dev, void *data) { struct devcd_entry *devcd = dev_to_devcd(dev); + mutex_lock(&devcd->mutex); + if (!devcd->delete_work) + devcd->delete_work = true; + flush_delayed_work(&devcd->del_wk); + mutex_unlock(&devcd->mutex); return 0; } @@ -126,6 +177,30 @@ static ssize_t disabled_show(struct class *class, struct class_attribute *attr, return sprintf(buf, "%d\n", devcd_disabled); } +/* + * + * disabled_store() worker() + * class_for_each_device(&devcd_class, + * NULL, NULL, devcd_free) + * ... + * ... + * while ((dev = class_dev_iter_next(&iter)) + * devcd_del() + * device_del() + * put_device() <- last reference + * error = fn(dev, data) devcd_dev_release() + * devcd_free(dev, data) kfree(devcd) + * mutex_lock(&devcd->mutex); + * + * + * In the above diagram, It looks like disabled_store() would be racing with parallely + * running devcd_del() and result in memory abort while acquiring devcd->mutex which + * is called after kfree of devcd memory after dropping its last reference with + * put_device(). However, this will not happens as fn(dev, data) runs + * with its own reference to device via klist_node so it is not its last reference. + * so, above situation would not occur. + */ + static ssize_t disabled_store(struct class *class, struct class_attribute *attr, const char *buf, size_t count) { @@ -282,13 +357,16 @@ void dev_coredumpm(struct device *dev, struct module *owner, devcd->read = read; devcd->free = free; devcd->failing_dev = get_device(dev); + devcd->delete_work = false; + mutex_init(&devcd->mutex); device_initialize(&devcd->devcd_dev); dev_set_name(&devcd->devcd_dev, "devcd%d", atomic_inc_return(&devcd_count)); devcd->devcd_dev.class = &devcd_class; + mutex_lock(&devcd->mutex); if (device_add(&devcd->devcd_dev)) goto put_device; @@ -302,10 +380,11 @@ void dev_coredumpm(struct device *dev, struct module *owner, INIT_DELAYED_WORK(&devcd->del_wk, devcd_del); schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT); - + mutex_unlock(&devcd->mutex); return; put_device: put_device(&devcd->devcd_dev); + mutex_unlock(&devcd->mutex); put_module: module_put(owner); free: From 06bcac5c51519566d45f9e21b85d56457e12fcc2 Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Fri, 17 Nov 2023 20:19:32 +0530 Subject: [PATCH 67/68] devcoredump: Send uevent once devcd is ready [ Upstream commit af54d778a03853801d681c98c0c2a6c316ef9ca7 ] dev_coredumpm() creates a devcoredump device and adds it to the core kernel framework which eventually end up sending uevent to the user space and later creates a symbolic link to the failed device. An application running in userspace may be interested in this symbolic link to get the name of the failed device. In a issue scenario, once uevent sent to the user space it start reading '/sys/class/devcoredump/devcdX/failing_device' to get the actual name of the device which might not been created and it is in its path of creation. To fix this, suppress sending uevent till the failing device symbolic link gets created and send uevent once symbolic link is created successfully. Fixes: 833c95456a70 ("device coredump: add new device coredump class") Signed-off-by: Mukesh Ojha Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1700232572-25823-1-git-send-email-quic_mojha@quicinc.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/base/devcoredump.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c index ab2fbb43ccdb..e9398bcffab6 100644 --- a/drivers/base/devcoredump.c +++ b/drivers/base/devcoredump.c @@ -367,6 +367,7 @@ void dev_coredumpm(struct device *dev, struct module *owner, devcd->devcd_dev.class = &devcd_class; mutex_lock(&devcd->mutex); + dev_set_uevent_suppress(&devcd->devcd_dev, true); if (device_add(&devcd->devcd_dev)) goto put_device; @@ -378,6 +379,8 @@ void dev_coredumpm(struct device *dev, struct module *owner, "devcoredump")) /* nothing - symlink will be missing */; + dev_set_uevent_suppress(&devcd->devcd_dev, false); + kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD); INIT_DELAYED_WORK(&devcd->del_wk, devcd_del); schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT); mutex_unlock(&devcd->mutex); From 16e6e107a688046df37976fb6d7310e886c8115d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 13 Dec 2023 18:18:18 +0100 Subject: [PATCH 68/68] Linux 5.4.264 Link: https://lore.kernel.org/r/20231211182015.049134368@linuxfoundation.org Tested-by: Florian Fainelli Tested-by: Harshit Mogalapalli Tested-by: Shuah Khan Tested-by: Guenter Roeck Tested-by: Linux Kernel Functional Testing Tested-by: Jon Hunter Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3936ad84e1ba..eeb06cdcf64f 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 4 -SUBLEVEL = 263 +SUBLEVEL = 264 EXTRAVERSION = NAME = Kleptomaniac Octopus